Harald Kliems: Comparing counts from Strava Metro and loop counters

Harald Kliems

# Load the 2022 Eco Counter export and reshape it to one row per hour and
# counter location.
#
# The first three rows of the sheet are skipped and the three columns are
# named explicitly. Timestamps are floored to the hour and the counts are
# summed within each hour (presumably the export is sub-hourly -- TODO confirm),
# ignoring missing readings.
counts_2022 <- readxl::read_excel(
  "data/EcoCounter_2022.xlsx",
  skip = 3,
  col_names = c("time_count", "count_cap_city", "count_sw_path")
) |>
  mutate(date_count = floor_date(time_count, unit = "hours")) |>
  summarize(
    across(starts_with("count_"), ~ sum(.x, na.rm = TRUE)),
    .by = date_count
  ) |>
  pivot_longer(
    cols = starts_with("count_"),
    names_to = "location",
    values_to = "count_hourly"
  ) |>
  mutate(
    location = case_when(
      location == "count_cap_city" ~ "Cap City at North Shore",
      location == "count_sw_path" ~ "SW Path at Randall"
    ),
    # Pin Sunday = 1 explicitly so the weekday test below does not depend on
    # the `lubridate.week.start` option.
    dayofweek = wday(date_count, week_start = 7),
    # With Sunday = 1, Monday-Friday are 2:6. The previous test against 1:5
    # classified Sunday as a weekday and Friday as part of the weekend.
    weekendind = ifelse(dayofweek %in% 2:6, "weekday", "weekend"),
    month_count = month(date_count, label = TRUE, abbr = TRUE)
  )

# Hourly Strava Metro exports, one file per segment. Each table is tagged with
# the same location label used for the matching Eco Counter so the two sources
# can be joined on it.
strava_cap_city <- mutate(
  read_csv("data/cap_city_strava_hourly.csv"),
  location = "Cap City at North Shore"
)
strava_sw_path <- mutate(
  read_csv("data/sw_path_strava_hourly.csv"),
  location = "SW Path at Randall"
)

# Combine Strava and Eco Counter data per location. A full join keeps hours
# that appear in only one of the two sources; the Strava `hour` column is
# matched against the Eco Counter `date_count` column.
counts_cap_city <- full_join(
  strava_cap_city,
  filter(counts_2022, location == "Cap City at North Shore"),
  by = join_by(hour == date_count, location)
)

counts_sw_path <- full_join(
  strava_sw_path,
  filter(counts_2022, location == "SW Path at Randall"),
  by = join_by(hour == date_count, location)
)


# Stack both locations into one table. Hours with no Strava record come out of
# the full join with a missing `total_trip_count`; those are treated as zero
# Strava trips.
all_counts_2022 <- mutate(
  rbind(counts_cap_city, counts_sw_path),
  strava_count = replace_na(total_trip_count, 0)
)

# Hourly Strava counts against hourly Eco Counter counts, one panel per
# location. Points are jittered and mostly transparent to reduce overplotting;
# a linear fit is drawn on top.
ggplot(all_counts_2022, aes(x = count_hourly, y = strava_count)) +
  geom_jitter(alpha = .1) +
  facet_wrap(~ location) +
  labs(
    x = "Eco Counter hourly counts",
    y = "Strava hourly counts"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  geom_smooth(method = "lm")

# Per-location summary of the hourly data: correlation between the two
# sources, yearly totals from each source, and the Strava total as a share of
# the Eco Counter total, rendered as a gt table.
all_counts_2022 %>%
  group_by(location) %>%
  summarize(
    cor = cor(count_hourly, strava_count),
    total_count_strava = sum(strava_count),
    total_count_eco = sum(count_hourly),
    pct_strava = total_count_strava / total_count_eco
  ) %>%
  gt() %>%
  tab_header(
    title = md("Comparing _hourly_ counts between Strava and Eco Counter"),
    subtitle = "Two locations in Madison, Wisconsin"
  ) %>%
  tab_spanner(
    columns = c(total_count_strava, total_count_eco),
    label = "Total count 2022"
  ) %>%
  cols_label(
    location = "Counter/segment location",
    cor = "Correlation (r)",
    total_count_strava = "Strava",
    total_count_eco = "Eco Counter",
    pct_strava = "Strava/Eco Counter counts"
  ) %>%
  fmt_percent(columns = pct_strava, decimals = 0) %>%
  fmt_number(columns = cor, decimals = 2) %>%
  fmt_auto(columns = starts_with("total_count"))

Comparing hourly counts between Strava and Eco Counter

Two locations in Madison, Wisconsin

Counter/segment location

Correlation (r)

Total count 2022

Strava/Eco Counter counts

Strava

Eco Counter

Cap City at North Shore

0.90

50,235

440,717

11%

SW Path at Randall

0.85

26,710

298,947

9%

# Same comparison as the hourly plot, aggregated to daily totals per location.
all_counts_2022 %>%
  mutate(day = floor_date(hour, unit = "days")) %>%
  group_by(day, location) %>%
  summarize(
    daily_eco = sum(count_hourly),
    daily_strava = sum(strava_count)
  ) %>%
  ggplot(aes(x = daily_eco, y = daily_strava)) +
  geom_point(alpha = .1) +
  facet_wrap(~ location) +
  labs(
    x = "Eco Counter daily counts",
    y = "Strava daily counts"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  geom_smooth(method = "lm")

# Per-location correlation between daily Strava and daily Eco Counter totals,
# rendered as a gt table.
all_counts_2022 %>%
  mutate(day = floor_date(hour, unit = "days")) %>%
  group_by(day, location) %>%
  summarize(
    daily_eco = sum(count_hourly),
    daily_strava = sum(strava_count)
  ) %>%
  ungroup() %>%
  group_by(location) %>%
  summarize(cor = cor(daily_eco, daily_strava)) %>%
  gt() %>%
  tab_header(
    title = md("Comparing _daily_ counts between Strava and Eco Counter"),
    subtitle = "Two locations in Madison, Wisconsin"
  ) %>%
  # Disabled: this table has no total-count columns to span. Kept on its own
  # comment lines so it cannot swallow the pipeline steps that follow.
  # tab_spanner(columns = c(total_count_strava, total_count_eco),
  #             label = "Total count 2022") %>%
  cols_label(
    location = "Counter/segment location",
    cor = "Correlation (r)"
  ) %>%
  fmt_number(columns = cor, decimals = 2)

Comparing daily counts between Strava and Eco Counter

Two locations in Madison, Wisconsin

Counter/segment location

Correlation (r)

Cap City at North Shore

0.96

SW Path at Randall

0.93

# R-squared of a linear model of daily Eco Counter counts on daily Strava
# counts for the SW Path location.
all_counts_2022 %>%
  filter(location == "SW Path at Randall") %>%
  mutate(day = floor_date(hour, unit = "days")) %>%
  group_by(day, location) %>%
  summarize(
    daily_eco = sum(count_hourly),
    daily_strava = sum(strava_count)
  ) %>%
  ungroup() %>%
  with(lm(daily_eco ~ daily_strava)) %>%
  broom::glance() %>%
  select(r.squared)

# R-squared of a linear model of daily Eco Counter counts on daily Strava
# counts for the Cap City location.
all_counts_2022 %>%
  filter(location == "Cap City at North Shore") %>%
  mutate(day = floor_date(hour, unit = "days")) %>%
  group_by(day, location) %>%
  summarize(
    daily_eco = sum(count_hourly),
    daily_strava = sum(strava_count)
  ) %>%
  ungroup() %>%
  with(lm(daily_eco ~ daily_strava)) %>%
  broom::glance() %>%
  select(r.squared)

@misc{kliems2024comparing, author = {Kliems, Harald}, title = {Harald Kliems: Comparing counts from Strava Metro and loop counters}, url = {https://haraldkliems.netlify.app/posts/2024-01-19-strava-vs-eco-counter/}, year = {2024} }

Comparing counts from Strava Metro and loop counters

Author

Affiliation

Published

Citation

Acknowledgments

Footnotes

References

Citation