TITLE: Processing bike ride data from Fitotrack Android app
DATE: 2024-06-19
AUTHOR: John L. Godlee
====================================================================


I have been using The Fitotrack Android app for a over a year now
to track my bike rides. Fitotrack allows you to export the tracking
data in XML format. I wrote an R script to process the XML data and
create some basic summary plots. I have broken down the R script
below:

 [Fitotrack Android app]: https://github.com/russok/FitoTrack

Firstly, load necessary packages and import the compressed XML
file, which has the file extension .ftb. I use Syncthing to sync
the backup files from my phone to my laptop.

 [Syncthing]: https://syncthing.net/

   # Process data from FitoTrack Android app
   # John L. Godlee ([email protected])
   # Last updated: 2024-06-19

   # Packages
   library(dplyr)
   library(XML)
   library(lubridate)
   library(ggplot2)
   library(patchwork)
   library(archive)
   library(leaflet)
   library(sf)

   # Find all fitotrack backups
   f <- list.files("~/syncthing/fitotrack", "*.ftb", full.names =
TRUE)

   # Check files found
   stopifnot(length(f) > 0)

Then parse the file and extract each ride, represented by child
nodes in the workouts part of the XML.

   # 7z unarchive the file
   conn <- archive_read(sort(f)[1])

   # Import data
   dat <- xmlParse(readLines(conn))

   # Separate nodes with summary data
   summ_nodes <- getNodeSet(dat, "//workouts//workouts")

Summarise each node and create a pretty dataframe, where each row
is a ride.

   # For each node, get children as list
   summ_df <- bind_rows(lapply(seq_len(xmlSize(summ_nodes)),
function(x) {
     as.list(getChildrenStrings(summ_nodes[[x]]))
   })) %>%
     mutate(
       across(all_of(c("calorie", "ascent", "descent",
             "avgPace", "avgSpeed", "topSpeed",
             "length", "maxElevationMSL", "minElevationMSL")),
as.numeric),
       start = as_datetime(as.numeric(start) / 1000),
       end = as_datetime(as.numeric(end) / 1000),
       duration = round(as.period(end - start)),
       pauseDuration =
round(seconds_to_period(as.numeric(pauseDuration) / 1000)))

Create plots with summary information for each ride (plots_all),
and a table with the same information (month_summ).

   # Define conversion factor km to miles
   kmt <- 0.6213711922

   # Plot average speed of all rides over time
   avgSpeed_ts <- ggplot(summ_df, aes(x = start, y = avgSpeed)) +
     geom_point(shape = 21) +
     theme_bw() +
     scale_y_continuous(
       name = expression("Average speed"~(km~h^-1)),
       sec.axis = sec_axis(
         transform = ~.*kmt, name = expression("Average
speed"~(miles~h^-1)) )) +
     xlab("Date")

   # Plot top speed of all rides over time
   topSpeed_ts <- ggplot(summ_df, aes(x = start, y = topSpeed)) +
     geom_point(shape = 21) +
     theme_bw() +
     scale_y_continuous(
       name = expression("Top speed"~(km~h^-1)),
       sec.axis = sec_axis(
         transform = ~.*kmt, name = expression("Top
speed"~(miles~h^-1)) )) +
     xlab("Date")

   # Plot length of all rides over time
   length_ts <- summ_df %>%
     mutate(length_km = length / 1000) %>%
     ggplot(., aes(x = start, y = length_km)) +
       geom_point(shape = 21) +
       theme_bw() +
       scale_y_continuous(
         name = "Distance (km)",
         sec.axis = sec_axis(
           transform = ~.*kmt, name = "Distance (miles)")) +
       xlab("Date")

   # Monthly breakdown of:
   # total distance
   # average speed
   # top speed
   month_summ <- summ_df %>%
     mutate(month_year = format(as.Date(start), "%Y-%m")) %>%
     group_by(month_year) %>%
     summarise(
       total_dist = sum(length, na.rm = TRUE) / 1000,
       mean_speed = mean(avgSpeed, na.rm = TRUE),
       max_speed = max(topSpeed, na.rm = TRUE)) %>%
     mutate(
       total_dist_miles = total_dist * kmt,
       mean_speed_mph = mean_speed * kmt,
       max_speed_mph = max_speed * kmt)

   # Plot monthly total distance bar chart
   month_dist <- month_summ %>%
     mutate(month_year_date = as.Date(paste0(month_year, "-01")))
%>%
     ggplot(., aes(x = month_year_date, y = total_dist)) +
       geom_bar(stat = "identity", colour = "black", fill =
"grey") +
       theme_bw() +
       scale_x_date(
         breaks = seq(
           as.Date(paste0(min(month_summ$month_year), "-01")),
           as.Date(paste0(max(month_summ$month_year), "-01")),
           by = "month"),
         date_labels = "%b %Y") +
       scale_y_continuous(
         name = "Total distance (km)",
         sec.axis = sec_axis(
           transform = ~.*kmt, name = "Total distance (miles)")) +
       xlab("Month")

   # Patchwork plots together
   plots_all <- avgSpeed_ts + topSpeed_ts + length_ts + month_dist

 ![Summary plots created by code
above.](https://johngodlee.xyz/img_full/fitotrack/plots_all.png)

 ![Monthly summary table cretaed by code
above.](https://johngodlee.xyz/img_full/fitotrack/month_summ.png)

Now to process the data from a single ride. Fitotrack splits each
ride up into interals which share a single ID, within the samples
part of the XML.

First process each node and create a pretty dataframe.

   # Get intervals
   # Separate nodes
   int_nodes <- getNodeSet(dat, "//samples//samples")

   # For each node, get children as list
   int_list <- lapply(seq_len(xmlSize(int_nodes)), function(x) {
     as.list(getChildrenStrings(int_nodes[[x]]))
   })

   # Process intervals
   # summ_list$samples[[1]]
   int_df <- bind_rows(lapply(int_list, function(x) {
     data.frame(
       "int_id" = x$id,
       "id" = x$workoutId,
       "elevation" = as.numeric(x$elevation),
       "latitude" = as.numeric(x$lat),
       "longitude" = as.numeric(x$lon),
       "speed" = as.numeric(x$speed))
     })) %>%
     group_by(id) %>%
     arrange(int_id) %>%
     mutate(int = row_number()) %>%
     relocate(id, int) %>%
     dplyr::select(-int_id) %>%
     mutate(per = int / max(int))

   # Check all interval IDs in summary dataframe
   stopifnot(all(sort(unique(int_df$id)) %in%
sort(unique(summ_df$id))))

Then extract a single ride ID, in this case the most recent ride,
and create interval plots. The first is a speed plot, and the
second is an elevation plot.

   # Extract most recent ID
   ex_id <- summ_df$id[order(summ_df$start, decreasing = TRUE)][1]

   # Create speed plot of a particular ride
   int_speed <- int_df %>%
     filter(id == ex_id) %>%
     ggplot(., aes(x = int, y = speed)) +
       geom_line() +
       theme_bw() +
       scale_y_continuous(
         name = expression("Speed"~(km~h^-1)),
         sec.axis = sec_axis(
           transform = ~.*kmt, name =
expression("Speed"~(miles~h^-1)) )) +
       xlab("Interval")

   # Create elevation plot of a particular ride
   int_elev <- int_df %>%
     filter(id == ex_id) %>%
     ggplot(., aes(x = int, y = elevation)) +
       geom_line() +
       theme_bw() +
       labs(
         x = "Interval",
         y = "Elevation (m)")

   # Combine speed and elevation plots for a particular ride
   plots_ride <- (int_speed + int_elev) +
     plot_layout(ncol = 1)

 ![Plots of single ride generated by code
above.](https://johngodlee.xyz/img_full/fitotrack/plots_ride.png)

Finally, create a simple interactive map of the ride.

   # Create sf object with interval points
   int_sf <- int_fil %>%
     st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)

   # Duplicate points to get start and end of interval,
   # add ID, summarise to interval lines
   int_lines <- int_sf %>%
     mutate(int = int - 1) %>%
     bind_rows(., int_sf) %>%
     arrange(int) %>%
     group_by(int) %>%
     summarise(
       elevation = mean(elevation),
       speed = mean(speed),
       per = mean(per),
       n = n(),
       do_union = FALSE) %>%
     filter(n > 1) %>%
     st_cast(., "LINESTRING")

   # Create colour palette
   pal <- colorNumeric(palette = "plasma", domain =
int_lines$speed)

   # Create leaflet map call
   lmap <- leaflet() %>%
     addTiles() %>%
     setView(
       lng = mean(int_fil$longitude),
       lat = mean(int_fil$latitude),
       zoom = 12) %>%
     addPolylines(
       data = int_lines,
       color = pal(int_lines$speed),
       opacity = 1)

 ![Screenshot of leaflet map showing route with colouring by
speed.](https://johngodlee.xyz/img_full/fitotrack/leaflet.png)

Update 2024-07-02

I recently added a kind of heatmap that plots all my rides using
leaflet. It uses the leafgl package to efficiently render many line
segments using webGL. For ~140,000 line segments it took about 5
seconds to load the map and the map is pretty snappy in the browser
once it is rendered.

 [leafgl]: https://github.com/r-spatial/leafgl

   int_all_sf <- int_all %>%
     st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)

   # Duplicate points to get start and end of interval,
   # add ID, summarise to interval lines
   int_all_lines <- int_all_sf %>%
     mutate(int = int - 1) %>%
     bind_rows(., int_all_sf) %>%
     arrange(int) %>%
     group_by(id, int) %>%
     summarise(
       elevation = mean(elevation),
       speed = mean(speed * 1/kmt),
       per = mean(per),
       n = n(),
       do_union = FALSE,
       .groups = "keep") %>%
     filter(n > 1) %>%
     st_cast(., "LINESTRING")

   # Create leaflet heatmap of all rides
   heatmap_all <- leaflet(options = leafletOptions(perferCanvas =
TRUE)) %>%
     addTiles() %>%
     setView(
       lng = mean(int_all$longitude),
       lat = mean(int_all$latitude),
       zoom = 12) %>%
     addGlPolylines(
       data = int_all_lines,
       color = "#0000ff",
       opacity = 0.05,
       src = TRUE,
       digits = 5)

 ![Screenshot of leaflet
heatmap.](https://johngodlee.xyz/img_full/fitotrack/heatmap.png)