TITLE: Processing bike ride data from Fitotrack Android app
DATE: 2024-06-19
AUTHOR: John L. Godlee
====================================================================
I have been using The Fitotrack Android app for a over a year now
to track my bike rides. Fitotrack allows you to export the tracking
data in XML format. I wrote an R script to process the XML data and
create some basic summary plots. I have broken down the R script
below:
[Fitotrack Android app]:
https://github.com/russok/FitoTrack
Firstly, load necessary packages and import the compressed XML
file, which has the file extension .ftb. I use Syncthing to sync
the backup files from my phone to my laptop.
[Syncthing]:
https://syncthing.net/
# Process data from FitoTrack Android app
# John L. Godlee (
[email protected])
# Last updated: 2024-06-19
# Packages
library(dplyr)
library(XML)
library(lubridate)
library(ggplot2)
library(patchwork)
library(archive)
library(leaflet)
library(sf)
# Find all fitotrack backups
f <- list.files("~/syncthing/fitotrack", "*.ftb", full.names =
TRUE)
# Check files found
stopifnot(length(f) > 0)
Then parse the file and extract each ride, represented by child
nodes in the workouts part of the XML.
# 7z unarchive the file
conn <- archive_read(sort(f)[1])
# Import data
dat <- xmlParse(readLines(conn))
# Separate nodes with summary data
summ_nodes <- getNodeSet(dat, "//workouts//workouts")
Summarise each node and create a pretty dataframe, where each row
is a ride.
# For each node, get children as list
summ_df <- bind_rows(lapply(seq_len(xmlSize(summ_nodes)),
function(x) {
as.list(getChildrenStrings(summ_nodes[[x]]))
})) %>%
mutate(
across(all_of(c("calorie", "ascent", "descent",
"avgPace", "avgSpeed", "topSpeed",
"length", "maxElevationMSL", "minElevationMSL")),
as.numeric),
start = as_datetime(as.numeric(start) / 1000),
end = as_datetime(as.numeric(end) / 1000),
duration = round(as.period(end - start)),
pauseDuration =
round(seconds_to_period(as.numeric(pauseDuration) / 1000)))
Create plots with summary information for each ride (plots_all),
and a table with the same information (month_summ).
# Define conversion factor km to miles
kmt <- 0.6213711922
# Plot average speed of all rides over time
avgSpeed_ts <- ggplot(summ_df, aes(x = start, y = avgSpeed)) +
geom_point(shape = 21) +
theme_bw() +
scale_y_continuous(
name = expression("Average speed"~(km~h^-1)),
sec.axis = sec_axis(
transform = ~.*kmt, name = expression("Average
speed"~(miles~h^-1)) )) +
xlab("Date")
# Plot top speed of all rides over time
topSpeed_ts <- ggplot(summ_df, aes(x = start, y = topSpeed)) +
geom_point(shape = 21) +
theme_bw() +
scale_y_continuous(
name = expression("Top speed"~(km~h^-1)),
sec.axis = sec_axis(
transform = ~.*kmt, name = expression("Top
speed"~(miles~h^-1)) )) +
xlab("Date")
# Plot length of all rides over time
length_ts <- summ_df %>%
mutate(length_km = length / 1000) %>%
ggplot(., aes(x = start, y = length_km)) +
geom_point(shape = 21) +
theme_bw() +
scale_y_continuous(
name = "Distance (km)",
sec.axis = sec_axis(
transform = ~.*kmt, name = "Distance (miles)")) +
xlab("Date")
# Monthly breakdown of:
# total distance
# average speed
# top speed
month_summ <- summ_df %>%
mutate(month_year = format(as.Date(start), "%Y-%m")) %>%
group_by(month_year) %>%
summarise(
total_dist = sum(length, na.rm = TRUE) / 1000,
mean_speed = mean(avgSpeed, na.rm = TRUE),
max_speed = max(topSpeed, na.rm = TRUE)) %>%
mutate(
total_dist_miles = total_dist * kmt,
mean_speed_mph = mean_speed * kmt,
max_speed_mph = max_speed * kmt)
# Plot monthly total distance bar chart
month_dist <- month_summ %>%
mutate(month_year_date = as.Date(paste0(month_year, "-01")))
%>%
ggplot(., aes(x = month_year_date, y = total_dist)) +
geom_bar(stat = "identity", colour = "black", fill =
"grey") +
theme_bw() +
scale_x_date(
breaks = seq(
as.Date(paste0(min(month_summ$month_year), "-01")),
as.Date(paste0(max(month_summ$month_year), "-01")),
by = "month"),
date_labels = "%b %Y") +
scale_y_continuous(
name = "Total distance (km)",
sec.axis = sec_axis(
transform = ~.*kmt, name = "Total distance (miles)")) +
xlab("Month")
# Patchwork plots together
plots_all <- avgSpeed_ts + topSpeed_ts + length_ts + month_dist


Now to process the data from a single ride. Fitotrack splits each
ride up into interals which share a single ID, within the samples
part of the XML.
First process each node and create a pretty dataframe.
# Get intervals
# Separate nodes
int_nodes <- getNodeSet(dat, "//samples//samples")
# For each node, get children as list
int_list <- lapply(seq_len(xmlSize(int_nodes)), function(x) {
as.list(getChildrenStrings(int_nodes[[x]]))
})
# Process intervals
# summ_list$samples[[1]]
int_df <- bind_rows(lapply(int_list, function(x) {
data.frame(
"int_id" = x$id,
"id" = x$workoutId,
"elevation" = as.numeric(x$elevation),
"latitude" = as.numeric(x$lat),
"longitude" = as.numeric(x$lon),
"speed" = as.numeric(x$speed))
})) %>%
group_by(id) %>%
arrange(int_id) %>%
mutate(int = row_number()) %>%
relocate(id, int) %>%
dplyr::select(-int_id) %>%
mutate(per = int / max(int))
# Check all interval IDs in summary dataframe
stopifnot(all(sort(unique(int_df$id)) %in%
sort(unique(summ_df$id))))
Then extract a single ride ID, in this case the most recent ride,
and create interval plots. The first is a speed plot, and the
second is an elevation plot.
# Extract most recent ID
ex_id <- summ_df$id[order(summ_df$start, decreasing = TRUE)][1]
# Create speed plot of a particular ride
int_speed <- int_df %>%
filter(id == ex_id) %>%
ggplot(., aes(x = int, y = speed)) +
geom_line() +
theme_bw() +
scale_y_continuous(
name = expression("Speed"~(km~h^-1)),
sec.axis = sec_axis(
transform = ~.*kmt, name =
expression("Speed"~(miles~h^-1)) )) +
xlab("Interval")
# Create elevation plot of a particular ride
int_elev <- int_df %>%
filter(id == ex_id) %>%
ggplot(., aes(x = int, y = elevation)) +
geom_line() +
theme_bw() +
labs(
x = "Interval",
y = "Elevation (m)")
# Combine speed and elevation plots for a particular ride
plots_ride <- (int_speed + int_elev) +
plot_layout(ncol = 1)

Finally, create a simple interactive map of the ride.
# Create sf object with interval points
int_sf <- int_fil %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)
# Duplicate points to get start and end of interval,
# add ID, summarise to interval lines
int_lines <- int_sf %>%
mutate(int = int - 1) %>%
bind_rows(., int_sf) %>%
arrange(int) %>%
group_by(int) %>%
summarise(
elevation = mean(elevation),
speed = mean(speed),
per = mean(per),
n = n(),
do_union = FALSE) %>%
filter(n > 1) %>%
st_cast(., "LINESTRING")
# Create colour palette
pal <- colorNumeric(palette = "plasma", domain =
int_lines$speed)
# Create leaflet map call
lmap <- leaflet() %>%
addTiles() %>%
setView(
lng = mean(int_fil$longitude),
lat = mean(int_fil$latitude),
zoom = 12) %>%
addPolylines(
data = int_lines,
color = pal(int_lines$speed),
opacity = 1)

Update 2024-07-02
I recently added a kind of heatmap that plots all my rides using
leaflet. It uses the leafgl package to efficiently render many line
segments using webGL. For ~140,000 line segments it took about 5
seconds to load the map and the map is pretty snappy in the browser
once it is rendered.
[leafgl]:
https://github.com/r-spatial/leafgl
int_all_sf <- int_all %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)
# Duplicate points to get start and end of interval,
# add ID, summarise to interval lines
int_all_lines <- int_all_sf %>%
mutate(int = int - 1) %>%
bind_rows(., int_all_sf) %>%
arrange(int) %>%
group_by(id, int) %>%
summarise(
elevation = mean(elevation),
speed = mean(speed * 1/kmt),
per = mean(per),
n = n(),
do_union = FALSE,
.groups = "keep") %>%
filter(n > 1) %>%
st_cast(., "LINESTRING")
# Create leaflet heatmap of all rides
heatmap_all <- leaflet(options = leafletOptions(perferCanvas =
TRUE)) %>%
addTiles() %>%
setView(
lng = mean(int_all$longitude),
lat = mean(int_all$latitude),
zoom = 12) %>%
addGlPolylines(
data = int_all_lines,
color = "#0000ff",
opacity = 0.05,
src = TRUE,
digits = 5)
