TITLE: R functions to download data from KoboToolbox
DATE: 2023-05-18
AUTHOR: John L. Godlee
====================================================================


I have created a couple of R functions to download data from a
KoboToolbox server using its API. Requests occasionally fail with a
502 Bad Gateway error, so it is worth wrapping calls to these
functions in try() or tryCatch() so that a failed request is handled
gracefully rather than stopping the whole script.

 [KoboToolbox]: https://www.kobotoolbox.org/

First, a function to list all the form IDs for a given user. This
function is used primarily to find the form ID, which can be used
in the second function to get the data. The function takes either
an access token or a username and password and returns a dataframe:

   #' Get KoboToolbox form IDs for a user
   #'
   #' @param server_url URL of the KoboToolbox server,
   #'     e.g. \url{https://kc.kobotoolbox.org/}
   #' @param token optional KoboToolbox user access token
   #' @param user optional KoboToolbox username
   #' @param pass optional KoboToolbox password
   #'
   #' @return dataframe containing form IDs and names for all
forms accessible to
   #'     the specified user
   #'
   #' @details if \code{token} is supplied it is used
preferentially over
   #'     \code{user} and \code{pass}. If token is not supplied,
\code{user} and
   #'     \code{pass} must be supplied.
   #'
   #' @importFrom httr GET add_headers content authenticate
   #' @importFrom readr read_csv
   #'
   #' @export
   #'
   koboFormID <- function(server_url, token = NULL, user = NULL,
pass = NULL) {
     # Create URL
     kobo_url <- paste0(server_url, "api/v1/data.json")

     # Use either token or user+password
     if (!is.null(token)) {
       # Get data
       rawdata <- httr::GET(kobo_url,
         httr::add_headers(Authorization = paste("Token", token)))
     } else {
       # Check, are both username and password supplied?
       if (any(is.null(user), is.null(pass))) {
         stop("If token not supplied, both user and pass must be
supplied")
       }

       # Get data
       rawdata <- httr::GET(kobo_url, httr::authenticate(user,
pass))
     }

     # Parse content
     dat <- httr::content(rawdata, "parsed")

     # Extract form strings and descriptions and put in dataframe
     out <- do.call(rbind, lapply(dat, function(x) {
       data.frame(id = x$id,
       id_string = x$id_string,
       title = x$title,
       description = x$description,
       url = x$url)
     }))

     # Return
     return(out)
   }

The second function uses a form ID to download the submitted data
for that form and produces a dataframe:

   #' Get KoboToolbox form data as a dataframe for a given form
   #'
   #' @param server_url URL of the KoboToolbox server,
   #'     e.g. \url{https://kc.kobotoolbox.org/}
   #' @param formid ID or ID string for a given KoboToolbox form
   #' @param token optional KoboToolbox user access token
   #' @param user optional KoboToolbox username
   #' @param pass optional KoboToolbox password
   #'
   #' @return dataframe containing submitted data for the
specified form
   #'
   #' @details if \code{token} is supplied it is used
preferentially over
   #'     \code{user} and \code{pass}. If token is not supplied,
\code{user} and
   #'     \code{pass} must be supplied.
   #'
   #' @importFrom httr GET add_headers content authenticate
   #' @importFrom readr read_csv
   #'
   #' @export
   #'
   koboDataGet <- function(server_url, formid,
     token = NULL, user = NULL, pass = NULL) {

     # Create URL
     kobo_csv_url <- paste0(server_url, "api/v1/data/", formid,
".csv")

     # Use either token or user+password
     if (!is.null(token)) {
       # Get data
       rawdata <- httr::GET(kobo_csv_url,
         httr::add_headers(Authorization = paste("Token", token)))
     } else {
       # Check, are both username and password supplied?
       if (any(is.null(user), is.null(pass))) {
         stop("If token not supplied, both user and pass must be
supplied")
       }

       # Get data
       rawdata <- httr::GET(kobo_csv_url, httr::authenticate(user,
pass))
     }

     # Create dataframe from data
     out <- readr::read_csv(httr::content(rawdata, "raw", encoding
= "UTF-8"),
       na = c("", "NA", "n/a"))

     # Return
     return(out)
   }