TITLE: Rendering list structure in R
DATE: 2020-05-15
AUTHOR: John L. Godlee
====================================================================


On UNIX systems there is the tree command, which allows you to
visualise the structure of a set of nested directories. For example:

   .
   ├── basal_area_fn.R
   ├── base_r_plot_tutorial.R
   ├── cheatsheets
   │   ├── base_r_cheatsheet.pdf
   │   ├── rmarkdown_cheatsheet.pdf
   │   └── rmarkdown_cheatsheet_2.pdf
   └── dplyr
       ├── dplyr_tutorial_hadley_wickham
       │   ├── 1-data.R
       │   ├── 2-single-table.R
       │   ├── 3-pipelines.R
       │   ├── 4-grouped-mutate.R
       │   ├── 5-joins.R
       │   ├── 6-do.R
       │   ├── 7-databases.R
       │   └── weather.csv
       └── filter_dates_with_sysdate.R

In R, lists can also have a nested structure. Consider this list:

   level_1a <- seq(1:3)
   level_2a <- seq(1:3)
   level_3a <-seq(1:5)
   level_3b <- matrix(c(1:5), nrow = 1)
   level_3c <- data.frame(x = c(1:5), y = c(6:10))
   level_3d <- matrix(c(6:10), nrow = 1)

   level_3 <- list(level_3a, level_3b, level_3c, level_3d)

   level_2 <- list()
   for(i in level_2a){
     level_2[[i]] <- level_3
   }

   test_l <- list()
   for(i in level_1a){
     test_l[[i]] <- level_2
   }

   test_l_named <- test_l
   names(test_l_named) <- LETTERS[1:length(test_l_named)]
   for(i in 1:length(test_l_named)){
     names(test_l_named[[i]]) <- paste0(names(test_l_named[i]),
letters[1:length(test_l_named[[i]])])

     for(j in 1:length(test_l_named[[i]])){
       names(test_l_named[[i]][[j]]) <- c("vec", "mat", "df",
"mat2")
     }
   }

If I were to treat each list as a directory and each non-list object
as a file, tree would render this list as:

   .
   ├── A
   │   ├── Aa
   │   │   ├── vec
   │   │   ├── mat
   │   │   ├── df
   │   │   └── mat2
   │   ├── Ab
   │   │   ├── vec
   │   │   ├── mat
   │   │   ├── df
   │   │   └── mat2
   │   └── Ac
   │       ├── vec
   │       ├── mat
   │       ├── df
   │       └── mat2
   ├── B
   │   ├── Ba
   │   │   ├── vec
   │   │   ├── mat
   │   │   ├── df
   │   │   └── mat2
   │   ├── Bb
   │   │   ├── vec
   │   │   ├── mat
   │   │   ├── df
   │   │   └── mat2
   │   └── Bc
   │       ├── vec
   │       ├── mat
   │       ├── df
   │       └── mat2
   └── C
       ├── Ca
       │   ├── vec
       │   ├── mat
       │   ├── df
       │   └── mat2
       ├── Cb
       │   ├── vec
       │   ├── mat
       │   ├── df
       │   └── mat2
       └── Cc
           ├── vec
           ├── mat
           ├── df
           └── mat2

I wanted to replicate that in R. There is the default str()
command, but it produces a really ugly-looking list representation.
For things like .rmd reports it would be nice to have a tidier
output. The function below is far from finished, and probably isn't
written particularly well, but I got frustrated with the project.
Also, in the meantime I found the {{data.tree}} package, which
contains FromListSimple(), which provides basically the same
functionality.

   listTree <- function(x, rootName = NULL){
    require(purrr)
    # Function to check object is a real list
    ##' is.list wrongly identifies data.frame
    isList <- function(x){
      inherits(x, "list")
    }

    # Function to render one level of a tree
    levelRender <- function(x){
      x_names <- sapply(1:length(x), function(y){
        ifelse(is.null(names(x)[y]), paste0("[[", y, "]]"),
names(x)[y])
      })

      x_class <- sapply(x, class)
      x_dim <- sapply(x, function(y){
        ifelse(is.matrix(y) | is.data.frame(y), paste0(" [",
paste(dim(y), collapse="x"), "]"),
          ifelse(is.atomic(y), paste0(" [1:", length(y), "]"),
            ""))
      })
      x_conn <- sapply(x, function(y){
        ifelse(isList(y), "\U252C", "\U2500")
      })

      elements <- c(
        paste0("\U251C\U2500",
          x_conn[1:length(x_conn)-1],
          x_names[1:length(x_names)-1],
          " - ",
          x_class[1:length(x_class)-1],
          x_dim[1:length(x_dim)-1]),
        paste0("\U2514\U2500",
          x_conn[length(x_conn)],
          x_names[length(x_names)],
          " - ",
          x_class[length(x_class)],
          x_dim[length(x_dim)]))
      return(elements)
    }

    # Recursive function to build levels of tree
    recurList <- function(x){
      deep = max_depth - vec_depth(x)
      prep = rep("  ", times = deep)

      if(isList(x)){
        lev =  levelRender(x)
        last_item = tail(names(x), n = 1)
        for(i in 1:length(x)){
          cat(
            prep,
            lev[[i]],
           "\n", sep = "")
          recurList(x[[i]])
        }
      }
    }

    # Print root name of list
    if(is.null(rootName)) {
      cat(deparse(substitute(x)), "\n", sep = "")
    } else {
      cat(rootName, "\n", sep = "")
    }

    # Define initial values
    max_depth <- purrr::vec_depth(x)

    # Build tree
    recurList(x)
   }

The output from listTree(test_l_named) looks like this:

   test_l_named
   ├─┬A - list
     ├─┬Aa - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     ├─┬Ab - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     └─┬Ac - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
   ├─┬B - list
     ├─┬Ba - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     ├─┬Bb - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     └─┬Bc - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
   └─┬C - list
     ├─┬Ca - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     ├─┬Cb - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]
     └─┬Cc - list
       ├──vec - integer [1:5]
       ├──mat - matrix [1x5]
       ├──df - data.frame [5x2]
       └──mat2 - matrix [1x5]

And the equivalent output from
data.tree::FromListSimple(test_l_named):

   1  Root
   2   ¦--A
   3   ¦   ¦--Aa
   4   ¦   ¦   °--df
   5   ¦   ¦--Ab
   6   ¦   ¦   °--df
   7   ¦   °--Ac
   8   ¦       °--df
   9   ¦--B
   10  ¦   ¦--Ba
   11  ¦   ¦   °--df
   12  ¦   ¦--Bb
   13  ¦   ¦   °--df
   14  ¦   °--Bc
   15  ¦       °--df
   16  °--C
   17      ¦--Ca
   18      ¦   °--df
   19      ¦--Cb
   20      ¦   °--df
   21      °--Cc
   22          °--df

In some ways my function is actually nicer. FromListSimple()
doesn't pick up on objects that aren't data frames, and the tree
structure isn't as compact. Additionally, if any list items are
unnamed, the function fails entirely.

To improve my function I would ideally want to physically link
parent tree branches with |, but I couldn't figure out how, and I had
already spent far too much time on it.