# Doing this the data.table way feels faster than dplyr, but it isn't very
# functional (FP) in style when the in-place := operator is used.
# Alternatively, use the .() (aka list()) form to create a new table; that is
# still faster than dplyr or plyr.
# https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html
library(data.table) # for fread and other data.table functions
library(tidyverse)  # for as_tibble to feed into ggplot
library(lubridate)  # for round_date
library(fasttime)   # for fastPOSIXct
# Read the EAI export into a data.table. The previous capture is kept below
# for reference.
# NOTE(review): the date part of the active file name has nine digits
# (201711115) - confirm the path is spelled as intended.
# dt01=fread("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")
dt01 <- fread("C:/kewoo/eai/d201711115.0830-1200.allEAI.csv")

# 20171109: chain data.tables, split over multiple lines, to show
#           transaction flight behaviour over time.
# fastPOSIXct() assumes GMT, so every parsed time is shifted back by the AEST
# offset: 10 hours, expressed in seconds.
AESTDiff <- 36000

# Whole-hour bounds, referenced by the commented-out alternative filters.
nineAM.AEST <- fastPOSIXct("2017-11-15 09:00:00") - AESTDiff
tenAM.AEST <- fastPOSIXct("2017-11-15 10:00:00") - AESTDiff

# Analysis window applied by the active filters in this script.
start.AEST <- fastPOSIXct("2017-11-15 09:15:00") - AESTDiff
end.AEST <- fastPOSIXct("2017-11-15 09:45:00") - AESTDiff

# Rounding unit handed to round_date(). The coarser alternative is kept as a
# comment instead of an assignment that is overwritten on the next line.
# interval.length <- "10 seconds"
interval.length <- "1 seconds"

# One row per input row: transactionid and componentname pass through, while
# start and endt are parsed, shifted by AESTDiff and rounded to
# interval.length.
tb01.tx.times.all <- dt01[
  ,
  .(
    transactionid,
    componentname,
    startPct = round_date(fastPOSIXct(start) - AESTDiff, unit = interval.length),
    endtPct = round_date(fastPOSIXct(endt) - AESTDiff, unit = interval.length)
  )
]

# Keep only rows that start after start.AEST and end before end.AEST (both
# comparisons are strict). Whole-hour alternative:
# tb01.tx.times <- tb01.tx.times.all[startPct > nineAM.AEST & endtPct < tenAM.AEST]
tb01.tx.times <- tb01.tx.times.all[startPct > start.AEST][endtPct < end.AEST]

# In-flight counts across all components: expand every transaction into a
# sequence of timestamps from startPct to endtPct in steps of 1 (seconds for
# POSIXct), then count how many expanded rows fall on each timestamp.
# NOTE(review): seq() needs a single from/to value, so this presumably relies
# on one row per transactionid - confirm.
tb01.allEAI <- as_tibble(
  tb01.tx.times[
    ,
    .(intervals = seq(from = startPct, to = endtPct, by = 1)),
    by = transactionid
  ][
    ,
    .(txCount = .N),
    by = intervals
  ]
)

# In-flight counts restricted to rows whose componentname matches
# "AcurityConnector": expand each transaction into timestamps from startPct to
# endtPct in steps of 1, then count the expanded rows per timestamp.
# NOTE(review): seq() needs a single from/to value, so this presumably relies
# on one matching row per transactionid - confirm.
tb01.AC <- as_tibble(
  tb01.tx.times[
    componentname %like% "AcurityConnector",
    .(intervals = seq(from = startPct, to = endtPct, by = 1)),
    by = transactionid
  ][
    ,
    .(txCount = .N),
    by = intervals
  ]
)

# Mean of duration_ms / 100 per rounded start time, for rows whose shifted
# start/endt fall strictly inside the start.AEST..end.AEST window.
# Timing note: I think casting defaults to as.POSIXct, which takes >20sec to
# run; using fastPOSIXct takes ~2sec.
# NOTE(review): dividing duration_ms by 100 looks like a scaling choice so the
# series fits the shared plot axis - confirm.
# Whole-hour alternative for the row filter:
# avg_durations <- dt01[start > nineAM.AEST & endt-AESTDiff < tenAM.AEST,
avg_durations <- as_tibble(
  dt01[
    fastPOSIXct(start) - AESTDiff > start.AEST &
      fastPOSIXct(endt) - AESTDiff < end.AEST,
    .(avgs = mean(duration_ms / 100)),
    by = .(
      intervals = round_date(fastPOSIXct(start) - AESTDiff, interval.length)
    )
  ]
)
# Overlay three line series against intervals: tb01.AC counts in blue,
# tb01.allEAI counts in red and avg_durations averages in green. Layers are
# drawn in this order.
ggplot() +
  geom_line(
    mapping = aes(x = intervals, y = txCount),
    data = tb01.AC,
    colour = "blue"
  ) +
  geom_line(
    mapping = aes(x = intervals, y = txCount),
    data = tb01.allEAI,
    colour = "red"
  ) +
  geom_line(
    mapping = aes(x = intervals, y = avgs),
    data = avg_durations,
    colour = "green"
  )


# Timing reference: the same rounding done with base as.POSIXct() parsing and
# a fixed 10-second unit. The result is not assigned, so at top level it is
# only printed.
dt01[
  ,
  .(
    transactionid,
    startPct = round_date(as.POSIXct(start), unit = "10 seconds"),
    endtPct = round_date(as.POSIXct(endt), unit = "10 seconds")
  )
] ## 22 secs


# Timing reference: fastPOSIXct() parsing on the rows whose componentname
# matches "AcurityConnector", rounded to a fixed 10-second unit. The offset
# uses the shared AESTDiff constant (36000 seconds) rather than repeating the
# literal. The result is not assigned, so at top level it is only printed.
dt01[
  componentname %like% "AcurityConnector",
  .(
    transactionid,
    startPct = round_date(fastPOSIXct(start) - AESTDiff, unit = "10 seconds"),
    endtPct = round_date(fastPOSIXct(endt) - AESTDiff, unit = "10 seconds")
  )
] ## 2 secs