1 + 2
x <- c(3,4,45,56,7,7)
x
x
x * 3
mydata <- read.csv("c:/kewoo/eai/d20171024.0930-1055.AcurityConnector.with-header.csv", header=TRUE)
chron
library(chron)
head(mydata)
install.packages("chron")
head(mydata)
head(mydata,2)
str(mydata)
install.packages("psych")
library(chron)
.libPaths()
library(tidyverse)
library(lubridate)
library(tidyverse)

rm(x)
x <- rnorm(50)
y <- rnorm(x)
plot(x,y)
ls()
rm(x,y)
x <- 1:20
w <- 1 + sqrt(x)/2
dummy <- data.frame(x=x, y= x + rnorm(x)*w)
dummy
fm <- lm(y ~ x, data=dummy)
summary(fm)
fm1 <- lm(y ~ x, data=dummy, weight=1/w^2)
summary(fm1)
attach(dummy)
attach(fm1)
detach(fm1)
dummy
xdummy <- data.frame(x1=x, y1=x + rnorm(x)*w)
attach(xdummy)
rm(xdummy)
detach(xdummy)
detach(dummy)
rm(dummy)
rm(xdummy)
dummy
dummy <- data.frame(x=x, y = x + rnorm(x)*w)
attach(dummy)
plot(fitted(fm), resid(fm),
    xlab="Fitted values",
    ylab="Residuals",
    main="Residuals vs Fitted")

abline(coef(fm))
plot(x, y)
lrf <- lowess(x, y)
lines(x, lrf$y)
abline(0, 1, lty=3)
abline(coef(fm))
abline(coef(fm1), col = "red")
detach()

# section 2
filepath <- system.file("data", "morley.tab" , package="datasets")
filepath
file.show(filepath)
mm <- read.table(filepath)
mm
mm$Expt <- factor(mm$Expt)
mm$Run <- factor(mm$Run)
attach(mm)
plot(Expt, Speed, main="Speed of Light Data", xlab="Experiment No.")
fm <- aov(Speed ~ Run + Expt, data=mm)
summary(fm)
fm0 <- update(fm, . ~ . - Run)
anova(fm0, fm)
detach()
rm(fm, fm0)
pi
exp(1)  # R has no built-in constant 'e'; use exp(1)
x <- seq(-pi, pi, len=50)
x <- x
y <- x
f <- outer(x,y, function(x,y) cos(y)/(1 + x^2))
f
oldpar <- par(no.readonly = TRUE)
par(pty="s")
contour(x, y, f)
contour(x, y, f, nlevels=15, add=TRUE)
fa <- (f-t(f))/2
contour(x, y, fa, nlevels=15)
par(oldpar)
image(x, y, f)
image(x, y, fa)
objects()
objects(); rm(x, y, f, fa)

th <- seq(-pi, pi, len=100)
z <- exp(1i*th)
par(pty="s")
plot(z, type="l")
w <- rnorm(100) + rnorm(100)*1i
w <- ifelse(Mod(w) > 1, 1/w, w)
plot(w, xlim=c(-1,1), ylim=c(-1,1), pch="+",xlab="x", ylab="y")
lines(z)
w <- sqrt(runif(100))*exp(2*pi*runif(100)*1i)
plot(w, xlim=c(-1,1), ylim=c(-1,1), pch="+", xlab="x", ylab="y")
lines(z)
rm(th, w, z)
q()






x <- read.table(file = "clipboard", sep="\t", header=TRUE, stringsAsFactors=FALSE)


df1.zoo<-zoo(data[,-1],data[,1])
df2 <- as.data.frame(as.zoo(merge(as.xts(df1.zoo), as.xts(zoo(,seq(start(df1.zoo),end(df1.zoo),by=10))))))


data.length <- length(data$TIME)
time.min <- data$TIME[1]
time.max <- data$TIME[data.length]
all.dates <- seq(time.min, time.max, by="min")


mutate(data, TIME = strptime(TIME, format = "%Y-%m-%d %H:%M:%S"))
data
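# A quick sanity check of the corrected format string (the timestamp below is made up for
# illustration). Note that strptime() returns POSIXlt, which is awkward as a data frame column;
# as.POSIXct() or lubridate's ymd_hms() (assuming the library is loaded) is usually safer.
strptime("2017-10-24 09:31:05", format = "%Y-%m-%d %H:%M:%S")
as.POSIXct("2017-10-24 09:31:05", format = "%Y-%m-%d %H:%M:%S")
ymd_hms("2017-10-24 09:31:05")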

start <- now()
seq(start, start + days(3), by = "15 min")

start <- now()
seq(start, start + days(3), by = "15 min")

# 2017-11-01: graph exceptionRec count after adding extra data
library(tidyverse)
library(lubridate)
library(xts)
library(zoo)
library(ggplot2)
x <- read.table(file = "c:/kewoo/eai/exceptionRec.counts.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE)
vdata <- mutate(x, ts= ymd_hms(ts))
plot(x=vdata$ts, y=vdata$exceptionrec_ct)
qplot(ts, exceptionrec_ct, data=vdata)

df1.zoo<-zoo(vdata[,-1],vdata[,1])
xts01 <- merge(as.xts(df1.zoo), as.xts(zoo(,seq(start(df1.zoo),end(df1.zoo),by=10))))
xts01[is.na(xts01)] <- 0
autoplot(xts01)
plot(xts01)
# INTERESTING: from https://stackoverflow.com/questions/3386850/how-can-i-change-xts-to-data-frame-and-keep-index-in-r
index(xts01)
coredata(xts01)
fxts01 <- fortify(xts01)
barplot(fxts01$as.xts.df1.zoo.,names.arg=fxts01$Index)

## START LEGACY CURIOSITY
df2 <- as.data.frame(as.zoo(merge(as.xts(df1.zoo), as.xts(zoo(,seq(start(df1.zoo),end(df1.zoo),by=10))))))
df2
df2[is.na(df2)] <- 0
plot(df2$ts, df2$exceptionrec_ct)
class(df2$ts)
str(df2)
df2
View
View(df2)
## END  LEGACY CURIOSITY: CARRY ON



## 2017-11-02: C:/kewoo/eai/d20171019.0950-1200.allEAI.csv using tidyverse
## OLD WAY rd20171019 <- read.table(file = "C:/kewoo/eai/d20171019.0950-1200.allEAI.csv", sep="\t", header=TRUE, stringsAsFactor=FALSE)
# rd20171019 <- read_csv("C:/kewoo/eai/d20171019.0950-1200.allEAI.csv")
rd20171024 <- read_csv("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")

plot(rd20171024$start, rd20171024$duration_ms)
qplot(`start`, duration_ms, data=rd20171024)

hist(rd20171024$start, "mins", format = "%H:%M")
ggplot(rd20171024, aes(start)) +  geom_histogram(binwidth=5) # can change width unlike base::hist
ggplot(rd20171024, aes(interval(rd20171024$start, rd20171024$endt))) +geom_histogram(binwidth=1) # can change width unlike base::hist
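# The interval() inside aes() above probably won't bin sensibly: an Interval isn't a numeric
# scale ggplot2 knows about. One option (a sketch, not something run in this log) is to turn it
# into a duration in seconds first with lubridate's time_length():
ggplot(rd20171024, aes(time_length(interval(start, endt), unit = "second"))) +
  geom_histogram(binwidth = 1)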

interval(rd20171024$start, rd20171024$endt)

# something to match the simul.jy
#transmute(rd20171024,
#          ts = ??? 10-sec-interval times ???,
#          inflight_count = ??? count( ts %within% interval(rd20171024$start, rd20171024$endt) ???)
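# A minimal sketch of that idea, assuming read_csv parsed start/endt as POSIXct: for each
# 10-second tick, count the transactions whose [start, endt] span covers it. The sections
# below build progressively faster versions of the same calculation.
ticks <- seq(min(rd20171024$start), max(rd20171024$endt), by = 10)
inflight <- sapply(ticks, function(t) sum(rd20171024$start <= t & rd20171024$endt >= t))
plot(ticks, inflight, type = "l")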

# 2017-11-03
tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by=1000)
intervals01 <- interval(tspan01, tspan01 + 1000)
sum(int_overlaps(intervals01[1], interval(rd20171024$start, rd20171024$endt)))
overlap_count01 <- sum(int_overlaps(intervals01, interval(rd20171024$start, rd20171024$endt)))
# TODO: check https://stackoverflow.com/questions/28195996/count-number-of-rows-matching-a-criteria
#             https://www.statmethods.net/input/contents.html

fx01 <- function(arg1) sum(int_overlaps(arg1, interval(rd20171024$start, rd20171024$endt)))
tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by=10)
counts01 <- lapply(intervals01, fx01)  # the counts are very high because rd20171024 includes all EAI transactions from 0930-1055
plot(tspan01, counts01)

fx02 <- function(arg1) sum(int_overlaps(arg1, interval(rd20171024[grep("AcurityConnector", rd20171024$componentname),]$start, rd20171024[grep("AcurityConnector", rd20171024$componentname),]$endt)))
tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by=60)
intervals01 <- interval(tspan01, tspan01 + 60)
counts01 <- lapply(intervals01, fx02)
plot(tspan01, counts01)

fx03 <- function(arg1) sum(int_overlaps(arg1, interval(rd20171024.AC$start, rd20171024.AC$endt)))
tspan03 <- seq(min(rd20171024.AC$start), max(rd20171024.AC$endt), by=10)
intervals03 <- interval(tspan03, tspan03 + 10)
counts03 <- lapply(intervals03, fx03)
plot(tspan03, counts03)

rd20171024.AC <- filter(rd20171024, str_detect(componentname, "AcurityConnector"))
rd20171024.ACwithIntervals <- mutate(rd20171024.AC, cInt01 = interval(start, endt))
fx04 <- function(arg1) sum(int_overlaps(arg1, rd20171024.ACwithIntervals$cInt01))
tspan04 <- seq(min(rd20171024.ACwithIntervals$start), max(rd20171024.ACwithIntervals$endt), by=10)
intervals04 <- interval(tspan04, tspan04 + 10)
counts04 <- lapply(intervals04, fx04)
plot(tspan04, counts04)

rd20171024.withIntervals <- mutate(rd20171024, cInt01 = interval(start, endt))
fx05 <- function(arg1) sum(int_overlaps(arg1, rd20171024.withIntervals$cInt01))
tspan05 <- seq(min(rd20171024.withIntervals$start), max(rd20171024.withIntervals$endt), by=10)
intervals05 <- interval(tspan05, tspan05 + 10)
counts05 <- lapply(intervals05, fx05)
plot(tspan05, counts05)


rd20171024.ACwithIntervals <- mutate(rd20171024.AC, cInt01 = interval(start, endt))
tspan04 <- seq(min(rd20171024.ACwithIntervals$start), max(rd20171024.ACwithIntervals$endt), by=10)
intervals04 <- interval(tspan04, tspan04 + 10)
counts04 <- sum(int_overlaps(intervals04, rd20171024.ACwithIntervals$cInt01)) ### DOESN'T WORK
counts04 <- sum(int_overlaps(tspan04, rd20171024.ACwithIntervals$cInt01)) ### DOESN'T WORK
plot(tspan04, counts04)
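# Why the two lines above fail: in the first, int_overlaps() compares its two interval vectors
# element-wise (recycling the shorter one), so sum() collapses everything into one number instead
# of one count per candidate interval; in the second, tspan04 is POSIXct, not an Interval at all.
# Applying the overlap test per candidate interval, as fx04/fx05 do, is one workaround; the same
# idea with vapply:
counts04 <- vapply(seq_along(intervals04),
                   function(i) sum(int_overlaps(intervals04[i], rd20171024.ACwithIntervals$cInt01)),
                   numeric(1))
plot(tspan04, counts04)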

summarise(filter(rd20171024, int_overlaps(interval(start, endt), intervals01.t$int_col[4])), sum = n())$sum[1]
summarise(filter(rd20171024, int_overlaps(interval(start, endt), col1)), sum = n())$sum[1]

library(stringr)

rd20171024.ACwithIntervals <- mutate(rd20171024.AC, cInt01 = interval(start, endt))
rd20171024.withIntervalList <- mutate(rd20171024, ints = seq(start, endt, by=10))
qplot(`start`, duration_ms, data=rd20171024)

# 20171108
library(plyr) ## for ddply
library(tidyverse)
library(lubridate)
library(xts)
library(zoo)
library(ggplot2)

rd20171024 <- read_csv("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")
rd20171024.AC <- filter(rd20171024, str_detect(componentname, "AcurityConnector"))
# google search "lubridate round time"
# round_date courtesy of https://rdrr.io/cran/lubridate/man/round_date.html
rd20171024.AC.round10sec <- mutate(rd20171024.AC, r10start = round_date(start, "10 seconds"), r10endt = round_date(endt, "10 seconds"))

# next two lines failed, but brought me crucial error message:
# 'from' must be of length 1
df01 <- data.frame(st = rd20171024$start, et = rd20171024$endt) # , ints = seq(st,et,by=10))
df01$ints <- seq(df01$st, df01$et, by=10)
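# One way around "'from' must be of length 1" without plyr (a sketch, not the route actually
# taken below): expand each start/end pair row by row with Map() and then flatten.
ints.list <- Map(function(s, e) seq(s, e, by = 10), df01$st, df01$et)
ints.all <- do.call(c, ints.list)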

# desperate google search "r seq from must be of length 1"
# start and end date expansion courtesy of https://stackoverflow.com/questions/11494511/expand-ranges-defined-by-from-and-to-columns
tsa01 <- ddply(rd20171024.AC.round10sec, "transactionid", summarise, ints = seq(r10start,r10endt,by=10))
tsa02 <- as_tibble(tsa01) %>% count("ints")
ggplot(tsa02, aes(x=ints,y=freq)) + geom_line()

rd20171024.freqInts <- mutate(rd20171024, r10start = round_date(start, "10 seconds"), r10endt = round_date(endt, "10 seconds")) %>%
 ddply("transactionid", plyr::summarise, ints = seq(r10start,r10endt,by=10)) %>%
 as_tibble() %>%
 count("ints")

# If loading plyr after dplyr (bad thing), runs <41secs and gives desired result
# If loading plyr before dplyr, takes 3-4 minutes and has incorrect result
# UNLESS using plyr::summarise and plyr::count
rd20171024.freqInts <- transmute(rd20171024
                                ,transactionid
                                ,r10start = round_date(start, "10 seconds")
                                ,r10endt = round_date(endt, "10 seconds")) %>%
 ddply("transactionid", plyr::summarise, ints = seq(r10start,r10endt,by=10)) %>%
 as_tibble() %>%
 plyr::count("ints")
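# The load-order sensitivity above is a masking problem: plyr::count() takes the column name as a
# string and returns a "freq" column, while dplyr::count() takes a bare column name and returns "n";
# if dplyr's count() wins, count("ints") groups on the literal string instead of the column.
# A sketch of the same pipeline with every ambiguous call namespace-qualified (the variable name
# freqInts.qualified is just for illustration):
freqInts.qualified <- rd20171024 %>%
 dplyr::transmute(transactionid
                  ,r10start = round_date(start, "10 seconds")
                  ,r10endt = round_date(endt, "10 seconds")) %>%
 plyr::ddply("transactionid", plyr::summarise, ints = seq(r10start, r10endt, by=10)) %>%
 plyr::count("ints")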

rd20171024.ACfreqInts <- rd20171024 %>%
 filter(str_detect(componentname, "AcurityConnector")) %>%
 transmute(transactionid
           ,r10start = round_date(start, "10 seconds")
           ,r10endt = round_date(endt, "10 seconds")) %>%
 ddply("transactionid", summarise, ints = seq(r10start,r10endt,by=10)) %>%
 as_tibble() %>%
 count("ints")

# google search "overlay plots in r ggplot2"
# from https://stackoverflow.com/questions/9109156/ggplot-combining-two-plots-from-different-data-frames
ggplot() +
 geom_line(data=rd20171024.ACfreqInts, aes(x=ints,y=freq), color='green') +
 geom_line(data=rd20171024.freqInts, aes(x=ints,y=freq), color='red')


ggplot() + geom_line(data=rd20171024, aes(x=start,y=duration_ms), color='blue')

# this doesn't work to expand intervals across transactionids, even though it's supposed to be
# equivalent according to https://blog.rstudio.com/2014/01/17/introducing-dplyr/
# (summarise() wants one value per group, so the multi-row expansion needs do()/rowwise(), as below)
tmp02 %>% group_by(transactionid) %>% summarise(ints = seq(.$r10start, .$r10end, by=10))

# dplyr solution from https://stackoverflow.com/questions/11494511/expand-ranges-defined-by-from-and-to-columns
# it MIGHT work, but I didn't let it run beyond 40sec (19% complete, ~ 3 m remaining?!)
# It creates a tibble with name and ints, but I can't apply count(ints) to it
tmp03 <- tmp02 %>%
 rowwise() %>%
 do(data.frame(name = .$transactionid, ints = seq(.$r10start, .$r10endt, by = 10)))

tmp04 <- tmp03 %>% count(ints)
ggplot() +
 geom_line(data=tmp04, aes(x=ints,y=n), color='purple')

# the data.table way: this feels faster than dplyr but isn't very FP when using the := methods
# alternatively, use the .() aka list() feature and create a new table. Still faster than dplyr or plyr
# https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html
library(data.table) # for fread and other data.table functions
library(tidyverse)  # for as_tibble to feed into ggplot
library(lubridate)  # for round_date
library(fasttime)   # for fastPOSIXct
dt01=fread("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")

# exploratory
str(dt01)
nrow(dt01)
names(dt01)

dt01[, startPct := round_date(as.POSIXct(start), "10 seconds")]
dt01[, endtPct := round_date(as.POSIXct(endt), "10 seconds")]
# create two new columns in the same statement, a bit hard to read though because the column names
# are separated from their definitions by the := token
dt01[, c("startPct","endtPct") := list(round_date(as.POSIXct(start), "10 seconds"),
                                      round_date(as.POSIXct(endt), "10 seconds"))]
# gain speed using fasttime - what happens? Takes ~ 3 sec instead of ~ 11 sec
# https://stackoverflow.com/questions/29140416/r-data-table-fread-read-column-as-date
# https://stackoverflow.com/questions/12786335/why-is-as-date-slow-on-a-character-vector
# https://cran.r-project.org/web/packages/fasttime/fasttime.pdf
# replace as.POSIXct() with fastPOSIXct()
dt01[, c("startPct","endtPct") := list(round_date(fastPOSIXct(start), "10 seconds"),
                                      round_date(fastPOSIXct(endt), "10 seconds"))]

# new columns without using :=
dt01b <-dt01[, list(transactionid,
                   startPct = round_date(fastPOSIXct(start), "10 seconds"),
                   endtPct = round_date(fastPOSIXct(endt), "10 seconds"))]

dt02 = dt01[, list(ints = seq(startPct, endtPct, by=10)), by = transactionid] # the "magic": j is evaluated once per transactionid group, so each start/endt pair expands on its own
# dt02 = dt01[, list(ints = seq(startPct, endtPct, by=10)), by = correlationid] # ERROR: 'from' must be of length 1, because correlationid isn't unique for start/endt pairs
# filter on componentname, list ints and componentname, group by transactionid:
# dt02 = dt01[componentname %like% 'AcurityConnector', list(ints = seq(startPct, endtPct, by=10), componentname), by = transactionid]
# Wrapping "freq = .N" in a list ensures a data.table object is returned (https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html)
dt03 <- dt02[, list(freq = .N), by = ints]
tb01 <- as_tibble(dt03)
ggplot() + geom_line(data=tb01, aes(x=ints,y=freq), color='blue')

# PS: tidyverse+plyr returns different results to data.table
# possibly because different date conversions are being used at the time of data load
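# A quick way to test that theory (the timestamp below is made up): parse the same raw string each
# way and compare. fastPOSIXct() assumes the input is GMT/UTC, as.POSIXct() with no tz uses the
# local time zone, and lubridate's ymd_hms() defaults to UTC, so identical-looking timestamps can
# land in different round_date() buckets.
s <- "2017-10-24 09:31:05"
as.POSIXct(s)   # local time zone
fastPOSIXct(s)  # input treated as GMT/UTC
ymd_hms(s)      # UTC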

# 20171109: chain data.tables, split over multiple lines
#           show transaction flight behaviours over time
tb01.allEAI <- dt01[, list(transactionid,
                          startPct = round_date(fastPOSIXct(start), "10 seconds"),
                          endtPct = round_date(fastPOSIXct(endt), "10 seconds"))
                   ][, list(intervals = seq(startPct, endtPct, by=10)), by = transactionid
                     ][, list(txCount = .N), by = intervals] %>% as_tibble()

tb01.AC <- dt01[componentname %like% 'AcurityConnector',
               list(transactionid,
                    startPct = round_date(fastPOSIXct(start), "10 seconds"),
                    endtPct = round_date(fastPOSIXct(endt), "10 seconds"))
               ][, list(intervals = seq(startPct, endtPct, by=10)), by = transactionid
                 ][, list(txCount = .N), by = intervals] %>% as_tibble()

ggplot() +
 geom_line(data=tb01.AC, aes(x=intervals,y=txCount), color='blue') +
 geom_line(data=tb01.allEAI, aes(x=intervals,y=txCount), color='red')







# 20171110
dt01[, list(transactionid,
           startPct = round_date(fastPOSIXct(start), "10 seconds"),
           endtPct = round_date(fastPOSIXct(endt), "10 seconds"))
    ][, list(intervals = seq(startPct, endtPct, by=10)), by = transactionid
      ]

dt01[1:100, list(transactionid,
                startPct = round_date(fastPOSIXct(start), "10 seconds"),
                endtPct = round_date(fastPOSIXct(endt), "10 seconds"))
    ][, seq(startPct, endtPct,by=10)
      ]

data.table(a = 0:200)[,.(a, b = a * 10)
                     ][,seq(a,b,by=3), by=a]

dt01[,.N,by=transactiontype][order(-N)]

tb01.allEAI

dt02 <- dt01[, list(transactionid,
                   transactiontype,
                   startPct = round_date(fastPOSIXct(start), "10 seconds"),
                   endtPct = round_date(fastPOSIXct(endt), "10 seconds"))
            ]

# find interval with greatest count of transactions
filt01 <- dt02[  ,
                list(intervals = seq(startPct, endtPct, by=10)),
                by = list(transactionid,transactiontype)
            ][  ,
                .N,
                by = list(intervals)
            ][  N == max(N),
                intervals]
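# Arguably easier to read (a sketch of the same query; filt01.alt is a new name used only here):
# build the per-interval counts once, then pick the busiest interval with which.max(). Note that
# which.max() returns a single interval, whereas N == max(N) above would keep ties.
intCounts <- dt02[, list(intervals = seq(startPct, endtPct, by=10)),
                  by = list(transactionid, transactiontype)
                 ][, .N, by = intervals]
filt01.alt <- intCounts[which.max(N), intervals]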

dt03.allEAI <- dt02[, list(intervals = seq(startPct, endtPct, by=10)), by = list(transactionid,transactiontype)]
dt03.filt01 <- dt02[startPct == filt01, list(intervals = seq(startPct, endtPct, by=10)), by = list(transactionid,transactiontype)]

dt03.filt01[, list(txCount = .N), by = list(transactiontype, intervals)][order(-txCount)]

# 20171115