# data.table way this stuff feels faster than dplyr but isn't very FP when using := methods
# alternatively, use the .() aka list() feature and create a new table. Still faster than dplyr or plyr
# https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html
library(data.table) # for fread and other data.table functions
library(tidyverse)  # for as_tibble to feed into ggplot
library(lubridate)  # for round_date
library(fasttime)   # for fastPOSIXct

# SQL for dtLGE01
# SELECT    [transactiontype]
#          ,[transactionid]
#          ,CASE when COUNT(time_stamp) = 1 then 1 else 0 END one_is_timeout
#          ,CASE when COUNT(time_stamp) = 1
#                then 0.000755
#                else DATEDIFF(MS, min(time_stamp), max([TIME_STAMP]))
#           END duration_ms
#          ,min([TIME_STAMP]) start
#          ,max([time_stamp]) endt
#          ,[componentname]
#          ,correlationid
#   -- use "with (nolock)" to prevent table locking
#   FROM [HAWK_Log_Archive].[dbo].[PR_LOG] with (nolock)
#   -- Refer to timestamp format for time-level granularity
#   where transactionid in (
#     select distinct transactionid
#     FROM [HAWK_Log_Archive].[dbo].[PR_LOG] with (nolock)
#     where TIME_STAMP  >= '20171024 09:00:00:00' and TIME_STAMP < '20171024 11:00:00:00'  -- 195341578
#   )
#    and status in ('Start','End')
#    group by transactionid,transactiontype,applicationid,componentname,correlationid
#    order by start

dtLG01=fread("c:/kewoo/eai/d20171024.cle-log.csv")
AESTDiff <- 36000
interval.length <- "1 seconds"
start.AEST <- fastPOSIXct("2017-10-24 10:15:00")-36000
end.AEST <- fastPOSIXct("2017-10-24 10:45:00")-36000

# exploratory
str(dtER01)
names(dtER01)
names(dtLG01)
dtLG01[,.(TIME_STAMP, APPLICATIONID)]
# end exploration

tb01.tx.times.all <-dtLG01[, list(transactionid,
                               componentname,
                               startPct = round_date(fastPOSIXct(start)-AESTDiff, interval.length),
                               endtPct = round_date(fastPOSIXct(endt)-AESTDiff, interval.length))
                        ]
tb01.expandedIntervals <- tb01.tx.times.all[, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid
                            ][, list(txCount = .N), by = intervals]

# OLD plot with startPct == endtPct when transaction is a timeout (side-effect of not incorporating endtPct from exceptionrec)
ggplot() +
 geom_line(data=tb01.expandedIntervals[intervals > start.AEST & intervals < end.AEST],
           aes(x=intervals,y=txCount), color='blue')

# SQL for dtER
# -- GENERIC EXCEPTIONREC SQL
#    SELECT [TIME_STAMP]
#       ,[COMPONENTNAME]
#       ,[TRANSACTIONTYPE]
#       ,[transactionid]
#       ,[correlationid]
#   -- use "with (nolock)" to prevent table locking
#   FROM [HAWK_Log_Archive].[dbo].[PR_EXCEPTIONREC] with (nolock)
#   -- Refer to timestamp format for time-level granularity
#   where TIME_STAMP  >= '20171024 09:00:00:00' and TIME_STAMP < '20171024 11:00:00:00'  -- 195341578
#   order by TIME_STAMP


dtER=fread("c:/kewoo/eai/d20171024.exceptionrec.csv")
tb02.tx.times.all <-dtER[, list(transactionid,
                                  COMPONENTNAME,
                                  endtPct = round_date(fastPOSIXct(TIME_STAMP)-AESTDiff, interval.length))]
tb02.txCounts <- tb02.tx.times.all[, list(txCount = .N), by = endtPct]


ggplot() +
 geom_line(data=tb01.expandedIntervals, aes(x=intervals,y=txCount), color='blue') +
 geom_line(data=tb02.txCounts, aes(x=endtPct,y=txCount), color='red')


# 20171215: The reason there's a drop in txCount during a service interruption
#           is because startPct == endPct caused by the group by in the original extracting SQL
#           Solution is to extract actual endPct from EXCEPTIONREC joining via transactionid
# first, take outer join
dtOJ <- tb02.tx.times.all[tb01.tx.times.all, on = "transactionid"]
# second, populate blank (NA) endPct values with i.endPct which has actual end times from the cle-log table
dtOJ[is.na(endtPct), endtPct := i.endtPct]
# tb01.tx.times.filtered <- tb01.tx.times.all[startPct > start.AEST & endtPct < end.AEST]
# expand intervals only for AcurityConnector transactions
dtOJ.expandedIntervals <- dtOJ[componentname %in% c('AcurityConnector-1-AcurityConnectorPA-01',
                                                   'AcurityConnector-1-AcurityConnectorPA-02')
                            ][, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid
                            ][, list(txCount = .N), by = intervals]
# expand intervals for all transactions
dtOJ.expandedIntervals <- dtOJ[, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid
                                ][, list(txCount = .N), by = intervals]


# NEW plot with startPct != endtPct when transaction is a timeout after incorporating endtPct from exceptionrec
start.AEST <- fastPOSIXct("2017-10-24 10:15:00")-36000
end.AEST <- fastPOSIXct("2017-10-24 10:45:00")-36000
ggplot() +
 geom_line(data=dtOJ.expandedIntervals[intervals > start.AEST & intervals < end.AEST],
           aes(x=intervals,y=txCount), color='blue') +
 geom_line(data=tb02.txCounts[endtPct > start.AEST & endtPct < end.AEST],
           aes(x=endtPct,y=txCount), color='red')

# exploratory
View(tb02.txCounts)
View(dtOJ.expandedIntervals)
View(tb02.tx.times.all)
# end exploration