Research Article

Distance Measurement Methods for Improved Insider Threat Detection

Algorithm 5

Distance measurement full evaluation code.
library(readr)
library(stringdist)
# Metadata parsed from each answer file name; the three vectors are kept
# index-aligned (element k of each describes the same file).
usernames <- character(0)
scenarios <- character(0)
filenames <- character(0)
# Outcome per answer file for each distance measure (Damerau-Levenshtein,
# Jaccard, cosine). Left as NULL so the first append decides the vector type.
dlResults <- c()
jacResults <- c()
cosResults <- c()
# `pattern` is a regular expression, not a shell glob: "*.csv" would also
# match any name that merely contains "csv" after some character. Anchor on
# the literal ".csv" extension instead.
files <- list.files(
  path = "~/answers",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
# Extract the user name and the scenario number from every answer file path.
# A path is expected to contain "-<scenario>-" with scenario in 1..3 and
# "-<username>." immediately before the file extension.
for (f in files) {
  user_match <- regmatches(f, regexpr("-[A-Za-z0-9]+\\.", f))
  scenario_match <- regmatches(f, regexpr("-[1-3]-", f))

  # regmatches() returns character(0) when nothing matches; appending that
  # would silently shift usernames/scenarios out of step with filenames.
  if (length(user_match) == 0 || length(scenario_match) == 0) {
    warning("Skipping answer file with unexpected name: ", f, call. = FALSE)
    next
  }

  # Strip the leading "-" and the trailing "." around the user name.
  username <- sub("-", "", user_match)
  username <- sub("\\.", "", username)
  # Strip both dashes around the scenario digit.
  scenario <- gsub("-", "", scenario_match, fixed = TRUE)

  usernames <- c(usernames, username)
  scenarios <- c(scenarios, scenario)
  filenames <- c(filenames, f)
}
# Week-over-week comparison starts at this week: the first distance of every
# series compares week 6 with week 5.
# NOTE(review): mapping a series position back to a week number assumes that
# column j of `res` holds week j, i.e. weeks are numbered consecutively from 1
# for every user -- confirm against the dataset.
first_week <- 6L

# Distance between each week in `week_idx` and the week before it, using the
# given stringdist sequence-distance method. Returns NULL when `week_idx` is
# empty.
week_distances <- function(res, week_idx, method) {
  unlist(lapply(week_idx, function(w) {
    seq_dist(na.omit(res[w]), na.omit(res[w - 1]), method = method)
  }))
}

# Week with the largest distance (first one on ties) if it is one of the
# actual attack weeks, otherwise the string "FALSE". The string marker is kept
# so the result columns have the same content as before.
flag_attack_week <- function(distances, attack_weeks, first_week) {
  peak <- which.max(distances)
  if (length(peak) == 0) {
    # No distances could be computed (too few weeks, or only NA/NaN values).
    return("FALSE")
  }
  flagged <- peak + first_week - 1
  if (flagged %in% attack_weeks) flagged else "FALSE"
}

for (i in seq_along(usernames)) {
  # Only the third column of the answer file (the event timestamp) is read.
  answerFile <- read_csv(
    filenames[i],
    col_names = FALSE,
    col_types = cols_only(X3 = col_guess())
  )
  answerFile$X3 <- as.POSIXct(
    answerFile$X3,
    format = "%m/%d/%Y %H:%M:%S",
    tz = "UTC"
  )

  ######### Filter dataset to only include data relevant to chosen user. #######
  # %in% (rather than ==) so rows with a missing user are dropped instead of
  # turning into all-NA rows.
  user <- cert_r4_2_dataset[cert_r4_2_dataset$user %in% usernames[i], ]

  # Match answer file dates to user dates.
  m <- match(answerFile$X3, user$date)
  # Week(s) in which the attack ACTUALLY occurred.
  week <- user$week[m]

  # One vector of activities per week, padded with NA to equal length so the
  # weeks can be column-bound into a data frame.
  # Reference: http://stackoverflow.com/questions/15124590/column-binding-in-r
  allWeeks <- split(user$activity, user$week)
  if (length(allWeeks) > 0) {
    indx <- lengths(allWeeks)
    res <- as.data.frame(
      do.call(cbind, lapply(allWeeks, `length<-`, max(indx)))
    )
  } else {
    res <- data.frame()
  }
  #################################################

  # 6:length(res) would count downwards when fewer than 6 weeks exist, so the
  # index range is built explicitly and is empty in that case.
  n_weeks <- length(res)
  week_idx <- if (n_weeks >= first_week) first_week:n_weeks else integer(0)

  dlResults_temp <- week_distances(res, week_idx, "dl")
  jacResults_temp <- week_distances(res, week_idx, "jaccard")
  cosResults_temp <- week_distances(res, week_idx, "cosine")

  ##### DL ########
  dlResults <- c(dlResults, flag_attack_week(dlResults_temp, week, first_week))
  ##### Jaccard ########
  jacResults <- c(
    jacResults,
    flag_attack_week(jacResults_temp, week, first_week)
  )
  ##### Cosine ########
  cosResults <- c(
    cosResults,
    flag_attack_week(cosResults_temp, week, first_week)
  )
}
################
# One row per answer file: who, which scenario, which file, and for each
# distance measure either the flagged week or "FALSE".
fullResults <- data.frame(
  usernames,
  scenarios,
  filenames,
  dlResults,
  jacResults,
  cosResults
)