Research Article

Distance Measurement Methods for Improved Insider Threat Detection

Algorithm 4

Damerau–Levenshtein distance calculation code.
library(readr)
library(stringdist)
# Load the dataset. Remember to change the path to the file's location on
# your own machine.
cert_r4_2_dataset <- read_csv("~/cert_r4.2_dataset.csv")
username <- "MCF0600"

# Filter the dataset to only include rows relevant to the chosen user, then
# group that user's activity values by week (one list element per week).
user_rows <- cert_r4_2_dataset[cert_r4_2_dataset$user %in% username, ]
allWeeks <- split(user_rows$activity, user_rows$week)

# Convert the allWeeks list into a data frame with one column per week.
# Weeks contain different numbers of events, so each vector is first padded
# with NA up to the length of the longest week (`length<-` does the padding)
# before the columns are bound together.
indx <- lengths(allWeeks)
res <- as.data.frame(do.call(cbind, lapply(allWeeks, `length<-`, max(indx))))
########## Distance Calculation #############
# The comparison starts at column 6 of res, so at least 6 weekly columns are
# required; with fewer, 6:length(res) would silently count downwards.
if (length(res) < 6) {
  stop("Need at least 6 weekly columns in 'res' to compute distances.",
       call. = FALSE)
}

# w[i - 5] holds the Damerau-Levenshtein distance between column i of res and
# the column before it. The NA padding added when the weeks were bound into a
# data frame is dropped before each comparison.
w <- numeric(length(res) - 5)
for (i in 6:length(res)) {
  w[i - 5] <- seq_dist(na.omit(res[[i]]), na.omit(res[[i - 1]]), method = "dl")
}

# Largest distance and the (first) column index at which it occurs; the + 5
# maps a position in w back to the column index used on the plot's x axis.
highestDist <- max(w)
hd_week <- which.max(w) + 5

# Plot the distance series and label the peak with its week index.
plot(6:length(res), w, type = "l", xlab = "Week",
     ylab = "DL Distance ", main = paste("DL Distance for", username, sep = " "))
text(x = hd_week, y = highestDist, labels = hd_week)