Research Article

Distance Measurement Methods for Improved Insider Threat Detection

Algorithm 2

HMM implementation code.
library(HMM)
library(readr)
############## Data Parsing Phase ##############
# Load the dataset. Remember to change the path to the file's location on your
# own machine.
cert_r4_2_dataset <- read_csv("~/cert_r4.2_dataset.csv")
username <- "MCF0600"

# Filter the dataset so it only includes rows relevant to the chosen user.
# The filter is computed once and reused for both the values and the grouping.
userRows <- cert_r4_2_dataset[cert_r4_2_dataset$user %in% username, ]
if (nrow(userRows) == 0) {
  stop("No rows found for user ", username, call. = FALSE)
}

# Group the user's activity values by week: one vector per week.
allWeeks <- split(userRows$activity, userRows$week)

# Convert allWeeks into a data frame with one column per week. Weeks have
# different numbers of events, so every week is padded with NA up to the
# length of the longest week before the columns are bound together.
indx <- vapply(allWeeks, length, integer(1))
res <- as.data.frame(do.call(cbind, lapply(allWeeks, `length<-`, max(indx))))
#################################################
############## HMM Phase ########################
# Initiate a 10-state HMM with 7 labels (which represent activities of user).
hmm <- initHMM(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), c(1, 2, 3, 4, 5, 6, 7))

# Train our model with the first 5 weeks of user activity. The NA padding that
# was added to equalise week lengths is dropped before training.
model <- baumWelch(hmm, na.omit(unlist(res[1:5])), maxIterations = 20,
                   pseudoCount = 0.1, delta = 0.01)

# One score per week after the training window (weeks 6, 7, ...).
weekIndex <- seq_len(length(res))
remainingWeeks <- weekIndex[weekIndex > 5] # empty when there are only 5 weeks
vector <- numeric(length(remainingWeeks))

# For the remaining weeks of activity ...
for (i in remainingWeeks) {
  # ... calculate the probability of week i occurring against the model ...
  # forward() is given the fitted HMM, i.e. the `hmm` element of the
  # baumWelch() result.
  logForwardProbabilities <- forward(model$hmm, na.omit(unlist(res[i])))
  like <- logForwardProbabilities
  # Index of the last column of the forward matrix (the original computed
  # this as length(like) / 10, i.e. total cells divided by the 10 states).
  lenthOfLike <- ncol(like)
  # NOTE(review): this sums the per-state log forward values of the final
  # time step. That is the score used by the original listing; it is not the
  # log-sum-exp that would give the sequence log-likelihood. Confirm intent
  # before changing, as it affects the published figures.
  answer <- sum(like[, lenthOfLike])
  # ... store result of probability in vector ...
  vector[i - 5] <- answer
  # ... and update model with week i (retrain on weeks 1..i).
  model <- baumWelch(model$hmm, na.omit(unlist(res[1:i])), maxIterations = 20,
                     pseudoCount = 0.1, delta = 0.01)
}
######### Plot Result #########
# Plot the negated score of every scored week. Scoring starts at week 6, so
# the k-th entry of `vector` belongs to week 5 + k.
# NOTE(review): the listing read `vector[...] -1`, which subtracts 1 instead of
# negating; given the "-Log Probability" axis label this is treated as a lost
# `*` (i.e. `* -1`). Confirm against the published algorithm.
plot(5 + seq_along(vector), -vector, type = "l",
     xlab = "Week", ylab = "-Log Probability",
     main = paste("HMM for", username, sep = " "))