library(readr)
library(stringdist)

# Accumulators describing each answer file: the user name and scenario
# number parsed from its path, and the path itself.
usernames <- c()
scenarios <- c()
filenames <- c()

# Accumulators for the detection outcome of each distance metric
# (Damerau-Levenshtein, Jaccard, cosine).
dlResults <- c()
jacResults <- c()
cosResults <- c()

# Every CSV below ~/answers, searched recursively and returned as full paths.
# `pattern` is a regular expression, not a shell glob, so match the extension
# explicitly and anchor it at the end of the name.
files <- list.files(
  path = "~/answers",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
# Parse the user name and scenario number out of every answer-file path and
# append them, together with the path, to the accumulator vectors.
#
# The user name is the run of letters/digits between a "-" and the following
# "."; the scenario is a single digit 1-3 enclosed in dashes.
user_pattern <- "-[A-Za-z0-9]+\\."
scenario_pattern <- "-[1-3]-"

# Only keep paths that carry both tokens. Appending a path without its user
# or scenario would leave the three vectors with different lengths and pair
# later files with the wrong user.
parsable <- grepl(user_pattern, files) & grepl(scenario_pattern, files)
if (!all(parsable)) {
  warning(
    "Skipping files without a user/scenario token: ",
    paste(files[!parsable], collapse = ", "),
    call. = FALSE
  )
}
parsed_files <- files[parsable]

# First match of each pattern per path, still including the delimiters.
user_tokens <- regmatches(parsed_files, regexpr(user_pattern, parsed_files))
scenario_tokens <-
  regmatches(parsed_files, regexpr(scenario_pattern, parsed_files))

# Strip the leading "-" / trailing "." from the user token and both dashes
# from the scenario token.
usernames <- c(usernames, gsub("^-|\\.$", "", user_tokens))
scenarios <- c(scenarios, gsub("-", "", scenario_tokens, fixed = TRUE))
filenames <- c(filenames, parsed_files)
# Pick the week with the largest week-over-week distance and report it only
# when it is one of the weeks in which the attack actually occurred.
#
# distances:    numeric vector; element k is the distance between week
#               k + week_offset and the week before it.
# attack_weeks: weeks in which the attack actually occurred.
# week_offset:  added to the position of the maximum to get the week number
#               (5, because the results start at week 6).
#
# Returns the detected week number, or the string "FALSE" when there are no
# usable distances or the detected week is not an attack week.
pick_attack_week <- function(distances, attack_weeks, week_offset = 5) {
  # Distances are non-negative, so which.max() selects the same (first)
  # maximum as a running-maximum scan that starts from 0; it additionally
  # tolerates empty input and NA/NaN scores.
  peak <- unname(which.max(distances))
  if (length(peak) == 0) {
    return("FALSE")
  }
  detected_week <- peak + week_offset
  if (detected_week %in% attack_weeks) detected_week else "FALSE"
}

# For every answer file: find the weeks of the real attack, score each week
# of the user's activity against the previous week with three sequence
# distances, and record whether the highest-scoring week is an attack week.
#
# Relies on `cert_r4_2_dataset` being loaded already, with the columns
# `user`, `date`, `week` and `activity` that are read below.
for (i in seq_along(usernames)) {
  # Only the third column of the answer file is read; it is parsed as a
  # UTC timestamp.
  answerFile <- read_csv(
    filenames[i],
    col_names = FALSE,
    col_types = cols_only(X3 = col_guess())
  )
  answerFile$X3 <-
    as.POSIXct(answerFile$X3, format = "%m/%d/%Y %H:%M:%S", tz = "UTC")

  # Filter the dataset to the current user once and reuse the subset.
  user <- cert_r4_2_dataset[cert_r4_2_dataset$user %in% usernames[i], ]

  # Match answer-file dates to user dates; the matched rows give the weeks
  # in which the attack ACTUALLY occurred.
  m <- match(answerFile$X3, user$date)
  week <- user$week[m]

  # One activity sequence per week, padded with NA to a common length so
  # the sequences can be column-bound into a data frame.
  # Reference:
  # http://stackoverflow.com/questions/15124590/column-binding-in-r
  allWeeks <- split(user$activity, user$week)
  indx <- lengths(allWeeks)
  res <- as.data.frame(
    do.call(cbind, lapply(allWeeks, `length<-`, max(indx, 0L)))
  )

  # Drop the NA padding again before computing distances.
  weekly <- lapply(res, function(column) column[!is.na(column)])

  # Scoring starts at column 6 and compares each column with the one before.
  # NOTE(review): column position is treated as the week number, which only
  # holds if the user has activity in every week starting from week 1 --
  # confirm against the dataset.
  week_cols <- if (length(res) >= 6) 6:length(res) else integer(0)
  score_weeks <- function(method) {
    vapply(
      week_cols,
      function(w) seq_dist(weekly[[w]], weekly[[w - 1]], method = method),
      numeric(1)
    )
  }
  dlResults_temp <- score_weeks("dl")
  jacResults_temp <- score_weeks("jaccard")
  cosResults_temp <- score_weeks("cosine")

  # Offset is +5 since our results start at week 6.
  dlResults <- c(dlResults, pick_attack_week(dlResults_temp, week))
  jacResults <- c(jacResults, pick_attack_week(jacResults_temp, week))
  cosResults <- c(cosResults, pick_attack_week(cosResults_temp, week))
}
# One row per answer file: the parsed user name and scenario, the file path,
# and for each distance metric either the detected attack week or "FALSE".
fullResults <- data.frame(
  usernames, scenarios, filenames,
  dlResults, jacResults, cosResults
)