Commit 9282ab3164eace28c7a4df9ad97b10dfe017cd84
1 parent
2de5507d
Filtered invalid HM entries (N/A countries, >1 countries, etc.)
Showing
2 changed files
with
19 additions
and
7 deletions
ISO27001effectiveness/DESCRIPTION
ISO27001effectiveness/R/Hackmageddon_Parser.R
@@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){ | @@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){ | ||
43 | #' @export | 43 | #' @export |
44 | #' | 44 | #' |
45 | #' @examples | 45 | #' @examples |
46 | -#' data.pro <- ProccesHMRaw(data.raw, "1899-12-30") | ||
47 | -ProccesHMRaw <- function(dataset.raw, dateOffset){ | 46 | +#' data.pro <- ProcessHMRaw(data.raw, "1899-12-30") |
47 | +ProcessHMRaw <- function(dataset.raw, dateOffset){ | ||
48 | 48 | ||
49 | #Standar names to the columns | 49 | #Standar names to the columns |
50 | dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country")) | 50 | dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country")) |
51 | 51 | ||
52 | #Remove rows with Date NA | 52 | #Remove rows with Date NA |
53 | dataset <- dataset[!is.na(dataset$Date),] | 53 | dataset <- dataset[!is.na(dataset$Date),] |
54 | + dataset <- dataset[!is.na(dataset$Country),] | ||
55 | + dataset <- dataset[!dataset$Country == ">1",] | ||
56 | + dataset <- dataset[!dataset$Country == ">A",] | ||
57 | + dataset <- dataset[!dataset$Country == "INT",] | ||
58 | + dataset <- dataset[!grepl(">",dataset$Country),] | ||
59 | + dataset$Country <- gsub("\n"," ",dataset$Country) | ||
60 | + dataset <- FilterMultiCountry(dataset) | ||
61 | + dataset <- dataset[!dataset$Country == "",] | ||
54 | 62 | ||
55 | #Format properly the date | 63 | #Format properly the date |
56 | dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset) | 64 | dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset) |
@@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ | @@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ | ||
58 | dataset | 66 | dataset |
59 | } | 67 | } |
60 | 68 | ||
#' Drop entries that reference more than one country
#'
#' Rows whose \code{Country} value contains a space are treated as
#' multi-country entries and removed from the dataset.
#'
#' @param dataset A data.frame with a \code{Country} column.
#'
#' @return \code{dataset} without the rows whose \code{Country} contains a
#'   space. Always a data.frame, even when no row or a single column remains.
#' @noRd
FilterMultiCountry <- function(dataset) {
  # Use the function argument, not a global object: the previous version read
  # `Attacks$Country`, which is not defined inside this function.
  is.multi <- grepl(" ", dataset$Country, fixed = TRUE)

  # NOTE(review): the previous version only collected the multi-country rows
  # and (invisibly) returned them, so the caller ended up keeping exactly the
  # rows it meant to filter. Dropping them is assumed to be the intent;
  # confirm against the data.
  #TODO: decide whether multi-country entries should instead be split into
  # one row per country.
  dataset[!is.multi, , drop = FALSE]
}
62 | 73 | ||
63 | #' Parse every excel file into a folder | 74 | #' Parse every excel file into a folder |
64 | #' | 75 | #' |
@@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ | @@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ | ||
70 | #' @export | 81 | #' @export |
71 | #' | 82 | #' |
72 | #' @examples | 83 | #' @examples |
73 | -#' data.pro <- ProccesHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30") | 84 | +#' data.pro <- ParseHMFolder("./data/hackmaggedon/", c(1, 5, 3), "1899-12-30") |
74 | ParseHMFolder <- function(folder, cols, dateOffset){ | 85 | ParseHMFolder <- function(folder, cols, dateOffset){ |
75 | 86 | ||
76 | #List excel files into the folder | 87 | #List excel files into the folder |
77 | filelist <- list.files(folder, pattern = "*.xls*") | 88 | filelist <- list.files(folder, pattern = "*.xls*") |
78 | 89 | ||
79 | #Iterate for each file appending the returned data.frame | 90 | #Iterate for each file appending the returned data.frame |
80 | - dataset <- ProccesHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) | 91 | + dataset <- ProcessHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) |
81 | for (i in 2:length(filelist)) { | 92 | for (i in 2:length(filelist)) { |
82 | - dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) | 93 | + dataset <- rbind(dataset, ProcessHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) |
83 | } | 94 | } |
84 | 95 | ||
85 | dataset | 96 | dataset |