Commit 9282ab3164eace28c7a4df9ad97b10dfe017cd84

Authored by Imanol-Mikel Barba Sabariego
1 parent 2de5507d

Filtered invalid HM entries (N/A countries, >1 countries, etc.)

ISO27001effectiveness/DESCRIPTION
@@ -10,4 +10,5 @@ License: What license is it under? @@ -10,4 +10,5 @@ License: What license is it under?
10 Encoding: UTF-8 10 Encoding: UTF-8
11 LazyData: true 11 LazyData: true
12 RoxygenNote: 5.0.1 12 RoxygenNote: 5.0.1
13 -Imports: xlsx 13 +Imports: xlsx,
  14 + ggplot2
ISO27001effectiveness/R/Hackmageddon_Parser.R
@@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){ @@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){
43 #' @export 43 #' @export
44 #' 44 #'
45 #' @examples 45 #' @examples
46 -#' data.pro <- ProccesHMRaw(data.raw, "1899-12-30")  
47 -ProccesHMRaw <- function(dataset.raw, dateOffset){ 46 +#' data.pro <- ProcessHMRaw(data.raw, "1899-12-30")
  47 +ProcessHMRaw <- function(dataset.raw, dateOffset){
48 48
49 #Standar names to the columns 49 #Standar names to the columns
50 dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country")) 50 dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country"))
51 51
52 #Remove rows with Date NA 52 #Remove rows with Date NA
53 dataset <- dataset[!is.na(dataset$Date),] 53 dataset <- dataset[!is.na(dataset$Date),]
  54 + dataset <- dataset[!is.na(dataset$Country),]
  55 + dataset <- dataset[!dataset$Country == ">1",]
  56 + dataset <- dataset[!dataset$Country == ">A",]
  57 + dataset <- dataset[!dataset$Country == "INT",]
  58 + dataset <- dataset[!grepl(">",dataset$Country),]
  59 + dataset$Country <- gsub("\n"," ",dataset$Country)
  60 + dataset <- FilterMultiCountry(dataset)
  61 + dataset <- dataset[!dataset$Country == "",]
54 62
55 #Format properly the date 63 #Format properly the date
56 dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset) 64 dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset)
@@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ @@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){
58 dataset 66 dataset
59 } 67 }
60 68
61 - 69 +FilterMultiCountry <- function(dataset) {
  70 + multi <- dataset[grepl(" ",Attacks$Country),]
  71 + #TODO
  72 +}
62 73
63 #' Parse every excel file into a folder 74 #' Parse every excel file into a folder
64 #' 75 #'
@@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ @@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){
70 #' @export 81 #' @export
71 #' 82 #'
72 #' @examples 83 #' @examples
73 -#' data.pro <- ProccesHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30") 84 +#' data.pro <- ProcessHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30")
74 ParseHMFolder <- function(folder, cols, dateOffset){ 85 ParseHMFolder <- function(folder, cols, dateOffset){
75 86
76 #List excel files into the folder 87 #List excel files into the folder
77 filelist <- list.files(folder, pattern = "*.xls*") 88 filelist <- list.files(folder, pattern = "*.xls*")
78 89
79 #Iterate for each file appending the returned data.frame 90 #Iterate for each file appending the returned data.frame
80 - dataset <- ProccesHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) 91 + dataset <- ProcessHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset)
81 for (i in 2:length(filelist)) { 92 for (i in 2:length(filelist)) {
82 - dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) 93 + dataset <- rbind(dataset, ProcessHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset))
83 } 94 }
84 95
85 dataset 96 dataset