Commit 9282ab3164eace28c7a4df9ad97b10dfe017cd84

Authored by Imanol-Mikel Barba Sabariego
1 parent 2de5507d

Filtered invalid HM entries (N/A countries, >1 countries, etc.)

ISO27001effectiveness/DESCRIPTION
... ... @@ -10,4 +10,5 @@ License: What license is it under?
10 10 Encoding: UTF-8
11 11 LazyData: true
12 12 RoxygenNote: 5.0.1
13   -Imports: xlsx
  13 +Imports: xlsx,
  14 + ggplot2
... ...
ISO27001effectiveness/R/Hackmageddon_Parser.R
... ... @@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){
43 43 #' @export
44 44 #'
45 45 #' @examples
46   -#' data.pro <- ProccesHMRaw(data.raw, "1899-12-30")
47   -ProccesHMRaw <- function(dataset.raw, dateOffset){
  46 +#' data.pro <- ProcessHMRaw(data.raw, "1899-12-30")
  47 +ProcessHMRaw <- function(dataset.raw, dateOffset){
48 48  
49 49 #Standar names to the columns
50 50 dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country"))
51 51  
52 52 #Remove rows with Date NA
53 53 dataset <- dataset[!is.na(dataset$Date),]
  54 + dataset <- dataset[!is.na(dataset$Country),]
  55 + dataset <- dataset[!dataset$Country == ">1",]
  56 + dataset <- dataset[!dataset$Country == ">A",]
  57 + dataset <- dataset[!dataset$Country == "INT",]
  58 + dataset <- dataset[!grepl(">",dataset$Country),]
  59 + dataset$Country <- gsub("\n"," ",dataset$Country)
  60 + dataset <- FilterMultiCountry(dataset)
  61 + dataset <- dataset[!dataset$Country == "",]
54 62  
55 63 #Format properly the date
56 64 dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset)
... ... @@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){
58 66 dataset
59 67 }
60 68  
61   -
  69 +FilterMultiCountry <- function(dataset) {
  70 + multi <- dataset[grepl(" ",Attacks$Country),]
  71 + #TODO
  72 +}
62 73  
63 74 #' Parse every excel file into a folder
64 75 #'
... ... @@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){
70 81 #' @export
71 82 #'
72 83 #' @examples
73   -#' data.pro <- ProccesHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30")
  84 +#' data.pro <- ParseHMFolder("./data/hackmaggedon/", c(1, 5, 3), "1899-12-30")
74 85 ParseHMFolder <- function(folder, cols, dateOffset){
75 86  
76 87 #List excel files into the folder
77 88 filelist <- list.files(folder, pattern = "*.xls*")
78 89  
79 90 #Iterate for each file appending the returned data.frame
80   - dataset <- ProccesHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset)
  91 + dataset <- ProcessHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset)
81 92 for (i in 2:length(filelist)) {
82   - dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset))
  93 + dataset <- rbind(dataset, ProcessHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset))
83 94 }
84 95  
85 96 dataset
... ...