From 9282ab3164eace28c7a4df9ad97b10dfe017cd84 Mon Sep 17 00:00:00 2001 From: Imanol-Mikel Barba Sabariego Date: Tue, 20 Dec 2016 20:32:10 +0100 Subject: [PATCH] Filtered invalid HM entries (N/A countries, >1 countries, etc..) --- ISO27001effectiveness/DESCRIPTION | 3 ++- ISO27001effectiveness/R/Hackmageddon_Parser.R | 23 +++++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/ISO27001effectiveness/DESCRIPTION b/ISO27001effectiveness/DESCRIPTION index 28ba219..d07d399 100644 --- a/ISO27001effectiveness/DESCRIPTION +++ b/ISO27001effectiveness/DESCRIPTION @@ -10,4 +10,5 @@ License: What license is it under? Encoding: UTF-8 LazyData: true RoxygenNote: 5.0.1 -Imports: xlsx +Imports: xlsx, + ggplot2 diff --git a/ISO27001effectiveness/R/Hackmageddon_Parser.R b/ISO27001effectiveness/R/Hackmageddon_Parser.R index 3d9add8..c407e7b 100644 --- a/ISO27001effectiveness/R/Hackmageddon_Parser.R +++ b/ISO27001effectiveness/R/Hackmageddon_Parser.R @@ -43,14 +43,22 @@ ParseHMExcel <- function(file, cols){ #' @export #' #' @examples -#' data.pro <- ProccesHMRaw(data.raw, "1899-12-30") -ProccesHMRaw <- function(dataset.raw, dateOffset){ +#' data.pro <- ProcessHMRaw(data.raw, "1899-12-30") +ProcessHMRaw <- function(dataset.raw, dateOffset){ #Standar names to the columns dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country")) #Remove rows with Date NA dataset <- dataset[!is.na(dataset$Date),] + dataset <- dataset[!is.na(dataset$Country),] + dataset <- dataset[!dataset$Country == ">1",] + dataset <- dataset[!dataset$Country == ">A",] + dataset <- dataset[!dataset$Country == "INT",] + dataset <- dataset[!grepl(">",dataset$Country),] + dataset$Country <- gsub("\n"," ",dataset$Country) + dataset <- FilterMultiCountry(dataset) + dataset <- dataset[!dataset$Country == "",] #Format properly the date dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset) @@ -58,7 +66,10 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ dataset } - +FilterMultiCountry <- function(dataset) { + multi <- dataset[grepl(" ",Attacks$Country),] + #TODO +} #' Parse every excel file into a folder #' @@ -70,16 +81,16 @@ ProccesHMRaw <- function(dataset.raw, dateOffset){ #' @export #' #' @examples -#' data.pro <- ProccesHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30") +#' data.pro <- ProcessHMRaw("./data/hackmaggedon/", c(1, 5, 3) "1899-12-30") ParseHMFolder <- function(folder, cols, dateOffset){ #List excel files into the folder filelist <- list.files(folder, pattern = "*.xls*") #Iterate for each file appending the returned data.frame - dataset <- ProccesHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) + dataset <- ProcessHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) for (i in 2:length(filelist)) { - dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) + dataset <- rbind(dataset, ProcessHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) } dataset -- libgit2 0.22.2