Commit 2de5507d6f309c26f053e7e88787a3fa18c633bb
1 parent
f376d0cf
Parsers working correctly
Showing
11 changed files
with
158 additions
and
59 deletions
ISO27001effectiveness/DESCRIPTION
ISO27001effectiveness/Main.R
1 | -source("./R/Util.R") | ||
2 | -LoadLibraries() | ||
3 | -source("./R/ISOSurvey_Parser.R") | ||
4 | 1 | ||
5 | -Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) | ||
6 | -Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) | ||
7 | -Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) | 2 | +Cert_PerCountry <- ISO27001effectiveness::GetISOSurveyCertsPerCountry() |
3 | +Sites_PerCountry <- ISO27001effectiveness::GetISOSurveySitesPerCountry() | ||
4 | +Cert_PerSector <- ISO27001effectiveness::GetISOSurveyCertsPerSector() | ||
8 | 5 | ||
9 | -Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015")) | ||
10 | -Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015")) | ||
11 | -#Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015")) | ||
12 | - | ||
13 | -Attacks <- GetAttacksData() | 6 | +Attacks <- ISO27001effectiveness::GetDefaultAttacksData() |
ISO27001effectiveness/R/Hackmageddon_Parser.R
@@ -4,13 +4,27 @@ | @@ -4,13 +4,27 @@ | ||
4 | 4 | ||
5 | 5 | ||
6 | 6 | ||
7 | +#' Parse a raw Excel data file from Hackmageddon | ||
8 | +#' | ||
9 | +#' @param file path to the excel file | ||
10 | +#' @param cols list of columns index to read | ||
11 | +#' | ||
12 | +#' @return data.frame | ||
13 | +#' @export | ||
14 | +#' | ||
15 | +#' @examples | ||
16 | +#' data.raw <- ParseHMExcel("./data/hackmageddon/file.xls", c(2, 3, 6, 5)) | ||
7 | ParseHMExcel <- function(file, cols){ | 17 | ParseHMExcel <- function(file, cols){ |
8 | - print(file) | 18 | + |
19 | + #Checks | ||
9 | if (!file.exists(file)) { | 20 | if (!file.exists(file)) { |
10 | stop(paste("Error, file [", file, "] not found")) | 21 | stop(paste("Error, file [", file, "] not found")) |
11 | } | 22 | } |
23 | + | ||
24 | + #Parse data from Excel; the files share a format but use different column positions | ||
12 | dataset <- xlsx::read.xlsx2(file, 1, header = TRUE,colIndex = cols, colClasses = c("numeric", "character", "character", "character")) | 25 | dataset <- xlsx::read.xlsx2(file, 1, header = TRUE,colIndex = cols, colClasses = c("numeric", "character", "character", "character")) |
13 | 26 | ||
27 | + #If Date is NULL there is probably a blank first row, so re-read starting at row 2 | ||
14 | if (is.null(dataset$Date)) { | 28 | if (is.null(dataset$Date)) { |
15 | dataset <- xlsx::read.xlsx2(file, 1, header = TRUE,colIndex = cols, colClasses = c("numeric", "character", "character", "character"), startRow = 2) | 29 | dataset <- xlsx::read.xlsx2(file, 1, header = TRUE,colIndex = cols, colClasses = c("numeric", "character", "character", "character"), startRow = 2) |
16 | } | 30 | } |
@@ -18,47 +32,77 @@ ParseHMExcel <- function(file, cols){ | @@ -18,47 +32,77 @@ ParseHMExcel <- function(file, cols){ | ||
18 | dataset | 32 | dataset |
19 | } | 33 | } |
20 | 34 | ||
21 | -ProccesHMRaw <- function(dataset.raw){ | 35 | + |
36 | + | ||
37 | +#' Prepare raw data from Hackmageddon's Excel files for use | ||
38 | +#' | ||
39 | +#' @param dataset.raw data.frame with raw data | ||
40 | +#' @param dateOffset origin to add the numeric date | ||
41 | +#' | ||
42 | +#' @return data.frame | ||
43 | +#' @export | ||
44 | +#' | ||
45 | +#' @examples | ||
46 | +#' data.pro <- ProccesHMRaw(data.raw, "1899-12-30") | ||
47 | +ProccesHMRaw <- function(dataset.raw, dateOffset){ | ||
48 | + | ||
49 | + #Standard names for the columns | ||
50 | + dataset <- setNames(dataset.raw, c("Date", "Attack", "Target", "Country")) | ||
22 | 51 | ||
23 | #Remove rows with Date NA | 52 | #Remove rows with Date NA |
24 | - print(class(dataset.raw$Date)) | ||
25 | - dataset <- dataset.raw[!is.na(dataset.raw$Date),] | ||
26 | - dataset$Date <- as.POSIXct(dataset$Date*86400, tz ="GMT", origin ="1904-01-01") | 53 | + dataset <- dataset[!is.na(dataset$Date),] |
54 | + | ||
55 | + #Format properly the date | ||
56 | + dataset$Date <- as.POSIXct(dataset$Date*86400, tz = "GMT", origin = dateOffset) | ||
27 | 57 | ||
28 | dataset | 58 | dataset |
29 | } | 59 | } |
30 | 60 | ||
31 | -#' Title | 61 | + |
62 | + | ||
63 | +#' Parse every Excel file in a folder | ||
32 | #' | 64 | #' |
33 | -#' @param folder | ||
34 | -#' @param cols | 65 | +#' @param folder path to the folder to iterate |
66 | +#' @param cols columns to parse in each file | ||
67 | +#' @param dateOffset origin used to compute the dates in each file | ||
35 | #' | 68 | #' |
36 | -#' @return | 69 | +#' @return data.frame |
37 | #' @export | 70 | #' @export |
38 | #' | 71 | #' |
39 | #' @examples | 72 | #' @examples |
40 | -ParseHMFolder <- function(folder, cols){ | ||
41 | - filelist <- list.files(folder) | ||
42 | - #frames <- lapply(paste(folder,filelist,sep = ""),ParseHMExcel) | 73 | +#' data.pro <- ParseHMFolder("./data/hackmageddon/Format4/", c(1, 9, 5, 3), "1899-12-30") |
74 | +ParseHMFolder <- function(folder, cols, dateOffset){ | ||
43 | 75 | ||
44 | - myFile <- paste(folder,filelist[1],sep = "") | ||
45 | - dataset <- ProccesHMRaw(ParseHMExcel(myFile, cols)) | 76 | + #List excel files into the folder |
77 | + filelist <- list.files(folder, pattern = "*.xls*") | ||
46 | 78 | ||
79 | + #Iterate for each file appending the returned data.frame | ||
80 | + dataset <- ProccesHMRaw(ParseHMExcel(paste(folder,filelist[1],sep = ""), cols), dateOffset) | ||
47 | for (i in 2:length(filelist)) { | 81 | for (i in 2:length(filelist)) { |
48 | - dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols))) | 82 | + dataset <- rbind(dataset, ProccesHMRaw(ParseHMExcel(paste(folder,filelist[i],sep = ""), cols), dateOffset)) |
49 | } | 83 | } |
50 | 84 | ||
51 | dataset | 85 | dataset |
52 | } | 86 | } |
53 | 87 | ||
54 | -GetAttacksData <- function() { | ||
55 | - | ||
56 | - format1 <- ParseHMFolder("./data/hackmageddon/Format1/", c(2, 9, 7, 6)) | ||
57 | - format2 <- ParseHMFolder("./data/hackmageddon/Format2/", c(2,7, 5, 6)) | ||
58 | - format3 <- ParseHMFolder("./data/hackmageddon/Format3/", c(2, 9, 7, 6)) | ||
59 | - format4 <- ParseHMFolder("./data/hackmageddon/Format4/", c(1, 9, 5, 3)) | ||
60 | - | ||
61 | - dataset <- rbind(format1, format2, format3, format4) | 88 | +#' Parse the package's default Hackmageddon data (2012-2016) |
89 | +#' | ||
90 | +#' @return data.frame | ||
91 | +#' @export | ||
92 | +#' | ||
93 | +#' @examples | ||
94 | +#' Attacks <- GetDefaultAttacksData() | ||
95 | +GetDefaultAttacksData <- function() { | ||
96 | + | ||
97 | + #Parsing each different folder with the correct properties | ||
98 | + format1 <- ParseHMFolder("./data/hackmageddon/Format1/", c(2, 9, 7, 6), "1899-12-30") | ||
99 | + format2 <- ParseHMFolder("./data/hackmageddon/Format2/", c(2, 7, 5, 3), "1904-01-01") | ||
100 | + format3 <- ParseHMFolder("./data/hackmageddon/Format3/", c(2, 9, 7, 6), "1904-01-01") | ||
101 | + format3.2 <- ParseHMFolder("./data/hackmageddon/Format3/Format3.2/", c(2, 9, 7, 6), "1899-12-30") | ||
102 | + format4 <- ParseHMFolder("./data/hackmageddon/Format4/", c(1, 9, 5, 3), "1899-12-30") | ||
103 | + | ||
104 | + #Appending every data.frame in the standard format | ||
105 | + dataset <- rbind(format1, format2, format3, format3.2, format4) | ||
62 | 106 | ||
63 | dataset | 107 | dataset |
64 | } | 108 | } |
ISO27001effectiveness/R/ISOSurvey_Parser.R
@@ -17,10 +17,12 @@ | @@ -17,10 +17,12 @@ | ||
17 | #' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) | 17 | #' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) |
18 | ParseExcelFileRaw <- function(file, sheet){ | 18 | ParseExcelFileRaw <- function(file, sheet){ |
19 | 19 | ||
20 | + #Checks | ||
20 | if (!file.exists(file)) { | 21 | if (!file.exists(file)) { |
21 | stop(paste("Error, file [", file, "] not found")) | 22 | stop(paste("Error, file [", file, "] not found")) |
22 | } | 23 | } |
23 | 24 | ||
25 | + #Parse the excel file | ||
24 | dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE) | 26 | dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE) |
25 | 27 | ||
26 | dataset | 28 | dataset |
@@ -28,9 +30,41 @@ ParseExcelFileRaw <- function(file, sheet){ | @@ -28,9 +30,41 @@ ParseExcelFileRaw <- function(file, sheet){ | ||
28 | 30 | ||
29 | 31 | ||
30 | 32 | ||
31 | -#' Process raw data from ISO survey | 33 | +#' Process raw data parsed from the ISO Survey 27001 Excel file |
34 | +#' | ||
35 | +#' @param dataset.raw data.frame with raw data | ||
36 | +#' @param years list of years to include, each preceded by an X (e.g. "X2015") | ||
37 | +#' | ||
38 | +#' @return data.frame | ||
39 | +#' @export | ||
40 | +#' | ||
41 | +#' @examples | ||
42 | +#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
43 | +ProccesISOSurveyRaw <- function(dataset.raw, years){ | ||
44 | + | ||
45 | + #Change NAs to zeros | ||
46 | + dataset <- dataset.raw | ||
47 | + dataset[is.na(dataset)] <- 0 | ||
48 | + | ||
49 | + vars <- names(dataset) | ||
50 | + | ||
51 | + #Remove blank rows | ||
52 | + if ("INDUSTRIAL.SECTOR" == vars[1]) { | ||
53 | + dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",] | ||
54 | + }else if ("Country" == vars[1]) { | ||
55 | + dataset <- dataset[!dataset$Country == "",] | ||
56 | + } | ||
57 | + | ||
58 | + #Remove years not included in years param | ||
59 | + years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years)) | ||
60 | + dataset <- dataset[,years_checked] | ||
61 | + | ||
62 | + | ||
63 | + dataset | ||
64 | +} | ||
65 | + | ||
66 | +#' Join data from ISOSurvey with 2 letter code countries plus process raw.data | ||
32 | #' | 67 | #' |
33 | -#' Proccess the raw data from ISO survey to replace NAs, normalizate country names and filter years | ||
34 | #' @param dataset.raw raw data from ISO Survey excel file | 68 | #' @param dataset.raw raw data from ISO Survey excel file |
35 | #' @param years List of years to return, c("X2006", "X2010", ...) | 69 | #' @param years List of years to return, c("X2006", "X2010", ...) |
36 | #' | 70 | #' |
@@ -40,21 +74,62 @@ ParseExcelFileRaw <- function(file, sheet){ | @@ -40,21 +74,62 @@ ParseExcelFileRaw <- function(file, sheet){ | ||
40 | #' | 74 | #' |
41 | #' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | 75 | #' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) |
42 | #' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | 76 | #' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) |
43 | -#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
44 | ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){ | 77 | ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){ |
45 | 78 | ||
46 | - #NAs to 0s | ||
47 | - dataset <- dataset.raw | ||
48 | - dataset[is.na(dataset)] <- 0 | 79 | + #Standard processing |
80 | + dataset <- ProccesISOSurveyRaw(dataset.raw, years) | ||
49 | 81 | ||
50 | #Translate country names to 2 letter code | 82 | #Translate country names to 2 letter code |
51 | CountryNames <- GetCountryAbrev() | 83 | CountryNames <- GetCountryAbrev() |
52 | - | ||
53 | dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE) | 84 | dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE) |
54 | 85 | ||
55 | - vars <- names(dataset) | ||
56 | - years_checked <- intersect(vars, years) | ||
57 | - dataset <- dataset[,c(c("Country", "country_short"), years_checked)] | ||
58 | - | ||
59 | dataset | 86 | dataset |
60 | } | 87 | } |
88 | + | ||
89 | + | ||
90 | + | ||
91 | +#' Get data of certificates per year and country from ISO 27001 | ||
92 | +#' | ||
93 | +#' @return data.frame | ||
94 | +#' @export | ||
95 | +#' | ||
96 | +#' @examples | ||
97 | +#' Cert_PerCountry <- GetISOSurveyCertsPerCountry() | ||
98 | +GetISOSurveyCertsPerCountry <- function() { | ||
99 | + Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) | ||
100 | + | ||
101 | + Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015")) | ||
102 | + | ||
103 | + Cert_PerCountry | ||
104 | +} | ||
105 | + | ||
106 | +#' Get data of sites per year and country from ISO 27001 | ||
107 | +#' | ||
108 | +#' @return data.frame | ||
109 | +#' @export | ||
110 | +#' | ||
111 | +#' @examples | ||
112 | +#' Sites_PerCountry <- GetISOSurveySitesPerCountry() | ||
113 | +GetISOSurveySitesPerCountry <- function() { | ||
114 | + Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) | ||
115 | + | ||
116 | + Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015")) | ||
117 | + | ||
118 | + Sites_PerCountry | ||
119 | +} | ||
120 | + | ||
121 | +#' Get data of certificates per year and sector from ISO 27001 | ||
122 | +#' | ||
123 | +#' @return data.frame | ||
124 | +#' @export | ||
125 | +#' | ||
126 | +#' @examples | ||
127 | +#' Cert_PerSector <- GetISOSurveyCertsPerSector() | ||
128 | +GetISOSurveyCertsPerSector <- function() { | ||
129 | + Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) | ||
130 | + | ||
131 | + Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015")) | ||
132 | + | ||
133 | + Cert_PerSector | ||
134 | + | ||
135 | +} |
ISO27001effectiveness/R/Util.R
1 | #-----------------------------Util functions-------------------------------------------------- | 1 | #-----------------------------Util functions-------------------------------------------------- |
2 | 2 | ||
3 | - | ||
4 | -#' Install and load required libraries | ||
5 | -#' | ||
6 | -#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded. | ||
7 | -#' Libraries installed: | ||
8 | -#' xlsx to parse excel files like ISO survey source format | ||
9 | -LoadParserLibraries <- function(){ | ||
10 | - if (!require("xlsx")) | ||
11 | - { | ||
12 | - install.packages("xlsx") | ||
13 | - if (!require("xlsx")) stop("Error while loading package [xlsx]") | ||
14 | - } | ||
15 | -} | ||
16 | - | ||
17 | #' Return the 2 letter code of a country relation | 3 | #' Return the 2 letter code of a country relation |
18 | #' | 4 | #' |
19 | #' Relation of country names included in the ISO Survey input file with 2 letter code | 5 | #' Relation of country names included in the ISO Survey input file with 2 letter code |
ISO27001effectiveness/data/hackmageddon/Format3/16-31 December 2014 Cyber Attacks Timeline.xlsx
No preview for this file type
ISO27001effectiveness/data/hackmageddon/Format3/16-31 Jun 2014 Cyber Attacks Timeline.xlsx deleted
No preview for this file type
ISO27001effectiveness/data/hackmageddon/Format3/1-15 Jan 2014 Cyber Attacks Timeline.xls renamed to ISO27001effectiveness/data/hackmageddon/Format3/Format3.2/1-15 Jan 2014 Cyber Attacks Timeline.xls
No preview for this file type
ISO27001effectiveness/data/hackmageddon/Format3/16-31 Jan 2014 Cyber Attacks Timeline.xlsx renamed to ISO27001effectiveness/data/hackmageddon/Format3/Format3.2/16-31 Jan 2014 Cyber Attacks Timeline.xlsx
No preview for this file type
ISO27001effectiveness/data/test/16-31 Mar 2014 Cyber Attacks Timeline.xlsx deleted
No preview for this file type
ISO27001effectiveness/data/test/16-31 Mar 2015 Cyber Attacks Timeline.xlsx deleted
No preview for this file type