Commit a35b1822f74f45fef93d064c174e56344bbae169
Merge branch 'master' of https://gitlab.imanolbarba.net/imanol/DDS
Showing
3 changed files
with
73 additions
and
27 deletions
ISO27001effectiveness/Main.R
0 → 100644
1 | +source("./R/Util.R") | ||
2 | +LoadLibraries() | ||
3 | +source("./R/ISOSurvey_Parser.R") | ||
4 | + | ||
5 | +Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) | ||
6 | +Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) | ||
7 | +Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) | ||
8 | + | ||
9 | +Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
10 | +Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
11 | +#Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) |
ISO27001effectiveness/R/ISOSurvey_Parser.R
@@ -3,25 +3,6 @@ | @@ -3,25 +3,6 @@ | ||
3 | #------------------------------------------------------------------------------------------------------ | 3 | #------------------------------------------------------------------------------------------------------ |
4 | 4 | ||
5 | 5 | ||
6 | - | ||
7 | -#-----------------------------Library tests / install-------------------------------------------------- | ||
8 | - | ||
9 | - | ||
10 | -#' Install and load required libraries | ||
11 | -#' | ||
12 | -#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded. | ||
13 | -#' Libraries installed: | ||
14 | -#' xlsx to parse excel files like ISO survey source format | ||
15 | -LoadParserLibraries <- function(){ | ||
16 | - if (!require("xlsx")) | ||
17 | - { | ||
18 | - install.packages("xlsx") | ||
19 | - if (!require("xlsx")) stop("Error while loading package [xlsx]") | ||
20 | - } | ||
21 | -} | ||
22 | - | ||
23 | -#-----------------------------Read from file----------------------------------------------------------- | ||
24 | - | ||
25 | #' Get data frame from an excel file | 6 | #' Get data frame from an excel file |
26 | #' | 7 | #' |
27 | #' Check if the file exists and then parse it into a data.frame | 8 | #' Check if the file exists and then parse it into a data.frame |
@@ -29,27 +10,51 @@ LoadParserLibraries <- function(){ | @@ -29,27 +10,51 @@ LoadParserLibraries <- function(){ | ||
29 | #' @param sheet index of sheet to parse | 10 | #' @param sheet index of sheet to parse |
30 | #' | 11 | #' |
31 | #' @return data.frame | 12 | #' @return data.frame |
13 | +#' | ||
14 | +#' @examples | ||
15 | +#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) | ||
16 | +#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) | ||
17 | +#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) | ||
32 | ParseExcelFileRaw <- function(file, sheet){ | 18 | ParseExcelFileRaw <- function(file, sheet){ |
33 | 19 | ||
34 | if (!file.exists(file)) { | 20 | if (!file.exists(file)) { |
35 | stop(paste("Error, file [", file, "] not found")) | 21 | stop(paste("Error, file [", file, "] not found")) |
36 | } | 22 | } |
37 | 23 | ||
38 | - dataset <- read.xlsx2(file, sheet, header = TRUE) | 24 | + dataset <- read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE) |
39 | 25 | ||
40 | dataset | 26 | dataset |
41 | } | 27 | } |
42 | 28 | ||
29 | + | ||
30 | + | ||
43 | #' Process raw data from ISO survey | 31 | #' Process raw data from ISO survey |
44 | #' | 32 | #' |
45 | -#' ... | 33 | +#' Process the raw data from the ISO survey: replace NAs with 0, add the 2 letter country code and keep only the requested year columns | ||
46 | #' @param dataset.raw raw data from ISO Survey excel file | 34 | #' @param dataset.raw raw data from ISO Survey excel file |
35 | +#' @param years List of years to return, c("X2006", "X2010", ...) | ||
47 | #' | 36 | #' |
48 | #' @return data.frame | 37 | #' @return data.frame |
49 | -ProccesISOSurveyRaw <- function(dataset.raw){ | ||
50 | -#Complete | ||
51 | -} | 38 | +#' |
39 | +#' @examples | ||
40 | +#' | ||
41 | +#' Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
42 | +#' Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
43 | +#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) | ||
44 | +ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){ | ||
45 | + | ||
46 | + #NAs to 0s | ||
47 | + dataset <- dataset.raw | ||
48 | + dataset[is.na(dataset)] <- 0 | ||
52 | 49 | ||
53 | -#ISO_survey_certificates_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 1) | ||
54 | -#ISO_survey_sites_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 2) | ||
55 | -#ISO_survey_certificates_sector <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 3) | 50 | + #Translate country names to 2 letter code |
51 | + CountryNames <- GetCountryAbrev() | ||
52 | + | ||
53 | + dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE) | ||
54 | + | ||
55 | + vars <- names(dataset) | ||
56 | + years_checked <- intersect(vars, years) | ||
57 | + dataset <- dataset[,c(c("Country", "country_short"), years_checked)] | ||
58 | + | ||
59 | + dataset | ||
60 | +} |
ISO27001effectiveness/R/Util.R
0 → 100644
1 | +#-----------------------------Util functions-------------------------------------------------- | ||
2 | + | ||
3 | + | ||
4 | +#' Install and load required libraries | ||
5 | +#' | ||
6 | +#' This function checks whether every required library is installed; any library that is missing is installed and then loaded. | ||
7 | +#' Libraries installed: | ||
8 | +#' xlsx to parse excel files like ISO survey source format | ||
9 | +LoadParserLibraries <- function(){ | ||
10 | + if (!require("xlsx")) | ||
11 | + { | ||
12 | + install.packages("xlsx") | ||
13 | + if (!require("xlsx")) stop("Error while loading package [xlsx]") | ||
14 | + } | ||
15 | +} | ||
16 | + | ||
17 | +#' Return the relation between country names and their 2 letter codes | ||
18 | +#' | ||
19 | +#' Relation between the country names included in the ISO Survey input file and the 2 letter codes | ||
20 | +#' included in the Hackmageddon input files | ||
21 | +#' | ||
22 | +#' @return data.frame | ||
23 | +GetCountryAbrev <- function(){ | ||
24 | + Country <- c("Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra", "Angola", "Anguilla", "Antarctica", "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan", "Bolivia", "Bonaire", "Bosnia and Herzegovina", "Botswana", "Bouvet Island", "Brazil", "British Indian Ocean Territory", "Brunei Darussalam", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Cayman Islands", "Central African Republic", "Chad", "Chile", "China", "Christmas Island", "Cocos (Keeling) Islands", "Colombia", "Comoros", "Congo", "Congo, Republic of", "Cook Islands", "Costa Rica", "Croatia", "Cuba", "Curaçao", "Cyprus", "Czech Republic", "Côte D'ivoire", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Falkland Islands (Malvinas)", "Faroe Islands", "Fiji", "Finland", "France", "French Guiana", "French Polynesia", "French Southern Territories", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Greenland", "Grenada", "Guadeloupe", "Guam", "Guatemala", "Guernsey", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Heard Island and McDonald Mcdonald Islands", "Holy See (Vatican City State)", "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Jamaica", "Japan", "Jersey", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macau, China", "The Former Yugoslav Republic of Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", 
"Martinique", "Mauritania", "Mauritius", "Mayotte", "Mexico", "Micronesia, Federated States of", "Moldova, Republic of", "Monaco", "Mongolia", "Montenegro", "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Caledonia", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "Norfolk Island", "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Pitcairn", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Rwanda", "Reunion", "Saint Barthelemy", "Saint Helena", "Saint Kitts and Nevis", "Saint Lucia", "Saint Martin (French part)", "Saint Pierre and Miquelon", "Saint Vincent and the Grenadines", "Samoa", "San Marino, Republic of", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Sint Maarten (Dutch part)", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "South Georgia and the South Sandwich Islands", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Svalbard and Jan Mayen", "Swaziland", "Sweden", "Switzerland", "Syrian Arab Republic", "Taiwan, Province of China", "Tajikistan", "Tanzania, United Republic of", "Thailand", "Timor-Leste", "Togo", "Tokelau", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Turks and Caicos Islands", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "United States Minor Outlying Islands", "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", "Viet Nam", "British Virgin Islands", "US Virgin Islands", "Wallis and Futuna", "Western Sahara", "Yemen", "Zambia", "Zimbabwe", "Aland Islands", "Taipei, Chinese") | ||
25 | + country_short <- c("AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AQ", "AG", "AR", "AM", "AW", "AU", "AT", "AZ", "BS", "BH", "BD", "BB", "BY", "BE", "BZ", "BJ", "BM", "BT", "BO", "BQ", "BA", "BW", "BV", "BR", "IO", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "KY", "CF", "TD", "CL", "CN", "CX", "CC", "CO", "KM", "CG", "CD", "CK", "CR", "HR", "CU", "CW", "CY", "CZ", "CI", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "GQ", "ER", "EE", "ET", "FK", "FO", "FJ", "FI", "FR", "GF", "PF", "TF", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GD", "GP", "GU", "GT", "GG", "GN", "GW", "GY", "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LS", "LR", "LY", "LI", "LT", "LU", "MO", "MK", "MG", "MW", "MY", "MV", "ML", "MT", "MH", "MQ", "MR", "MU", "YT", "MX", "FM", "MD", "MC", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NC", "NZ", "NI", "NE", "NG", "NU", "NF", "MP", "NO", "OM", "PK", "PW", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "RE", "BL", "SH", "KN", "LC", "MF", "PM", "VC", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SX", "SK", "SI", "SB", "SO", "ZA", "GS", "SS", "ES", "LK", "SD", "SR", "SJ", "SZ", "SE", "CH", "SY", "TW", "TJ", "TZ", "TH", "TL", "TG", "TK", "TO", "TT", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "AE", "GB", "US", "UM", "UY", "UZ", "VU", "VE", "VN", "VG", "VI", "WF", "EH", "YE", "ZM", "ZW", "AX", "CN") | ||
26 | + | ||
27 | + dataset <- data.frame(Country, country_short) | ||
28 | + | ||
29 | + dataset | ||
30 | +} |