Commit eb9b2537b7905ddb9f8b2c6d6c2552a70c99b4ad

Authored by Miguel Tuñón
1 parent 2630a1ef

Added Main.R to test the implementations, changed [ProccesISOSurveyByCountryRaw]…

… to allow filtering by year
ISO27001effectiveness/Main.R 0 → 100644
  1 +source("./R/Util.R")
  2 +LoadLibraries()
  3 +source("./R/ISOSurvey_Parser.R")
  4 +
  5 +Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
  6 +Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
  7 +Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
  8 +
  9 +Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
  10 +Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
  11 +#Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ISO27001effectiveness/R/ISOSurvey_Parser.R
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 #-----------------------------Parser Excel - ISO survey------------------------------------------------ 2 #-----------------------------Parser Excel - ISO survey------------------------------------------------
3 #------------------------------------------------------------------------------------------------------ 3 #------------------------------------------------------------------------------------------------------
4 4
5 -#-----------------------------Read from file-----------------------------------------------------------  
6 5
7 #' Get data frame from an excel file 6 #' Get data frame from an excel file
8 #' 7 #'
@@ -11,6 +10,11 @@ @@ -11,6 +10,11 @@
11 #' @param sheet index of sheet to parse 10 #' @param sheet index of sheet to parse
12 #' 11 #'
13 #' @return data.frame 12 #' @return data.frame
  13 +#'
  14 +#' @examples
  15 +#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
  16 +#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
  17 +#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
14 ParseExcelFileRaw <- function(file, sheet){ 18 ParseExcelFileRaw <- function(file, sheet){
15 19
16 if (!file.exists(file)) { 20 if (!file.exists(file)) {
@@ -22,21 +26,35 @@ ParseExcelFileRaw &lt;- function(file, sheet){ @@ -22,21 +26,35 @@ ParseExcelFileRaw &lt;- function(file, sheet){
22 dataset 26 dataset
23 } 27 }
24 28
  29 +
  30 +
25 #' Process raw data from ISO survey 31 #' Process raw data from ISO survey
26 #' 32 #'
27 -#' ... 33 +#' Process the raw data from the ISO survey to replace NAs, normalize country names and filter years
28 #' @param dataset.raw raw data from ISO Survey excel file 34 #' @param dataset.raw raw data from ISO Survey excel file
  35 +#' @param years Names of the year columns to keep, e.g. c("X2006", "X2010", ...)
29 #' 36 #'
30 #' @return data.frame 37 #' @return data.frame
31 -ProccesISOSurveyRaw <- function(dataset.raw){  
32 - dataset <- dataset.raw[is.na(dataset.raw)] 38 +#'
  39 +#' @examples
  40 +#'
  41 +#' Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
  42 +#' Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
  43 +#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
  44 +ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){
  45 +
  46 + #NAs to 0s
  47 + dataset <- dataset.raw
33 dataset[is.na(dataset)] <- 0 48 dataset[is.na(dataset)] <- 0
34 49
35 #Translate country names to 2 letter code 50 #Translate country names to 2 letter code
  51 + CountryNames <- GetCountryAbrev()
  52 +
  53 + dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)
  54 +
  55 + vars <- names(dataset)
  56 + years_checked <- intersect(vars, years)
  57 + dataset <- dataset[,c(c("Country", "country_short"), years_checked)]
36 58
37 dataset 59 dataset
38 } 60 }
39 -  
40 -#ISO_survey_certificates_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 1)  
41 -#ISO_survey_sites_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 2)  
42 -#ISO_survey_certificates_sector <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 3)  
ISO27001effectiveness/R/Util.R
@@ -14,18 +14,17 @@ LoadParserLibraries &lt;- function(){ @@ -14,18 +14,17 @@ LoadParserLibraries &lt;- function(){
14 } 14 }
15 } 15 }
16 16
17 -#' Return the 2 letter code of a country 17 +#' Return the relation between country names and their 2 letter codes
18 #' 18 #'
19 -#' Translate from large country names included in the ISO Survey input file to 2 letter code 19 +#' Maps the country names used in the ISO Survey input file to the 2 letter codes
20 #' used in the Hackmageddon input files 20 #' used in the Hackmageddon input files
21 -#' @param largeName The normal large name of the country  
22 #' 21 #'
23 -#' @return character with the 2 letter code of the country  
24 -GetCountryAbrev <- function(largeName){  
25 - countries_large <- c("Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belarus", "Belgium", "Belize", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Bulgaria", "Cambodia", "Canada", "Cape Verde", "Chile", "China", "Hong Kong, China", "Taipei, Chinese", "Colombia", "Congo, Republic of", "Costa Rica", "CÔte D'Ivoire", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Finland", "France", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Guatemala", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Libyan Arab Jamahiriya", "Lithuania", "Luxembourg", "Macau, China", "Malawi", "Malaysia", "Mali", "Malta", "Mauritius", "Mexico", "Moldova, Republic of", "Montenegro", "Morocco", "Mozambique", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Palestine", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Saint Lucia", "Saint Vincent and the Grenadines", "San Marino, Republic of", "Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Tanzania, United Republic of", "Thailand", "The Former Yugoslav Republic of Macedonia", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "Uruguay", "Uzbekistan", "Venezuela", "Viet Nam", "Yemen")  
26 - countries_short <- c("AF", "AL", "DZ", "AD", "AO", "AR", "AM", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BZ", "BO", "BA", "BW", "BR", "BG", "KH", "CA", "CV", "CL", "CN", "HK", "CN", "CO", "CD", "CR", "CI", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "ET", "FI", "FR", "GE", "DE", "GH", "GI", "GR", "GT", "HN", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IL", "IT", "JM", "JP", "JO", "KZ", "KE", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LY", "LT", "LU", "MO", "MW", "MY", "ML", "MT", "MU", "MX", "MD", "ME", "MA", "MZ", "NA", "NP", "NL", "NZ", "NG", "NO", "OM", "PK", "PA", "PS", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "LC", "VC", "SM", "SA", "SN", "RS", "SG", "SK", "SI", "SO", "ZA", "ES", "LK", "SD", "SR", "SE", "CH", "SY", "TZ", "TH", "MK", "TT", "TN", "TR", "TM", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VE", "VN", "YE") 22 +#' @return data.frame
  23 +GetCountryAbrev <- function(){
  24 + Country <- c("Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra", "Angola", "Anguilla", "Antarctica", "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan", "Bolivia", "Bonaire", "Bosnia and Herzegovina", "Botswana", "Bouvet Island", "Brazil", "British Indian Ocean Territory", "Brunei Darussalam", "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Cayman Islands", "Central African Republic", "Chad", "Chile", "China", "Christmas Island", "Cocos (Keeling) Islands", "Colombia", "Comoros", "Congo", "Congo, Republic of", "Cook Islands", "Costa Rica", "Croatia", "Cuba", "Curaçao", "Cyprus", "Czech Republic", "Côte D'ivoire", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Falkland Islands (Malvinas)", "Faroe Islands", "Fiji", "Finland", "France", "French Guiana", "French Polynesia", "French Southern Territories", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Greenland", "Grenada", "Guadeloupe", "Guam", "Guatemala", "Guernsey", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Heard Island and McDonald Mcdonald Islands", "Holy See (Vatican City State)", "Honduras", "Hong Kong, China", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Jamaica", "Japan", "Jersey", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Macau, China", "The Former Yugoslav Republic of Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", 
"Martinique", "Mauritania", "Mauritius", "Mayotte", "Mexico", "Micronesia, Federated States of", "Moldova, Republic of", "Monaco", "Mongolia", "Montenegro", "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Caledonia", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "Norfolk Island", "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Pitcairn", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Rwanda", "Reunion", "Saint Barthelemy", "Saint Helena", "Saint Kitts and Nevis", "Saint Lucia", "Saint Martin (French part)", "Saint Pierre and Miquelon", "Saint Vincent and the Grenadines", "Samoa", "San Marino, Republic of", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Sint Maarten (Dutch part)", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "South Georgia and the South Sandwich Islands", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Svalbard and Jan Mayen", "Swaziland", "Sweden", "Switzerland", "Syrian Arab Republic", "Taiwan, Province of China", "Tajikistan", "Tanzania, United Republic of", "Thailand", "Timor-Leste", "Togo", "Tokelau", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Turks and Caicos Islands", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "United States Minor Outlying Islands", "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", "Viet Nam", "British Virgin Islands", "US Virgin Islands", "Wallis and Futuna", "Western Sahara", "Yemen", "Zambia", "Zimbabwe", "Aland Islands", "Taipei, Chinese")
  25 + country_short <- c("AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AQ", "AG", "AR", "AM", "AW", "AU", "AT", "AZ", "BS", "BH", "BD", "BB", "BY", "BE", "BZ", "BJ", "BM", "BT", "BO", "BQ", "BA", "BW", "BV", "BR", "IO", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "KY", "CF", "TD", "CL", "CN", "CX", "CC", "CO", "KM", "CG", "CD", "CK", "CR", "HR", "CU", "CW", "CY", "CZ", "CI", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "GQ", "ER", "EE", "ET", "FK", "FO", "FJ", "FI", "FR", "GF", "PF", "TF", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GD", "GP", "GU", "GT", "GG", "GN", "GW", "GY", "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LS", "LR", "LY", "LI", "LT", "LU", "MO", "MK", "MG", "MW", "MY", "MV", "ML", "MT", "MH", "MQ", "MR", "MU", "YT", "MX", "FM", "MD", "MC", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NC", "NZ", "NI", "NE", "NG", "NU", "NF", "MP", "NO", "OM", "PK", "PW", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "RE", "BL", "SH", "KN", "LC", "MF", "PM", "VC", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SX", "SK", "SI", "SB", "SO", "ZA", "GS", "SS", "ES", "LK", "SD", "SR", "SJ", "SZ", "SE", "CH", "SY", "TW", "TJ", "TZ", "TH", "TL", "TG", "TK", "TO", "TT", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "AE", "GB", "US", "UM", "UY", "UZ", "VU", "VE", "VN", "VG", "VI", "WF", "EH", "YE", "ZM", "ZW", "AX", "CN")
27 26
28 - dataset <- data.frame(countries_large, countries_short) 27 + dataset <- data.frame(Country, country_short)
29 28
30 - toString(dataset[dataset$countries_large == largeName, 2]) 29 + dataset
31 } 30 }