Commit 2630a1ef588fe250f219a66709a53aaa9fd6c84d

Authored by Miguel Tuñón
1 parent 8beb9cc6

Added Util.R for common util functions like [GetCountryAbrev] and started to build function [ProccesISOSurveyRaw], changed [ParseExcelFileRaw] to use colClasses
ISO27001effectiveness/R/ISOSurvey_Parser.R
... ... @@ -2,24 +2,6 @@
2 2 #-----------------------------Parser Excel - ISO survey------------------------------------------------
3 3 #------------------------------------------------------------------------------------------------------
4 4  
5   -
6   -
7   -#-----------------------------Library tests / install--------------------------------------------------
8   -
9   -
10   -#' Install and load required libraries
11   -#'
12   -#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded.
13   -#' Libraries installed:
14   -#' xlsx to parse excel files like ISO survey source format
15   -LoadParserLibraries <- function(){
16   - if (!require("xlsx"))
17   - {
18   - install.packages("xlsx")
19   - if (!require("xlsx")) stop("Error while loading package [xlsx]")
20   - }
21   -}
22   -
23 5 #-----------------------------Read from file-----------------------------------------------------------
24 6  
25 7 #' Get data frame from an excel file
... ... @@ -35,7 +17,7 @@ ParseExcelFileRaw <- function(file, sheet){
35 17 stop(paste("Error, file [", file, "] not found"))
36 18 }
37 19  
38   - dataset <- read.xlsx2(file, sheet, header = TRUE)
  20 + dataset <- read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
39 21  
40 22 dataset
41 23 }
... ... @@ -47,7 +29,12 @@ ParseExcelFileRaw <- function(file, sheet){
47 29 #'
48 30 #' @return data.frame
ProccesISOSurveyRaw <- function(dataset.raw) {
  # Purpose: return the raw ISO survey data with every missing cell set to 0.
  #
  # dataset.raw: the raw data set, e.g. the data.frame returned by
  #              ParseExcelFileRaw.
  # Returns:     an object of the same shape as dataset.raw with NA replaced
  #              by 0.

  # Start from the complete raw data. Indexing it with is.na(dataset.raw)
  # would keep only the missing cells (as a plain vector) and discard every
  # real value, so the result would be nothing but zeros.
  dataset <- dataset.raw

  # Missing cells are replaced by 0.
  dataset[is.na(dataset)] <- 0

  # Translate country names to 2 letter code (not implemented yet)

  dataset
}
52 39  
53 40 #ISO_survey_certificates_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 1)
... ...
ISO27001effectiveness/R/Util.R 0 → 100644
  1 +#-----------------------------Util functions--------------------------------------------------
  2 +
  3 +
  4 +#' Install and load required libraries
  5 +#'
  6 +#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded.
  7 +#' Libraries installed:
  8 +#' xlsx to parse excel files like ISO survey source format
  9 +LoadParserLibraries <- function(){
  10 + if (!require("xlsx"))
  11 + {
  12 + install.packages("xlsx")
  13 + if (!require("xlsx")) stop("Error while loading package [xlsx]")
  14 + }
  15 +}
  16 +
  17 +#' Return the 2 letter code of a country
  18 +#'
  19 +#' Translate from large country names included in the ISO Survey input file to 2 letter code
  20 +#' included on the hackmaggedon input files
  21 +#' @param largeName The normal large name of the country
  22 +#'
  23 +#' @return character with the 2 letter code of the country
  24 +GetCountryAbrev <- function(largeName){
  25 + countries_large <- c("Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belarus", "Belgium", "Belize", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Bulgaria", "Cambodia", "Canada", "Cape Verde", "Chile", "China", "Hong Kong, China", "Taipei, Chinese", "Colombia", "Congo, Republic of", "Costa Rica", "CÔte D'Ivoire", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Finland", "France", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Guatemala", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Libyan Arab Jamahiriya", "Lithuania", "Luxembourg", "Macau, China", "Malawi", "Malaysia", "Mali", "Malta", "Mauritius", "Mexico", "Moldova, Republic of", "Montenegro", "Morocco", "Mozambique", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Palestine", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Saint Lucia", "Saint Vincent and the Grenadines", "San Marino, Republic of", "Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Tanzania, United Republic of", "Thailand", "The Former Yugoslav Republic of Macedonia", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "Uruguay", "Uzbekistan", "Venezuela", "Viet Nam", "Yemen")
  26 + countries_short <- c("AF", "AL", "DZ", "AD", "AO", "AR", "AM", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BZ", "BO", "BA", "BW", "BR", "BG", "KH", "CA", "CV", "CL", "CN", "HK", "CN", "CO", "CD", "CR", "CI", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "ET", "FI", "FR", "GE", "DE", "GH", "GI", "GR", "GT", "HN", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IL", "IT", "JM", "JP", "JO", "KZ", "KE", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LY", "LT", "LU", "MO", "MW", "MY", "ML", "MT", "MU", "MX", "MD", "ME", "MA", "MZ", "NA", "NP", "NL", "NZ", "NG", "NO", "OM", "PK", "PA", "PS", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "LC", "VC", "SM", "SA", "SN", "RS", "SG", "SK", "SI", "SO", "ZA", "ES", "LK", "SD", "SR", "SE", "CH", "SY", "TZ", "TH", "MK", "TT", "TN", "TR", "TM", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VE", "VN", "YE")
  27 +
  28 + dataset <- data.frame(countries_large, countries_short)
  29 +
  30 + toString(dataset[dataset$countries_large == largeName, 2])
  31 +}
... ...