Added Util.R for common utility functions like [GetCountryAbrev], started to build function [ProccesISOSurveyRaw], and changed [ParseExcelFileRaw] to use colClasses

Added Util.R for common utility functions like [GetCountryAbrev], started to build function [ProccesISOSurveyRaw], and changed [ParseExcelFileRaw] to use colClasses
Miguel Tuñón
1 parent 8beb9cc6
Showing 2 changed files with 38 additions and 20 deletions
ISO27001effectiveness/R/ISOSurvey_Parser.R
ISO27001effectiveness/R/Util.R
@@ -2,24 +2,6 @@
 #-----------------------------Parser Excel - ISO survey------------------------------------------------
 #------------------------------------------------------------------------------------------------------
-
-
-#-----------------------------Library tests / install--------------------------------------------------
-
-
-#' Install and load required libraries
-#'
-#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded.
-#' Libraries installed:
-#'  xlsx to parse excel files like ISO survey source format
-LoadParserLibraries <- function(){
-  if (!require("xlsx"))
-  {
-    install.packages("xlsx")
-    if (!require("xlsx")) stop("Error while loading package [xlsx]")
-  }
-}
-
 #-----------------------------Read from file-----------------------------------------------------------
 #' Get data frame from an excel file
@@ -35,7 +17,7 @@ ParseExcelFileRaw &lt;- function(file, sheet){
     stop(paste("Error, file [", file, "] not found"))
   }
-  dataset <- read.xlsx2(file, sheet, header = TRUE)
+  dataset <- read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
   dataset
 }
@@ -47,7 +29,12 @@ ParseExcelFileRaw &lt;- function(file, sheet){
 #'
 #' @return data.frame
 ProccesISOSurveyRaw <- function(dataset.raw){
-#Complete
+  # Start from the complete raw data frame. Subsetting with the logical
+  # matrix is.na(dataset.raw) would keep only the NA cells (as a bare
+  # vector) and drop every real value, so the frame is copied as a whole.
+  dataset <- dataset.raw
+
+  # Replace every missing cell with 0
+  dataset[is.na(dataset)] <- 0
+
+  #Translate country names to 2 letter code
+
+  dataset
 }
 #ISO_survey_certificates_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 1)
+#-----------------------------Util functions--------------------------------------------------
+
+
+#' Make sure the parser dependencies are attached
+#'
+#' Tries to attach the xlsx package, which is used to parse excel files
+#' like the ISO survey source format. When the package cannot be attached
+#' it is installed and a second attach is attempted; if that second attempt
+#' also fails the function stops with an error.
+LoadParserLibraries <- function(){
+  xlsx.ready <- require("xlsx")
+  if (!xlsx.ready) {
+    # Not available yet: install it and retry once
+    install.packages("xlsx")
+    xlsx.ready <- require("xlsx")
+    if (!xlsx.ready) stop("Error while loading package [xlsx]")
+  }
+}
+
+#' Return the 2 letter code of a country
+#'
+#' Translate from large country names included in the ISO Survey input file to 2 letter code
+#' included on the hackmaggedon input files. The lookup is an exact,
+#' case-sensitive match against the names listed in the function body and is
+#' vectorised: one code is returned per element of \code{largeName}.
+#'
+#' @param largeName The normal large name of the country. A character vector
+#'   (or factor) of one or more names.
+#'
+#' @return character with the 2 letter code of the country, one element per
+#'   input name. Unknown names and \code{NA} give the empty string \code{""},
+#'   so a missing name can never be confused with Namibia's code \code{"NA"}.
+GetCountryAbrev <- function(largeName){
+  countries_large <- c("Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belarus", "Belgium", "Belize", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Bulgaria", "Cambodia", "Canada", "Cape Verde", "Chile", "China", "Hong Kong, China", "Taipei, Chinese", "Colombia", "Congo, Republic of", "Costa Rica", "CÔte D'Ivoire", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Finland", "France", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Guatemala", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Libyan Arab Jamahiriya", "Lithuania", "Luxembourg", "Macau, China", "Malawi", "Malaysia", "Mali", "Malta", "Mauritius", "Mexico", "Moldova, Republic of", "Montenegro", "Morocco", "Mozambique", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Palestine", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Saint Lucia", "Saint Vincent and the Grenadines", "San Marino, Republic of", "Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Tanzania, United Republic of", "Thailand", "The Former Yugoslav Republic of Macedonia", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "Uruguay", "Uzbekistan", "Venezuela", "Viet Nam", "Yemen")
+  # Same order as countries_large. ISO 3166-1 alpha-2: Taipei is "TW" and
+  # the Republic of the Congo is "CG" ("CD" is the Democratic Republic).
+  countries_short <- c("AF", "AL", "DZ", "AD", "AO", "AR", "AM", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BZ", "BO", "BA", "BW", "BR", "BG", "KH", "CA", "CV", "CL", "CN", "HK", "TW", "CO", "CG", "CR", "CI", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "ET", "FI", "FR", "GE", "DE", "GH", "GI", "GR", "GT", "HN", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IL", "IT", "JM", "JP", "JO", "KZ", "KE", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LY", "LT", "LU", "MO", "MW", "MY", "ML", "MT", "MU", "MX", "MD", "ME", "MA", "MZ", "NA", "NP", "NL", "NZ", "NG", "NO", "OM", "PK", "PA", "PS", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "LC", "VC", "SM", "SA", "SN", "RS", "SG", "SK", "SI", "SO", "ZA", "ES", "LK", "SD", "SR", "SE", "CH", "SY", "TZ", "TH", "MK", "TT", "TN", "TR", "TM", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VE", "VN", "YE")
+
+  # match() gives the position of each name in the table, or NA when the
+  # name is unknown or missing; it never recycles like `==` does.
+  codes <- countries_short[match(as.character(largeName), countries_large)]
+
+  # Only real misses are NA here: Namibia's code is the string "NA"
+  codes[is.na(codes)] <- ""
+  codes
+}