From 2630a1ef588fe250f219a66709a53aaa9fd6c84d Mon Sep 17 00:00:00 2001
From: Miguel Tuñón <mituga93@gmail.com>
Date: Fri, 16 Dec 2016 00:34:10 +0100
Subject: [PATCH] Added Util.R for common util functions like [GetCountryAbrev], started to build function [ProccesISOSurveyRaw], and changed [ParseExcelFileRaw] to use colClasses

---
 ISO27001effectiveness/R/ISOSurvey_Parser.R | 27 +++++++--------------------
 ISO27001effectiveness/R/Util.R             | 31 +++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 20 deletions(-)
 create mode 100644 ISO27001effectiveness/R/Util.R

diff --git a/ISO27001effectiveness/R/ISOSurvey_Parser.R b/ISO27001effectiveness/R/ISOSurvey_Parser.R
index 4566235..da02ebd 100644
--- a/ISO27001effectiveness/R/ISOSurvey_Parser.R
+++ b/ISO27001effectiveness/R/ISOSurvey_Parser.R
@@ -2,24 +2,6 @@
 #-----------------------------Parser Excel - ISO survey------------------------------------------------
 #------------------------------------------------------------------------------------------------------
 
-
-
-#-----------------------------Library tests / install--------------------------------------------------
-
-
-#' Install and load required libraries
-#'
-#' This function checks if every required library is installed to be loaded, if not they will be installed and then loaded.
-#' Libraries installed:
-#'  xlsx to parse excel files like ISO survey source format
-LoadParserLibraries <- function(){
-  if (!require("xlsx"))
-  {
-    install.packages("xlsx")
-    if (!require("xlsx")) stop("Error while loading package [xlsx]")
-  }
-}
-
 #-----------------------------Read from file-----------------------------------------------------------
 
 #' Get data frame from an excel file
@@ -35,7 +17,7 @@ ParseExcelFileRaw <- function(file, sheet){
     stop(paste("Error, file [", file, "] not found"))
   }
 
-  dataset <- read.xlsx2(file, sheet, header = TRUE)
+  dataset <- read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
 
   dataset
 }
@@ -47,7 +29,12 @@ ParseExcelFileRaw <- function(file, sheet){
 #'
 #' @return data.frame
 ProccesISOSurveyRaw <- function(dataset.raw){
-#Complete
+  # Work on a copy of the whole data frame; indexing it with is.na() would
+  # keep only the missing cells and drop every real value and the columns.
+  dataset <- dataset.raw
+  dataset[is.na(dataset)] <- 0
+
+  dataset
 }
 
 #ISO_survey_certificates_countries <- read.xlsx2(paste(getwd(), "/Data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", sep = ""), 1)
diff --git a/ISO27001effectiveness/R/Util.R b/ISO27001effectiveness/R/Util.R
new file mode 100644
index 0000000..0ee6184
--- /dev/null
+++ b/ISO27001effectiveness/R/Util.R
@@ -0,0 +1,31 @@
+#-----------------------------Util functions--------------------------------------------------
+
+
+#' Load the libraries required by the parser, installing them when missing
+#'
+#' Tries to attach every required package; a package that cannot be attached
+#' is installed and attached again, and an error is raised if that also fails.
+#' Packages handled:
+#'  xlsx, used to read Excel files such as the ISO survey source format
+LoadParserLibraries <- function(){
+  already.available <- require("xlsx")
+  if (already.available) return(invisible(NULL))
+  install.packages("xlsx")
+  if (!require("xlsx")) stop("Error while loading package [xlsx]")
+}
+
+#' Return the 2 letter code of a country
+#'
+#' Translates the long country names used in the ISO Survey input file into the
+#' 2 letter codes used in the hackmageddon input files; works on whole vectors.
+#' @param largeName Character vector with the long name(s) of the countries
+#' @return character vector with one 2 letter code per name, "" when unknown
+GetCountryAbrev <- function(largeName){
+  countries_large <- c("Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belarus", "Belgium", "Belize", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Bulgaria", "Cambodia", "Canada", "Cape Verde", "Chile", "China", "Hong Kong, China", "Taipei, Chinese", "Colombia", "Congo, Republic of", "Costa Rica", "CÔte D'Ivoire", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Estonia", "Ethiopia", "Finland", "France", "Georgia", "Germany", "Ghana", "Gibraltar (UK)", "Greece", "Guatemala", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran, Islamic Republic of", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Korea, Democratic People's Republic of", "Korea, Republic of", "Kuwait", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lebanon", "Libyan Arab Jamahiriya", "Lithuania", "Luxembourg", "Macau, China", "Malawi", "Malaysia", "Mali", "Malta", "Mauritius", "Mexico", "Moldova, Republic of", "Montenegro", "Morocco", "Mozambique", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Palestine", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", "Qatar", "Romania", "Russian Federation", "Saint Lucia", "Saint Vincent and the Grenadines", "San Marino, Republic of", "Saudi Arabia", "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Tanzania, United Republic of", "Thailand", "The Former Yugoslav Republic of Macedonia", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States of America", "Uruguay", "Uzbekistan", "Venezuela", "Viet Nam", "Yemen")
+  # "Congo, Republic of" is CG (CD is the Democratic Republic of the Congo). NOTE(review): "Taipei, Chinese" maps to CN rather than TW - confirm this is intended
+  countries_short <- c("AF", "AL", "DZ", "AD", "AO", "AR", "AM", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BZ", "BO", "BA", "BW", "BR", "BG", "KH", "CA", "CV", "CL", "CN", "HK", "CN", "CO", "CG", "CR", "CI", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "ET", "FI", "FR", "GE", "DE", "GH", "GI", "GR", "GT", "HN", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IL", "IT", "JM", "JP", "JO", "KZ", "KE", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LY", "LT", "LU", "MO", "MW", "MY", "ML", "MT", "MU", "MX", "MD", "ME", "MA", "MZ", "NA", "NP", "NL", "NZ", "NG", "NO", "OM", "PK", "PA", "PS", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "LC", "VC", "SM", "SA", "SN", "RS", "SG", "SK", "SI", "SO", "ZA", "ES", "LK", "SD", "SR", "SE", "CH", "SY", "TZ", "TH", "MK", "TT", "TN", "TR", "TM", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VE", "VN", "YE")
+  # match() looks every name up on its own, so vectors are not recycled against the table; unknown or NA names give ""
+  codes <- countries_short[match(as.character(largeName), countries_large)]
+  codes[is.na(codes)] <- ""
+  codes
+}
--
libgit2 0.22.2