#------------------------------------------------------------------------------------------------------ #-----------------------------Parser Excel - ISO survey------------------------------------------------ #------------------------------------------------------------------------------------------------------ #' Get data frame from an excel file #' #' Check if the file exists and then parse it into a data.frame #' @param file path to excel file #' @param sheet index of sheet to parse #' #' @return data.frame #' #' @examples #' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) #' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) #' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) ParseExcelFileRaw <- function(file, sheet){ #Checks if (!file.exists(file)) { stop(paste("Error, file [", file, "] not found")) } #Parse the excel file dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE) dataset } #' PRocess raw data parsed from excel file ISOSurvey27001 #' #' @param dataset.raw data.frame with raw data #' @param years list of years to include preceded with a X #' #' @return data.frame #' @export #' #' @examples #' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) ProccesISOSurveyRaw <- function(dataset.raw, years){ #Change NAs to zeros dataset <- dataset.raw dataset[is.na(dataset)] <- 0 vars <- names(dataset) #Remove blanc rows if ("INDUSTRIAL.SECTOR" == vars[1]) { dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",] }else if ("Country" == vars[1]) { dataset <- dataset[!dataset$Country == "",] } #Remove years not included in years param years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years)) dataset <- dataset[,years_checked] dataset } #' Join data from ISOSurvey with 2 letter code countries plus process raw.data #' #' @param dataset.raw raw data from ISO Survey excel file #' @param years List of years to return, c("X2006", "X2010", ...) #' #' @return data.frame #' @export #' #' @examples #' #' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) #' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015")) ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){ #Standard proccess dataset <- ProccesISOSurveyRaw(dataset.raw, years) dataset$Country <- gsub("Bolivia","Bolivia, Plurinational State of",dataset$Country) dataset$Country <- gsub("Cape Verde","Cabo Verde",dataset$Country) dataset$Country <- gsub("Congo, Republic of","Congo, the Democratic Republic of the",dataset$Country) dataset$Country <- gsub("Côte D'ivoire","Cote d'Ivoire",dataset$Country) dataset$Country <- gsub("Gibraltar (UK)","Gibraltar",dataset$Country) dataset$Country <- gsub("Hong Kong, China","Hong Kong",dataset$Country) dataset$Country <- gsub("Macau, China","Macao",dataset$Country) dataset$Country <- gsub("Palestine","Palestine, State of",dataset$Country) dataset$Country <- gsub("San Marino, Republic of","San Marino",dataset$Country) dataset$Country <- gsub("Taipei, Chinese","Taiwan, Province of China",dataset$Country) dataset$Country <- gsub("The Former Yugoslav Republic of Macedonia","Macedonia, the former Yugoslav Republic of",dataset$Country) dataset$Country <- gsub("United States of America","United States",dataset$Country) dataset$Country <- gsub("Venezuela","Venezuela, Bolivarian Republic of",dataset$Country) #Translate country names to 2 letter code CountryNames <- data.frame(countrycode::countrycode_data$country.name, countrycode::countrycode_data$iso2c, countrycode::countrycode_data$continent) CountryNames <- setNames(CountryNames, c("Country","country_short", "Continent")) dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE) dataset <- dataset[!is.na(dataset$Continent),] dataset } #' Get data of certificates per year and country from IS27001 #' #' @return data.frame #' @export #' #' @examples #' Cert_PerCountry <- GetISOSurveyCertsPerCountry() GetISOSurveyCertsPerCountry <- function() { Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1) Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015")) Cert_PerCountry } #' Get data of sites per year and country from IS27001 #' #' @return data.frame #' @export #' #' @examples #' Sites_PerCountry <- GetISOSurveySitesPerCountry() GetISOSurveySitesPerCountry <- function() { Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2) Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015")) Sites_PerCountry } #' Get data of certificates per year and sector from IS27001 #' #' @return data.frame #' @export #' #' @examples #' Cert_PerSector <- GetISOSurveyCertsPerSector() GetISOSurveyCertsPerSector <- function() { Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3) Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015")) Cert_PerSector }