ISOSurvey_Parser.R 4.09 KB
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------


#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
ParseExcelFileRaw <- function(file, sheet){

  #Checks
  if (!file.exists(file)) {
    stop(paste("Error, file [", file, "] not found"))
  }

  #Parse the excel file
  dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)

  dataset
}



#' PRocess raw data parsed from excel file ISOSurvey27001
#'
#' @param dataset.raw data.frame with raw data
#' @param years list of years to include preceded with a X
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyRaw <- function(dataset.raw, years){

  #Change NAs to zeros
  dataset <- dataset.raw
  dataset[is.na(dataset)] <- 0

  vars <- names(dataset)

  #Remove blanc rows
  if ("INDUSTRIAL.SECTOR" == vars[1]) {
    dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",]
  }else if ("Country" == vars[1]) {
    dataset <- dataset[!dataset$Country == "",]
  }

  #Remove years not included in years param
  years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years))
  dataset <- dataset[,years_checked]


  dataset
}

#' Join data from ISOSurvey with 2 letter code countries plus process raw.data
#'
#' @param dataset.raw raw data from ISO Survey excel file
#' @param years List of years to return, c("X2006", "X2010", ...)
#'
#' @return data.frame
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){

  #Standard proccess
  dataset <- ProccesISOSurveyRaw(dataset.raw, years)

  #Translate country names to 2 letter code
  CountryNames <- GetCountryAbrev()
  dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)

  dataset
}



#' Get data of certificates per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerCountry <- GetISOSurveyCertsPerCountry()
GetISOSurveyCertsPerCountry <- function() {
  Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)

  Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerCountry
}

#' Get data of sites per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Sites_PerCountry <- GetISOSurveySitesPerCountry()
GetISOSurveySitesPerCountry <- function() {
  Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)

  Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015"))

  Sites_PerCountry
}

#' Get data of certificates per year and sector from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- GetISOSurveyCertsPerSector()
GetISOSurveyCertsPerSector <- function() {
  Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)

  Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerSector

}