Blame view

ISO27001effectiveness/R/ISOSurvey_Parser.R 2.23 KB
Miguel Tuñón authored
1
2
3
4
5
6
7
8
9
10
11
12
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------


#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
Miguel Tuñón authored
13
14
15
16
17
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
Miguel Tuñón authored
18
19
20
21
22
23
ParseExcelFileRaw <- function(file, sheet){

  if (!file.exists(file)) {
    stop(paste("Error, file [", file, "] not found"))
  }
Miguel Tuñón authored
24
  dataset <- read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
Miguel Tuñón authored
25
26
27
28

  dataset
}
Miguel Tuñón authored
29
30
Miguel Tuñón authored
31
32
#' Process raw data from ISO survey
#'
Miguel Tuñón authored
33
#' Proccess the raw data from ISO survey to replace NAs, normalizate country names and filter years
Miguel Tuñón authored
34
#' @param dataset.raw raw data from ISO Survey excel file
Miguel Tuñón authored
35
#' @param years List of years to return, c("X2006", "X2010", ...)
Miguel Tuñón authored
36
37
#'
#' @return data.frame
Miguel Tuñón authored
38
39
40
41
42
43
44
45
46
47
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){

  #NAs to 0s
  dataset <- dataset.raw
Miguel Tuñón authored
48
49
50
  dataset[is.na(dataset)] <- 0

  #Translate country names to 2 letter code
Miguel Tuñón authored
51
52
53
54
55
56
57
  CountryNames <- GetCountryAbrev()

  dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)

  vars <- names(dataset)
  years_checked <- intersect(vars, years)
  dataset <- dataset[,c(c("Country", "country_short"), years_checked)]
Miguel Tuñón authored
58
59

  dataset
Miguel Tuñón authored
60
}