Blame view

ISO27001effectiveness/R/ISOSurvey_Parser.R 5.33 KB
Miguel Tuñón authored
1
2
3
4
5
6
7
8
9
10
11
12
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------


#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
Miguel Tuñón authored
13
14
15
16
17
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
Miguel Tuñón authored
18
19
ParseExcelFileRaw <- function(file, sheet){
Miguel Tuñón authored
20
  #Checks
Miguel Tuñón authored
21
22
23
24
  if (!file.exists(file)) {
    stop(paste("Error, file [", file, "] not found"))
  }
Miguel Tuñón authored
25
  #Parse the excel file
Miguel Tuñón authored
26
  dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
Miguel Tuñón authored
27
28
29
30

  dataset
}
Miguel Tuñón authored
31
32
Miguel Tuñón authored
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#' PRocess raw data parsed from excel file ISOSurvey27001
#'
#' @param dataset.raw data.frame with raw data
#' @param years list of years to include preceded with a X
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyRaw <- function(dataset.raw, years){

  #Change NAs to zeros
  dataset <- dataset.raw
  dataset[is.na(dataset)] <- 0

  vars <- names(dataset)

  #Remove blanc rows
  if ("INDUSTRIAL.SECTOR" == vars[1]) {
    dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",]
  }else if ("Country" == vars[1]) {
    dataset <- dataset[!dataset$Country == "",]
  }

  #Remove years not included in years param
  years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years))
  dataset <- dataset[,years_checked]


  dataset
}

#' Join data from ISOSurvey with 2 letter code countries plus process raw.data
Miguel Tuñón authored
67
68
#'
#' @param dataset.raw raw data from ISO Survey excel file
Miguel Tuñón authored
69
#' @param years List of years to return, c("X2006", "X2010", ...)
Miguel Tuñón authored
70
71
#'
#' @return data.frame
Miguel Tuñón authored
72
#' @export
Miguel Tuñón authored
73
74
75
76
77
78
79
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){
Miguel Tuñón authored
80
81
  #Standard proccess
  dataset <- ProccesISOSurveyRaw(dataset.raw, years)
Miguel Tuñón authored
82
Miguel Tuñón authored
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  dataset$Country <- gsub("Bolivia","Bolivia, Plurinational State of",dataset$Country)
  dataset$Country <- gsub("Cape Verde","Cabo Verde",dataset$Country)
  dataset$Country <- gsub("Congo, Republic of","Congo, the Democratic Republic of the",dataset$Country)
  dataset$Country <- gsub("Côte D'ivoire","Cote d'Ivoire",dataset$Country)
  dataset$Country <- gsub("Gibraltar (UK)","Gibraltar",dataset$Country)
  dataset$Country <- gsub("Hong Kong, China","Hong Kong",dataset$Country)
  dataset$Country <- gsub("Macau, China","Macao",dataset$Country)
  dataset$Country <- gsub("Palestine","Palestine, State of",dataset$Country)
  dataset$Country <- gsub("San Marino, Republic of","San Marino",dataset$Country)
  dataset$Country <- gsub("Taipei, Chinese","Taiwan, Province of China",dataset$Country)
  dataset$Country <- gsub("The Former Yugoslav Republic of Macedonia","Macedonia, the former Yugoslav Republic of",dataset$Country)
  dataset$Country <- gsub("United States of America","United States",dataset$Country)
  dataset$Country <- gsub("Venezuela","Venezuela, Bolivarian Republic of",dataset$Country)
Miguel Tuñón authored
98
  #Translate country names to 2 letter code
Miguel Tuñón authored
99
100
  CountryNames <- data.frame(countrycode::countrycode_data$country.name, countrycode::countrycode_data$iso2c)
  CountryNames <- setNames(CountryNames, c("Country","country_short"))
Miguel Tuñón authored
101
102
  dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)
Miguel Tuñón authored
103
  dataset
Miguel Tuñón authored
104
}
Miguel Tuñón authored
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152



#' Get data of certificates per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerCountry <- GetISOSurveyCertsPerCountry()
GetISOSurveyCertsPerCountry <- function() {
  Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)

  Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerCountry
}

#' Get data of sites per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Sites_PerCountry <- GetISOSurveySitesPerCountry()
GetISOSurveySitesPerCountry <- function() {
  Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)

  Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015"))

  Sites_PerCountry
}

#' Get data of certificates per year and sector from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- GetISOSurveyCertsPerSector()
GetISOSurveyCertsPerSector <- function() {
  Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)

  Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerSector

}