ISOSurvey_Parser.R
5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------
#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
ParseExcelFileRaw <- function(file, sheet){
#Checks
if (!file.exists(file)) {
stop(paste("Error, file [", file, "] not found"))
}
#Parse the excel file
dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
dataset
}
#' PRocess raw data parsed from excel file ISOSurvey27001
#'
#' @param dataset.raw data.frame with raw data
#' @param years list of years to include preceded with a X
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyRaw <- function(dataset.raw, years){
#Change NAs to zeros
dataset <- dataset.raw
dataset[is.na(dataset)] <- 0
vars <- names(dataset)
#Remove blanc rows
if ("INDUSTRIAL.SECTOR" == vars[1]) {
dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",]
}else if ("Country" == vars[1]) {
dataset <- dataset[!dataset$Country == "",]
}
#Remove years not included in years param
years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years))
dataset <- dataset[,years_checked]
dataset
}
#' Join data from ISOSurvey with 2 letter code countries plus process raw.data
#'
#' @param dataset.raw raw data from ISO Survey excel file
#' @param years List of years to return, c("X2006", "X2010", ...)
#'
#' @return data.frame
#' @export
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){
#Standard proccess
dataset <- ProccesISOSurveyRaw(dataset.raw, years)
dataset$Country <- gsub("Bolivia","Bolivia, Plurinational State of",dataset$Country)
dataset$Country <- gsub("Cape Verde","Cabo Verde",dataset$Country)
dataset$Country <- gsub("Congo, Republic of","Congo, the Democratic Republic of the",dataset$Country)
dataset$Country <- gsub("Côte D'ivoire","Cote d'Ivoire",dataset$Country)
dataset$Country <- gsub("Gibraltar (UK)","Gibraltar",dataset$Country)
dataset$Country <- gsub("Hong Kong, China","Hong Kong",dataset$Country)
dataset$Country <- gsub("Macau, China","Macao",dataset$Country)
dataset$Country <- gsub("Palestine","Palestine, State of",dataset$Country)
dataset$Country <- gsub("San Marino, Republic of","San Marino",dataset$Country)
dataset$Country <- gsub("Taipei, Chinese","Taiwan, Province of China",dataset$Country)
dataset$Country <- gsub("The Former Yugoslav Republic of Macedonia","Macedonia, the former Yugoslav Republic of",dataset$Country)
dataset$Country <- gsub("United States of America","United States",dataset$Country)
dataset$Country <- gsub("Venezuela","Venezuela, Bolivarian Republic of",dataset$Country)
#Translate country names to 2 letter code
CountryNames <- data.frame(countrycode::countrycode_data$country.name, countrycode::countrycode_data$iso2c, countrycode::countrycode_data$continent)
CountryNames <- setNames(CountryNames, c("Country","country_short", "Continent"))
dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)
dataset <- dataset[!is.na(dataset$Continent),]
dataset
}
#' Get data of certificates per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerCountry <- GetISOSurveyCertsPerCountry()
GetISOSurveyCertsPerCountry <- function() {
Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2011", "X2012", "X2013", "X2014", "X2015"))
Cert_PerCountry
}
#' Get data of sites per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Sites_PerCountry <- GetISOSurveySitesPerCountry()
GetISOSurveySitesPerCountry <- function() {
Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2011", "X2012", "X2013", "X2014", "X2015"))
Sites_PerCountry
}
#' Get data of certificates per year and sector from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- GetISOSurveyCertsPerSector()
GetISOSurveyCertsPerSector <- function() {
Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2011", "X2012", "X2013", "X2014", "X2015"))
Cert_PerSector
}