|
1
2
3
4
5
6
7
8
9
10
11
12
|
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------
#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
|
|
13
14
15
16
17
|
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
|
|
18
19
|
ParseExcelFileRaw <- function(file, sheet){
|
|
20
|
#Checks
|
|
21
22
23
24
|
if (!file.exists(file)) {
stop(paste("Error, file [", file, "] not found"))
}
|
|
25
|
#Parse the excel file
|
|
26
|
dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
|
|
27
28
29
30
|
dataset
}
|
|
31
32
|
|
|
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#' PRocess raw data parsed from excel file ISOSurvey27001
#'
#' @param dataset.raw data.frame with raw data
#' @param years list of years to include preceded with a X
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyRaw <- function(dataset.raw, years){
#Change NAs to zeros
dataset <- dataset.raw
dataset[is.na(dataset)] <- 0
vars <- names(dataset)
#Remove blanc rows
if ("INDUSTRIAL.SECTOR" == vars[1]) {
dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",]
}else if ("Country" == vars[1]) {
dataset <- dataset[!dataset$Country == "",]
}
#Remove years not included in years param
years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years))
dataset <- dataset[,years_checked]
dataset
}
#' Join data from ISOSurvey with 2 letter code countries plus process raw.data
|
|
67
68
|
#'
#' @param dataset.raw raw data from ISO Survey excel file
|
|
69
|
#' @param years List of years to return, c("X2006", "X2010", ...)
|
|
70
71
|
#'
#' @return data.frame
|
|
72
|
#' @export
|
|
73
74
75
76
77
78
79
|
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){
|
|
80
81
|
#Standard proccess
dataset <- ProccesISOSurveyRaw(dataset.raw, years)
|
|
82
|
|
|
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
dataset$Country <- gsub("Bolivia","Bolivia, Plurinational State of",dataset$Country)
dataset$Country <- gsub("Cape Verde","Cabo Verde",dataset$Country)
dataset$Country <- gsub("Congo, Republic of","Congo, the Democratic Republic of the",dataset$Country)
dataset$Country <- gsub("Côte D'ivoire","Cote d'Ivoire",dataset$Country)
dataset$Country <- gsub("Gibraltar (UK)","Gibraltar",dataset$Country)
dataset$Country <- gsub("Hong Kong, China","Hong Kong",dataset$Country)
dataset$Country <- gsub("Macau, China","Macao",dataset$Country)
dataset$Country <- gsub("Palestine","Palestine, State of",dataset$Country)
dataset$Country <- gsub("San Marino, Republic of","San Marino",dataset$Country)
dataset$Country <- gsub("Taipei, Chinese","Taiwan, Province of China",dataset$Country)
dataset$Country <- gsub("The Former Yugoslav Republic of Macedonia","Macedonia, the former Yugoslav Republic of",dataset$Country)
dataset$Country <- gsub("United States of America","United States",dataset$Country)
dataset$Country <- gsub("Venezuela","Venezuela, Bolivarian Republic of",dataset$Country)
|
|
98
|
#Translate country names to 2 letter code
|
|
99
100
|
CountryNames <- data.frame(countrycode::countrycode_data$country.name, countrycode::countrycode_data$iso2c, countrycode::countrycode_data$continent)
CountryNames <- setNames(CountryNames, c("Country","country_short", "Continent"))
|
|
101
102
|
dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)
|
|
103
104
|
dataset <- dataset[!is.na(dataset$Continent),]
|
|
105
|
dataset
|
|
106
|
}
|
|
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#' Get data of certificates per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerCountry <- GetISOSurveyCertsPerCountry()
GetISOSurveyCertsPerCountry <- function() {
Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
|
|
120
|
Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2011", "X2012", "X2013", "X2014", "X2015"))
|
|
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
Cert_PerCountry
}
#' Get data of sites per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Sites_PerCountry <- GetISOSurveySitesPerCountry()
GetISOSurveySitesPerCountry <- function() {
Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
|
|
135
|
Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c("X2011", "X2012", "X2013", "X2014", "X2015"))
|
|
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
Sites_PerCountry
}
#' Get data of certificates per year and sector from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- GetISOSurveyCertsPerSector()
GetISOSurveyCertsPerSector <- function() {
Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
|
|
150
|
Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2011", "X2012", "X2013", "X2014", "X2015"))
|
|
151
152
153
154
|
Cert_PerSector
}
|