Blame view

ISO27001effectiveness/R/ISOSurvey_Parser.R 4.09 KB
Miguel Tuñón authored
1
2
3
4
5
6
7
8
9
10
11
12
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - ISO survey------------------------------------------------
#------------------------------------------------------------------------------------------------------


#' Get data frame from an excel file
#'
#' Check if the file exists and then parse it into a data.frame
#' @param file path to excel file
#' @param sheet index of sheet to parse
#'
#' @return data.frame
Miguel Tuñón authored
13
14
15
16
17
#'
#' @examples
#' Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)
#' Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)
#' Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)
Miguel Tuñón authored
18
19
ParseExcelFileRaw <- function(file, sheet){
Miguel Tuñón authored
20
  #Checks
Miguel Tuñón authored
21
22
23
24
  if (!file.exists(file)) {
    stop(paste("Error, file [", file, "] not found"))
  }
Miguel Tuñón authored
25
  #Parse the excel file
Miguel Tuñón authored
26
  dataset <- xlsx::read.xlsx2(file, sheet,colClasses = c("character","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric","numeric"), header = TRUE, as.data.frame = TRUE)
Miguel Tuñón authored
27
28
29
30

  dataset
}
Miguel Tuñón authored
31
32
Miguel Tuñón authored
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#' PRocess raw data parsed from excel file ISOSurvey27001
#'
#' @param dataset.raw data.frame with raw data
#' @param years list of years to include preceded with a X
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyRaw <- function(dataset.raw, years){

  #Change NAs to zeros
  dataset <- dataset.raw
  dataset[is.na(dataset)] <- 0

  vars <- names(dataset)

  #Remove blanc rows
  if ("INDUSTRIAL.SECTOR" == vars[1]) {
    dataset <- dataset[!dataset$INDUSTRIAL.SECTOR == "",]
  }else if ("Country" == vars[1]) {
    dataset <- dataset[!dataset$Country == "",]
  }

  #Remove years not included in years param
  years_checked <- intersect(vars, c("Country", "INDUSTRIAL.SECTOR", years))
  dataset <- dataset[,years_checked]


  dataset
}

#' Join data from ISOSurvey with 2 letter code countries plus process raw.data
Miguel Tuñón authored
67
68
#'
#' @param dataset.raw raw data from ISO Survey excel file
Miguel Tuñón authored
69
#' @param years List of years to return, c("X2006", "X2010", ...)
Miguel Tuñón authored
70
71
#'
#' @return data.frame
Miguel Tuñón authored
72
73
74
75
76
77
78
#'
#' @examples
#'
#' Cert_PerCountry <- ProccesISOSurveyRaw(Cert_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
#' Sites_PerCountry <- ProccesISOSurveyRaw(Sites_PerCountry, c("X2010", "X2011", "X2012", "X2013", "X2014", "X2015"))
ProccesISOSurveyByCountryRaw <- function(dataset.raw, years){
Miguel Tuñón authored
79
80
  #Standard proccess
  dataset <- ProccesISOSurveyRaw(dataset.raw, years)
Miguel Tuñón authored
81
82

  #Translate country names to 2 letter code
Miguel Tuñón authored
83
84
85
  CountryNames <- GetCountryAbrev()
  dataset <- merge(x = dataset, y = CountryNames, by = "Country", all.x = TRUE)
Miguel Tuñón authored
86
  dataset
Miguel Tuñón authored
87
}
Miguel Tuñón authored
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135



#' Get data of certificates per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerCountry <- GetISOSurveyCertsPerCountry()
GetISOSurveyCertsPerCountry <- function() {
  Cert_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 1)

  Cert_PerCountry <- ProccesISOSurveyByCountryRaw(Cert_PerCountry, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerCountry
}

#' Get data of sites per year and country from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Sites_PerCountry <- GetISOSurveySitesPerCountry()
GetISOSurveySitesPerCountry <- function() {
  Sites_PerCountry <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 2)

  Sites_PerCountry <- ProccesISOSurveyByCountryRaw(Sites_PerCountry, c( "X2012", "X2013", "X2014", "X2015"))

  Sites_PerCountry
}

#' Get data of certificates per year and sector from IS27001
#'
#' @return data.frame
#' @export
#'
#' @examples
#' Cert_PerSector <- GetISOSurveyCertsPerSector()
GetISOSurveyCertsPerSector <- function() {
  Cert_PerSector <- ParseExcelFileRaw("./data/ISO/iso_27001_iso_survey2015_preprocessed.xlsx", 3)

  Cert_PerSector <- ProccesISOSurveyRaw(Cert_PerSector, c("X2012", "X2013", "X2014", "X2015"))

  Cert_PerSector

}