# Hackmageddon_Parser.R 1.87 KB
#------------------------------------------------------------------------------------------------------
#-----------------------------Parser Excel - Hackmageddon----------------------------------------------
#------------------------------------------------------------------------------------------------------



# Read selected columns from the first sheet of an Excel workbook.
#
# Args:
#   file: Path to the workbook. An error is raised when the path does not
#         exist.
#   cols: Column indices handed to `colIndex` of xlsx::read.xlsx2(). The
#         columns are read as numeric, character, character, character, in
#         that order.
#
# Returns:
#   The data frame produced by xlsx::read.xlsx2(). When the first read does
#   not yield a `Date` column, the sheet is read again starting at row 2 and
#   that second result is returned instead.
ParseHMExcel <- function(file, cols){
  print(file)

  # Guard clause: refuse to go on without an existing file.
  if (!file.exists(file)) {
    stop(paste("Error, file [", file, "] not found"))
  }

  col.types <- c("numeric", "character", "character", "character")

  # Single place that knows how the sheet is read; extra arguments such as
  # `startRow` are forwarded untouched.
  read.sheet <- function(...) {
    xlsx::read.xlsx2(file, 1, header = TRUE, colIndex = cols,
                     colClasses = col.types, ...)
  }

  sheet <- read.sheet()

  # No `Date` column after the first attempt: retry with the header taken
  # from the second row of the sheet.
  if (is.null(sheet$Date)) {
    sheet <- read.sheet(startRow = 2)
  }

  sheet
}

# Drop rows without a date and turn the numeric `Date` column into POSIXct.
#
# Args:
#   dataset.raw: Data frame with a numeric `Date` column (as returned by
#                ParseHMExcel()).
#
# Returns:
#   `dataset.raw` without the rows whose `Date` is NA, and with `Date`
#   converted to POSIXct in the GMT time zone. Values are multiplied by
#   86400 (seconds per day) and counted from the origin 1904-01-01.
#   NOTE(review): the 1904 origin presumably matches the date system of the
#   source workbooks - confirm against the data.
ProccesHMRaw <- function(dataset.raw){
  # Diagnostic output: show which class the Date column arrived with.
  print(class(dataset.raw$Date))

  # Keep only the rows that actually carry a date.
  has.date <- !is.na(dataset.raw$Date)
  cleaned <- dataset.raw[has.date, ]

  # Scale to seconds before anchoring at the origin.
  seconds.per.day <- 86400
  cleaned$Date <- as.POSIXct(
    cleaned$Date * seconds.per.day,
    origin = "1904-01-01",
    tz = "GMT"
  )

  cleaned
}

#' Parse every Hackmageddon workbook found in a folder
#'
#' Lists the entries of `folder`, reads each one with `ParseHMExcel()`,
#' cleans it with `ProccesHMRaw()` and stacks the results row-wise, in the
#' order returned by `list.files()`.
#'
#' @param folder Path of the directory to scan. A trailing path separator is
#'   accepted but not required.
#' @param cols Column indices passed unchanged to `ParseHMExcel()` for every
#'   file in the folder.
#'
#' @return The row-bound result of `ProccesHMRaw()` over every entry of
#'   `folder`. An error is raised when `folder` has no entries.
#' @export
#'
#' @examples
#' \dontrun{
#' attacks <- ParseHMFolder("./data/hackmageddon/Format1/", c(2, 9, 7, 6))
#' }
ParseHMFolder <- function(folder, cols){
  filelist <- list.files(folder)

  # Fail with a clear message instead of trying to read "<folder>NA".
  if (length(filelist) == 0L) {
    stop(paste("Error, no files found in folder [", folder, "]"),
         call. = FALSE)
  }

  # Callers traditionally pass a trailing separator; keep those paths exactly
  # as before and only insert a separator when it is missing.
  paths <- if (grepl("[/\\\\]$", folder)) {
    paste0(folder, filelist)
  } else {
    file.path(folder, filelist)
  }

  # Iterate over the paths themselves: works for a single file as well
  # (`2:length(x)` counts backwards when there is only one element) and
  # binds once instead of growing the data frame on every iteration.
  frames <- lapply(paths, function(path) {
    ProccesHMRaw(ParseHMExcel(path, cols))
  })

  do.call(rbind, frames)
}

# Load the four Hackmageddon format folders and combine them.
#
# Each folder is parsed with ParseHMFolder() using its own set of column
# indices, and the four results are row-bound in the order Format1 to
# Format4.
#
# Returns:
#   The row-bound result of the four ParseHMFolder() calls.
GetAttacksData <- function() {
  # One entry per format folder: where it lives and which columns to read.
  sources <- list(
    list(folder = "./data/hackmageddon/Format1/", cols = c(2, 9, 7, 6)),
    list(folder = "./data/hackmageddon/Format2/", cols = c(2, 7, 5, 6)),
    list(folder = "./data/hackmageddon/Format3/", cols = c(2, 9, 7, 6)),
    list(folder = "./data/hackmageddon/Format4/", cols = c(1, 9, 5, 3))
  )

  parsed <- lapply(sources, function(src) {
    ParseHMFolder(src$folder, src$cols)
  })

  do.call(rbind, parsed)
}