主要用到了 xlsx 和 rJava 包:打開 Excel 文件,讀取各表格數據,再寫入到彙總表。
下圖爲處理前的原始數據表格:
下圖爲處理後的數據:
安裝 & 加載包的函數實現。installed.packages() 函數獲取所有已安裝的包,用以排除已安裝的包;install.packages() 函數安裝指定的包;library() 函數加載引入包。
# Install any missing packages, then attach every requested package.
#
# pkgs: character vector of package names.
# Returns NULL invisibly (the value of the final `for` loop); the function is
# called for its side effects of installing and attaching packages.
loadLibrary <- function(pkgs) {
  # Names of all packages already present in the library paths.
  installedPkgs <- installed.packages()[, "Package"]
  uninstalledPkgs <- pkgs[!(pkgs %in% installedPkgs)]

  # Only contact the repository when something is actually missing.
  if (length(uninstalledPkgs) > 0) {
    install.packages(uninstalledPkgs)
  }

  # character.only = TRUE makes library() use the string held in `pkg`
  # instead of treating `pkg` itself as the package name.
  for (pkg in pkgs) {
    library(pkg, character.only = TRUE, quietly = TRUE)
  }
}
自定義設置。涉及表格文件路徑、彙總表名等。
# User configuration: where the workbook lives and how the summary sheet is
# addressed. Each setting sits on its own line so that the trailing comments
# cannot swallow the statements that follow them.
setwd("/path/to/work/dir") # working directory that fileName is resolved against
fileName <- "summary_sheet_demo.xlsx" # workbook to process
summaryName <- "彙總" # name of the summary sheet
summarySheet <- FALSE # placeholder; replaced by the sheet object once it is found
startRow <- 2 # first row of the summary sheet to write to
設置 CRAN 源。官方默認源可能比較慢,因此選擇國內的源很重要。此處選擇了清華 CRAN 源,其他的可參考 CRAN Mirrors。
# Point package installation at the Tsinghua (TUNA) CRAN mirror.
# The entry is named "CRAN" to match the layout of R's default `repos` option.
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
加載依賴包。用到了上述寫的安裝加載包的函數。
# Install (when missing) and attach the xlsx and rJava packages via loadLibrary().
loadLibrary(c("xlsx", "rJava"))
打開工作表。loadWorkbook() 打開指定路徑的表格文件,並加載到工作簿對象中。
# Open the workbook at `fileName` and keep the resulting workbook object in `wb`.
wb <- loadWorkbook(fileName)
遍歷查詢 & 彙總數據。
# Copy the data rows of every non-summary sheet into the summary sheet,
# stacking them one after another starting at `startRow`.

# All worksheets of the workbook.
sheets <- getSheets(wb)

# Find the sheet that receives the combined rows.
for (sheet in sheets) {
  sheetName <- sheet$getSheetName()
  if (sheetName == summaryName) {
    summarySheet <- sheet
  }
}
# summarySheet still holds its FALSE placeholder when no sheet matched.
# identical() is used because `==` between a sheet reference and a logical
# is not a plain scalar condition for `if`.
if (identical(summarySheet, FALSE)) {
  stop(paste("表:", summaryName, "未找到"))
}

# Optional: set the date format used by xlsx.
# options(xlsx.date.format='yyyy/MM/dd')

for (sheet in sheets) {
  # Skip the sheet we are writing into.
  sheetName <- sheet$getSheetName()
  if (sheetName == summaryName) {
    next
  }

  # Reported for information only.
  # NOTE(review): getLastRowNum() is presumably POI's 0-based index of the last
  # row, which equals the data row count only with one header row and no gaps
  # -- confirm.
  rowNum <- sheet$getLastRowNum()
  print(paste("表名:", sheetName, "總共:", rowNum, "行,", sep = " "))

  # Read the sheet from row 2 onward (no header row in the result).
  # colClasses fixes the type of each of the seven columns.
  data <- read.xlsx2(
    fileName,
    sheetName = sheetName,
    header = FALSE,
    startRow = 2,
    colClasses = c(
      "character", "Date", "integer", "integer",
      rep("numeric", 2), "integer"
    )
  )
  print(data)

  # Append the rows to the summary sheet at the current write position.
  addDataFrame(
    data,
    summarySheet,
    col.names = FALSE,
    row.names = FALSE,
    startRow = startRow
  )

  # Advance by the number of rows actually written, so the next sheet neither
  # overwrites these rows nor leaves a gap after them.
  startRow <- startRow + nrow(data)
}
把對象內容寫入文件中保存。
# Write the in-memory workbook `wb` back to `fileName`, i.e. over the file it was loaded from.
saveWorkbook(wb, fileName)
# Complete script: merge the data rows of every sheet in an Excel workbook
# into its summary sheet, using the xlsx and rJava packages.

# Install any missing packages, then attach every requested package.
#
# pkgs: character vector of package names.
# Returns NULL invisibly (the value of the final `for` loop); the function is
# called for its side effects of installing and attaching packages.
loadLibrary <- function(pkgs) {
  # Names of all packages already present in the library paths.
  installedPkgs <- installed.packages()[, "Package"]
  uninstalledPkgs <- pkgs[!(pkgs %in% installedPkgs)]

  # Only contact the repository when something is actually missing.
  if (length(uninstalledPkgs) > 0) {
    install.packages(uninstalledPkgs)
  }

  # character.only = TRUE makes library() use the string held in `pkg`
  # instead of treating `pkg` itself as the package name.
  for (pkg in pkgs) {
    library(pkg, character.only = TRUE, quietly = TRUE)
  }
}

# User configuration ----
setwd("/path/to/work/dir") # working directory that fileName is resolved against
fileName <- "summary_sheet_demo.xlsx" # workbook to process
summaryName <- "彙總" # name of the summary sheet
summarySheet <- FALSE # placeholder; replaced by the sheet object once it is found
startRow <- 2 # first row of the summary sheet to write to

# CRAN mirror ----
# The entry is named "CRAN" to match the layout of R's default `repos` option.
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))

# Dependencies ----
loadLibrary(c("xlsx", "rJava"))

# Open the Excel workbook ----
wb <- loadWorkbook(fileName)

# All worksheets of the workbook.
sheets <- getSheets(wb)

# Find the sheet that receives the combined rows.
for (sheet in sheets) {
  sheetName <- sheet$getSheetName()
  if (sheetName == summaryName) {
    summarySheet <- sheet
  }
}
# summarySheet still holds its FALSE placeholder when no sheet matched.
# identical() is used because `==` between a sheet reference and a logical
# is not a plain scalar condition for `if`.
if (identical(summarySheet, FALSE)) {
  stop(paste("表:", summaryName, "未找到"))
}

# Optional: set the date format used by xlsx.
# options(xlsx.date.format='yyyy/MM/dd')

# Copy every other sheet into the summary sheet ----
for (sheet in sheets) {
  # Skip the sheet we are writing into.
  sheetName <- sheet$getSheetName()
  if (sheetName == summaryName) {
    next
  }

  # Reported for information only.
  # NOTE(review): getLastRowNum() is presumably POI's 0-based index of the last
  # row, which equals the data row count only with one header row and no gaps
  # -- confirm.
  rowNum <- sheet$getLastRowNum()
  print(paste("表名:", sheetName, "總共:", rowNum, "行,", sep = " "))

  # Read the sheet from row 2 onward (no header row in the result).
  # colClasses fixes the type of each of the seven columns.
  data <- read.xlsx2(
    fileName,
    sheetName = sheetName,
    header = FALSE,
    startRow = 2,
    colClasses = c(
      "character", "Date", "integer", "integer",
      rep("numeric", 2), "integer"
    )
  )
  print(data)

  # Append the rows to the summary sheet at the current write position.
  addDataFrame(
    data,
    summarySheet,
    col.names = FALSE,
    row.names = FALSE,
    startRow = startRow
  )

  # Advance by the number of rows actually written, so the next sheet neither
  # overwrites these rows nor leaves a gap after them.
  startRow <- startRow + nrow(data)
}

# Persist the modified workbook back to the file it was loaded from.
saveWorkbook(wb, fileName)
原文地址: https://shockerli.net/post/r-... 更多文章請訪問我的個人博客: https://shockerli.net