R 腳本讀取彙總 Excel 表格數據

主要用到了 xlsx 和 rJava 兩個包:打開 Excel 文件,讀取各表格數據,再寫入到彙總表。

下圖爲處理前的原始數據表格:

處理前

下圖爲處理後的數據:

處理後

代碼實現

安裝&加載包的函數實現:installed.packages() 函數獲取全部已安裝的包,用以排除已安裝的包;install.packages() 函數安裝指定的包;library() 加載引入包。

# Make sure every package named in `pkgs` is installed, then attach each one.
#
# pkgs: character vector of package names.
# Returns NULL invisibly; the function is called for its side effects
# (installing missing packages and attaching all of them).
loadLibrary <- function(pkgs) {
    # Names of the packages already present in the library paths.
    installedNames <- installed.packages()[, "Package"]
    toInstall <- pkgs[is.na(match(pkgs, installedNames))]

    # Only hit the repository for packages that are actually missing.
    if (length(toInstall) > 0) {
        install.packages(toInstall)
    }

    # character.only = TRUE makes library() use the value of `name`
    # as the package name instead of the literal symbol.
    lapply(pkgs, function(name) {
        library(name, character.only = TRUE, quietly = TRUE)
    })
    invisible(NULL)
}

自定義設置。涉及表格文件路徑、彙總表名等。

# User settings.
setwd("/path/to/work/dir")             # working directory
fileName <- "summary_sheet_demo.xlsx"  # workbook to process
summaryName <- "彙總"                  # name of the sheet that receives the combined rows
summarySheet <- FALSE                  # placeholder until the summary sheet object is found
startRow <- 2                          # row of the summary sheet where writing starts

設置 CRAN 源。官方默認源可能比較慢,因此選擇國內的源很重要。此處選擇了清華 CRAN 源,其他的可參考 CRAN Mirrors。

# Use the Tsinghua (TUNA) CRAN mirror. The entry is named "CRAN" because R
# keeps `repos` as a named vector and looks the main repository up under
# that name; an unnamed URL leaves no "CRAN" entry for such lookups.
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))

加載依賴包。用到了上述寫的安裝加載包的函數。

# Install (when missing) and attach the two packages the script relies on.
loadLibrary(pkgs = c("xlsx", "rJava"))

打開工作表:loadWorkbook() 打開指定路徑的表格文件,並加載到工作簿對象中。

# Read the Excel file into an in-memory workbook object.
wb <- loadWorkbook(file = fileName)

遍歷查詢&彙總數據

# Fetch every worksheet of the workbook.
sheets <- getSheets(wb)

# Locate the summary sheet by name. Working with a plain index gives an
# unambiguous "not found" result (NA) instead of comparing a Java sheet
# reference against a logical placeholder.
sheetNames <- vapply(sheets, function(s) s$getSheetName(), character(1))
summaryIdx <- match(summaryName, sheetNames)

if (is.na(summaryIdx)) {
    stop(paste("表:", summaryName, "未找到"))
}
summarySheet <- sheets[[summaryIdx]]

# Date format override (optional)
# options(xlsx.date.format='yyyy/MM/dd')

# Copy the data rows of every other sheet into the summary sheet,
# one sheet after another.
for (i in seq_along(sheets)) {
    # The summary sheet is the destination, never a source.
    if (i == summaryIdx) {
        next
    }
    sheet <- sheets[[i]]
    sheetName <- sheetNames[[i]]

    # getLastRowNum() is the 0-based index of the last row of the sheet.
    rowNum <- sheet$getLastRowNum()
    print(paste("表名:", sheetName, "總共:", rowNum, "行,", sep = " "))

    # Reading starts at row 2 (index 1); an index below 1 means there is
    # nothing under the header row, so there is nothing to copy.
    if (rowNum < 1) {
        next
    }

    # colClasses fixes the type used for each column that is read.
    data <- read.xlsx2(
        fileName,
        sheetName = sheetName,
        header = FALSE,
        startRow = 2,
        colClasses = c("character", "Date", "integer", "integer",
            rep("numeric", 2), "integer")
    )
    print(data)

    # Append the rows to the summary sheet at the current write position.
    addDataFrame(data, summarySheet, col.names = FALSE, row.names = FALSE,
        startRow = startRow)

    # Advance by the number of rows actually written, not by the POI row
    # index, so the next sheet lands directly below this one.
    startRow <- startRow + nrow(data)
}

把對象內容寫入文件中保存

# Write the modified workbook back to the file it was loaded from.
saveWorkbook(wb, file = fileName)

完整代碼

# Install-and-attach helper.
#
# pkgs: character vector of package names.
# Installs whichever of them are not installed yet, then attaches all of
# them. Returns NULL invisibly.
loadLibrary <- function(pkgs) {
    # Names of the packages already present in the library paths.
    installedNames <- installed.packages()[, "Package"]
    toInstall <- pkgs[is.na(match(pkgs, installedNames))]

    # Only hit the repository for packages that are actually missing.
    if (length(toInstall) > 0) {
        install.packages(toInstall)
    }

    # character.only = TRUE makes library() use the value of `name`
    # as the package name instead of the literal symbol.
    lapply(pkgs, function(name) {
        library(name, character.only = TRUE, quietly = TRUE)
    })
    invisible(NULL)
}

# User settings.
setwd("/path/to/work/dir")             # working directory
fileName <- "summary_sheet_demo.xlsx"  # workbook to process
summaryName <- "彙總"                  # name of the sheet that receives the combined rows
summarySheet <- NULL                   # assigned below once the summary sheet is located
startRow <- 2                          # row of the summary sheet where writing starts

# Use the Tsinghua (TUNA) CRAN mirror. The entry is named "CRAN" because R
# keeps `repos` as a named vector and looks the main repository up under
# that name.
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))

# Install (when missing) and attach the dependencies.
loadLibrary(c("xlsx", "rJava"))

# Open the Excel file and fetch every worksheet.
wb <- loadWorkbook(fileName)
sheets <- getSheets(wb)

# Locate the summary sheet by name. Working with a plain index gives an
# unambiguous "not found" result (NA) instead of comparing a Java sheet
# reference against a logical placeholder.
sheetNames <- vapply(sheets, function(s) s$getSheetName(), character(1))
summaryIdx <- match(summaryName, sheetNames)

if (is.na(summaryIdx)) {
    stop(paste("表:", summaryName, "未找到"))
}
summarySheet <- sheets[[summaryIdx]]

# Date format override (optional)
# options(xlsx.date.format='yyyy/MM/dd')

# Copy the data rows of every other sheet into the summary sheet,
# one sheet after another.
for (i in seq_along(sheets)) {
    # The summary sheet is the destination, never a source.
    if (i == summaryIdx) {
        next
    }
    sheet <- sheets[[i]]
    sheetName <- sheetNames[[i]]

    # getLastRowNum() is the 0-based index of the last row of the sheet.
    rowNum <- sheet$getLastRowNum()
    print(paste("表名:", sheetName, "總共:", rowNum, "行,", sep = " "))

    # Reading starts at row 2 (index 1); an index below 1 means there is
    # nothing under the header row, so there is nothing to copy.
    if (rowNum < 1) {
        next
    }

    # colClasses fixes the type used for each column that is read.
    data <- read.xlsx2(
        fileName,
        sheetName = sheetName,
        header = FALSE,
        startRow = 2,
        colClasses = c("character", "Date", "integer", "integer",
            rep("numeric", 2), "integer")
    )
    print(data)

    # Append the rows to the summary sheet at the current write position.
    addDataFrame(data, summarySheet, col.names = FALSE, row.names = FALSE,
        startRow = startRow)

    # Advance by the number of rows actually written, not by the POI row
    # index, so the next sheet lands directly below this one.
    startRow <- startRow + nrow(data)
}

# Finally, write the modified workbook back to the file.
saveWorkbook(wb, fileName)

表格附件

summary_sheet_demo.xlsx


原文地址: https://shockerli.net/post/r-...

更多文章請訪問我的個人博客: https://shockerli.net

相關文章
相關標籤/搜索