首页 > 解决方案 > R循环:在单个表中合并工作表并在excel中导出

问题描述

我正在用 R 编写一个代码,我想用一些循环来简化它。我有 3 个 Excel 工作簿(Country1.xlsx、Country2.xlsx、Country3.xlsx),其中包含 10 张工作表,每张工作表的列结构完全相同。目前我的代码只选择其中一个文件,但我想为所有文件运行相同的代码。代码的想法如下:代码获取 excel 文件,在 R 中导入一些工作表,只保留这些工作表中的一些列,然后将它们合并到一个表中,然后在 excel 中导出:

setwd("C:/Users/username/Desktop/")
library(readxl)
library(tidyverse)
library(writexl)
library(openxlsx)

filename = "Country1.xlsx" #Indicate the file name in your folder
country = "Country1" #Which country refers to

#Import sheets from the excel workbook
ln <- lapply(2:10, function(i) read.xlsx(filename, sheet=i, startRow=1)) #Start from sheet 2 till 10
ln <- setNames(ln, paste0("t_", 2:10)) #Give names to these sheets starting with "t_" and then the number from 2 to 10
list2env(ln, envir=.GlobalEnv) #List the tables

#Rename the columns you need to work with
t_2 <- rename(t_2, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_3 <- rename(t_3, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_4 <- rename(t_4, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_5 <- rename(t_5, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_6 <- rename(t_6, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_7 <- rename(t_7, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_8 <- rename(t_8, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_9 <- rename(t_9, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_10 <- rename(t_10, Number = 1, Reference = 5, Assessment = 6, Comments = 7)

#Extract mini tables with only columns that you need
t_2 <- select(t_2, 1,5,6,7)
t_3 <- select(t_3, 1,5,6,7)
t_4 <- select(t_4, 1,5,6,7)
t_5 <- select(t_5, 1,5,6,7)
t_6 <- select(t_6, 1,5,6,7)
t_7 <- select(t_7, 1,5,6,7)
t_8 <- select(t_8, 1,5,6,7)
t_9 <- select(t_9, 1,5,6,7)
t_10 <- select(t_10, 1,5,6,7)

#Ensure columns are all characters type so you can JOIN them
t_2 <- mutate(t_2, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_3 <- mutate(t_3, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_4 <- mutate(t_4, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_5 <- mutate(t_5, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_6 <- mutate(t_6, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_7 <- mutate(t_7, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_8 <- mutate(t_8, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_9 <- mutate(t_9, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_10 <- mutate(t_10, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))

#Joint the tables to create a single file with all information
joinfile <- full_join(t_2, t_3)
joinfile <- full_join(joinfile, t_4)
joinfile <- full_join(joinfile, t_5)
joinfile <- full_join(joinfile, t_6)
joinfile <- full_join(joinfile, t_7)
joinfile <- full_join(joinfile, t_8)
joinfile <- full_join(joinfile, t_9)
joinfile <- full_join(joinfile, t_10)

#Kill rows were all values are NAs
joinfile <- joinfile[rowSums(is.na(joinfile)) != ncol(joinfile), ]

#Include at the beginning a column with Country name
joinfile <- data.frame(append(joinfile, c(Country=country), after=0))

#Get rid of previous tables
rm(t_2,t_3,t_4,t_5,t_6,t_7,t_8,t_9,t_10,ln)

#Export the final file in excel
write_xlsx(joinfile, path = "C:/Users/username/Desktop/joinfile.xlsx") #I have no idea if it is possible to export giving to the excel the name of a string contained in cell A1 of the joinfile...

我很确定这些动作中的大多数,非常重复,可以通过一系列循环来简化......但我不知道如何,因为这是我第一次使用 R。任何帮助将不胜感激!提前致谢!

标签: rexcelloopsjoin

解决方案


您应该将数据保存在列表中,以便可以将相同的代码应用于不同的数据帧。

我认为下面的代码应该适用于文件夹中的任意数量的文件。

library(dplyr)
library(purrr)

#This should have complete path of all the filenames. 
#For this example Country1.xlsx, Country2.xlsx, Country3.xlsx 
file_path <- list.files('/file/path', pattern = '\\.xlsx$', full.names = TRUE)

#For each file in file_path
map(file_path, ~
  #For each sheet
  map(2:10, function(i) {
    #Read the file with particular sheet nummber
    read.xlsx(.x, sheet=i, startRow=1) %>%
      #Rename column based on position
      rename(Number = 1, Reference = 5, Assessment = 6, Comments = 7) %>%
      #Select specific columns
      select(1,5,6,7) %>%
      #Convert them to character
      mutate(across(.fns = as.character))
  }) %>%
    #Do a full join
    reduce(full_join) %>%
    #Remove all NA rows
    filter(Reduce(`|`, across(.fns = ~!is.na(.)))) %>%
    #Add country column at 1st position
    mutate(Country = tools::file_path_sans_ext(basename(.x)), .before = 1) %>%
  #Write the xlsx.
  write_xlsx(path = paste0("C:/Users/username/Desktop/joinfile_", basename(.x))))

推荐阅读