首页 > 解决方案 > For-loop over list of txt.files with if conditions in R


I am struggling to create a for loop over all .txt files in a specific directory. The goal is to merge all of the separately saved .txt files into one data frame and add an ID variable, which can always be found at the start of the file name (e.g., ID = 10 for the file "10_1. Recording 01.10.2015 131514_CsvData.txt").

# List only the files in "Data/study" whose names end in ".txt".
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; a bare ".txt" would also match names that
# merely contain any character followed by "txt".
txt_files <- list.files("Data/study", pattern = "\\.txt$")

> txt_files
[1] "1_1. Recording 18.09.2015 091037_CsvData.txt" "10_1. Recording 01.10.2015 131514_CsvData.txt"
[3] "100_1. Recording 02.10.2015 091630_CsvData.txt" "104_1. Recording 22.09.2015 142604_CsvData.txt"
[5] "107_1. Recording 18.09.2015 104300_CsvData.txt" "110_1. Recording 29.09.2015 081558_CsvData.txt"
[7] "112_1. Recording 21.09.2015 082908_CsvData.txt" "114_1. Recording 29.09.2015 101159_CsvData.txt"
[9] "115_1. Recording 23.09.2015 141204_CsvData.txt" "116_1. Recording 30.09.2015 110624_CsvData.txt"
[11] "117_1. Recording 01.10.2015 141227_CsvData.txt" "120_1. Recording 17.09.2015 153516_CsvData.txt"

Read in and merge txt.files

    # Read every file named in `txt_files` from "Data/study" and stack the
    # rows into `final_df`, tagging each row with the ID from the file name.
    # NOTE: a `final_df` left over from an earlier run is appended to, not
    # replaced; remove it first if a fresh result is wanted.
    for (file in txt_files) {
      temp_dataset <- read.table(file.path("Data/study", file),
                                 header = TRUE, fill = TRUE)
      # The ID is everything before the first underscore in the file name,
      # e.g. "10_1. Recording 01.10.2015 131514_CsvData.txt" -> "10".
      # It is stored as a character string.
      temp_dataset$ID <- sub("_.*$", "", file)

      if (!exists("final_df", inherits = FALSE)) {
        # if the merged data frame "final_df" doesn't already exist, create it
        final_df <- temp_dataset
      } else {
        # if the merged data frame does already exist, append to it
        final_df <- rbind(final_df, temp_dataset)
      }
    }

标签: r, for-loop, if-statement, merge, filelist



另外,可以考虑使用 paste0(无需指定分隔符参数)来拼接路径,并用 gsub 删除从第一个下划线到字符串末尾的所有字符,从而提取 ID。

# List the .txt files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the match anchored at the end.
txt_files <- list.files(pattern = "\\.txt$")

# Read each file into a data frame and add a `temp_ID` column holding the
# part of the file name before the first underscore (as a character string).
df_list <- lapply(txt_files, function(file) {
  transform(
    read.table(file, header = TRUE, fill = TRUE),
    temp_ID = gsub("_.*", "", file)
  )
})

# Stack all per-file data frames in one step instead of growing the result
# inside a loop. With no matching files, `df_list` is empty and the result
# is NULL.
final_df <- do.call(rbind, df_list)
