首页 > 解决方案 > R: How to aggregate several data frames into a txt file via for loop?

问题描述

I have several csv files containing date and precipitation info. Here is the sample data:

three csv files - sample data

Goal:

I want to read all of them one by one, then:

1- separate the date column into year, month, day.

2- grab the a specific column from each of them.

3- create a data frame for the extracted info from each csv file.

4- Finally, paste all these frames into a txt file.

This is my code to do so:

rm(list=ls())

## where is the main folder? 
setwd("C:/Users/Downloadspr_day_ECMWF")  

## reading all csv files:
list_csv_files <- basename(list.files(pattern = ".*_daily_results.*csv", recursive = TRUE))

## a loop here to read all csv files one by one and save their info in one txt file:
result <- list()
counter <- 1

for (i in 1:length(list_csv_files)){ 
  MyData <- read.csv(list_csv_files[[i]], header=TRUE, sep=",")
  head(MyData)

  ## separating year , month , date from the "Date.Precipitation_mm" column inside the csv file:
  date_column <- MyData$Date.Precipitation_mm  
  date_column

  year_date <- format(as.Date(date_column, format="%Y-%m-%d"),"%Y")
  month_date <- format(as.Date(date_column, format="%Y-%m-%d"),"%m")
  day_date <- format(as.Date(date_column, format="%Y-%m-%d"),"%d")

  ## reading Alberta columns in MyData :
  Alberta_column <- MyData$Alberta
  Alberta_column

  ## creating a data frame to put our data inside it:
  txt_file_data_frame <- data.frame(year_date, month_date, day_date, Alberta_column)

  ## a counter to save all data frames consecutively:
  for (j in 1:length(txt_file_data_frame)) {
    result[[counter]] <- txt_file_data_frame[j]
    counter <- counter + 1
  }

}

## write the txt file:
write.table(txt_file_data_frame, file = "myTXT.txt", row.names = FALSE, dec = ".", sep = "\t", quote = FALSE)

But the generated txt file is ONLY contained the last csv file info!! Like this:

enter image description here

I want to save all data frames in the final txt file.

Does anybody know the solution to this challenge?

Any help or comment would be highly appreciated.

标签: rcsvdataframe

解决方案


使用tidyverse包,你不需要for循环。检查此解决方法是否是您所需要的。我试图解释评论中的所有步骤。

# install required packages
if (!require("tidyverse")) install.packages("tidyverse")
#> Loading required package: tidyverse
if (!require("here")) install.packages("here")
#> Loading required package: here
#> here() starts at /tmp/RtmpRFktCG/reprex6c99164b38dd
if (!require("fs")) install.packages("fs")
#> Loading required package: fs

# create a new folder to save the data you shared
dir_create("csv-data")

# get the zip file
csv <- "https://www.dropbox.com/s/lyk5vvt7o7kxydj/csv_files.zip?dl=1"
zip_name <- "csv.zip"
download.file(url = csv, destfile = here("csv-data", zip_name))

# descompress the zip file
unzip(zipfile = here("csv-data", zip_name), exdir = here("csv-data"))

# get data
data <-
  # inform the folder
  here("csv-data") %>%
  # search for csv files
  dir_ls(regexp = "\\.csv") %>% 
  # read and bind the rows
  map_dfr(read_csv, .id = "source") %>% 
  # which files do the lines come from?
  mutate(source = basename(source)) %>% 
  # create date columns based on the previous one
  separate(
    col = Date, 
    into = c("year", "month", "day"), 
    sep = "-"
    ) %>% 
  # select columns 
  select(year, month, day, Alberta)
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#>   X1 = col_character(),
#>   `Date/Precipitation_mm` = col_date(format = ""),
#>   Alberta = col_double(),
#>   Athabasca = col_double(),
#>   Beaver = col_double(),
#>   Hay_GreatSlave = col_double(),
#>   Milk = col_double(),
#>   NorthSaskatchewan = col_double(),
#>   Peace_Slave = col_double(),
#>   SouthSaskatchewan = col_double(),
#>   Date = col_date(format = "")
#> )
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#>   X1 = col_character(),
#>   `Date/Precipitation_mm` = col_date(format = ""),
#>   Alberta = col_double(),
#>   Athabasca = col_double(),
#>   Beaver = col_double(),
#>   Hay_GreatSlave = col_double(),
#>   Milk = col_double(),
#>   NorthSaskatchewan = col_double(),
#>   Peace_Slave = col_double(),
#>   SouthSaskatchewan = col_double(),
#>   Date = col_date(format = "")
#> )
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#>   X1 = col_character(),
#>   `Date/Precipitation_mm` = col_date(format = ""),
#>   Alberta = col_double(),
#>   Athabasca = col_double(),
#>   Beaver = col_double(),
#>   Hay_GreatSlave = col_double(),
#>   Milk = col_double(),
#>   NorthSaskatchewan = col_double(),
#>   Peace_Slave = col_double(),
#>   SouthSaskatchewan = col_double(),
#>   Date = col_date(format = "")
#> )

# check data
data
#> # A tibble: 1,096 x 4
#>    year  month day      Alberta
#>    <chr> <chr> <chr>      <dbl>
#>  1 1950  01    01    0.00131   
#>  2 1950  01    02    0.00170   
#>  3 1950  01    03    0.00142   
#>  4 1950  01    04    0.000156  
#>  5 1950  01    05    0.00105   
#>  6 1950  01    06    0.000792  
#>  7 1950  01    07    0.000622  
#>  8 1950  01    08    0.000267  
#>  9 1950  01    09    0.000339  
#> 10 1950  01    10    0.00000134
#> # … with 1,086 more rows

# save
data %>% 
  write_delim(path = here("csv-data", "myTXT.txt"), delim = "\t")

reprex 包(v0.3.0)于 2019 年 10 月 24 日创建


推荐阅读