首页 > 解决方案 > R:将数据框拆分为列并重新组合为行

问题描述

我已经为此折腾了好几个小时,希望有人能帮我解决下面的问题。

我加载到 R 中的数据集大致如下(实际上还有更多的重复组):

在此处输入图像描述

我的输出应该是这样的:

在此处输入图像描述

split.default(Mydata, rep(1:3, each = 5)) 可以把数据框按列拆分成三组(每组 5 列),但我不知道 a)如何添加一列来标明所属的重复(Replication),以及 b)如何把拆分得到的几个 tibble 重新按行合并成一个数据框。

提前感谢您的帮助!

数据:

# dput() dump of the example tibble: 27 rows x 15 character columns.
# The columns form three side-by-side groups of five (Replication1, ...2-...5;
# Replication2, ...7-...10; Replication3, ...12-...15). The first row of every
# group holds the real column labels ("Timestamp", "a", "b", "c", "d"); the
# shorter groups are padded with NA at the bottom.
# NOTE(review): the Timestamp strings look like spreadsheet serial dates - confirm.
structure(list(Replication1 = c("Timestamp", "44046", "44046.02884259259", 
"44046.065949074073", "44046.088472222225", "44046.0934837963", 
"44046.105208333334", "44046.115613425929", "44046.35355324074", 
"44046.419537037036", "44046", "44046.02884259259", "44046.065949074073", 
"44046.088472222225", "44046.0934837963", "44046.105208333334", 
"44046.115613425929", "44046.35355324074", "44046.419537037036", 
"44049.058587962965", "44049.08630787037", "44049.184525462966", 
"44049.188009259262", "44049.28429398148", "44049.373472222222", 
"44049.464212962965", "44049.472627314812"), ...2 = c("a", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"), 
    ...3 = c("b", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0"), ...4 = c("c", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "37", "37", "37", "37", "37", "37", "37", "37"), 
    ...5 = c("d", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "0", "20", "20", 
    "20", "20", "20", "20", "20", "20"), Replication2 = c("Timestamp", 
    "44046", "44046.02884259259", "44046.065949074073", "44046.088472222225", 
    "44046.0934837963", "44046.105208333334", "44046.115613425929", 
    "44046.35355324074", "44046.419537037036", "44049.058587962965", 
    "44049.08630787037", "44049.184525462966", "44049.188009259262", 
    "44049.28429398148", "44049.373472222222", "44049.464212962965", 
    "44049.472627314812", NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    ...7 = c("a", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), ...8 = c("b", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), ...9 = c("c", "0", "0", 
    "0", "0", "0", "0", "0", "0", "0", "37", "37", "37", "37", 
    "37", "37", "37", "37", NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), ...10 = c("d", "0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "20", "20", "20", "20", "20", "20", "20", "20", NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), Replication3 = c("Timestamp", 
    "44094.918553240743", "44094.960196759261", "44094.960393518515", 
    "44095.006030092591", "44095.259652777779", "44095.275034722225", 
    "44095.31045138889", "44095.323263888888", "44095.386574074073", 
    "44095.425659722219", NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), ...12 = c("a", "1", "1", "1", 
    "1", "2", "2", "2", "2", "2", "2", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...13 = c("b", "0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "0", NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...14 = c("c", 
    "37", "37", "37", "37", "37", "37", "37", "37", "37", "37", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), ...15 = c("d", "20", "20", "20", "20", "20", "20", "20", 
    "20", "20", "20", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA)), row.names = c(NA, -27L), class = c("tbl_df", 
"tbl", "data.frame"))

标签: r

解决方案


由于没有数据样本(请使用 dput 提供数据,而不是发布图片),我只能先给出一些思路:

  1. 把每个重复(Replication)的表分别保存为一个 txt 文件,有多少个重复就保存多少个文件

  2. 用 read.csv 把每个文件读入 R 成为一个数据框;每读入一个,就用 cbind 给它加上一列,标明它属于哪个重复

    # Read one replication's table from its own file.
    rep1_df <- read.csv('replication1.txt')
    # Tag every row with its replication label; naming the column keeps the
    # header identical across files so the data frames can be stacked later.
    rep1_df <- cbind(rep1_df, Replication = 'replication1')

  3. 用 rbind 把这些数据框按行合并,得到最终的数据集:

    # Stack the per-replication data frames row-wise; list every rep*_df
    # created in the previous step as a further argument.
    compound_df <- rbind(rep1_df, rep2_df)

下面是一个工作示例。

这是几个简化的输入数据文件:

rep1.txt

Timestamp;a;b;c;d
03/08/2020 00:00;0;0;0;0
04/08/2020 01:00;0;2;0;0

rep2.txt

Timestamp;a;b;c;d
03/09/2020 07:00;1;0;0;0
05/09/2020 14:00;1;7;40;0

这里使用的代码:

# Read each replication's semicolon-separated table from its own file.
rep1_df <- read.csv(file = 'rep1.txt', sep = ';')
rep2_df <- read.csv(file = 'rep2.txt', sep = ';')

# Tag every row with the replication it came from.
rep1_df <- cbind(rep1_df, Replication = 'Replication1')
rep2_df <- cbind(rep2_df, Replication = 'Replication2')

# Stack the tagged tables into a single long data frame.
compound_df <- rbind(rep1_df, rep2_df)

# Printing compound_df gives:
#>          Timestamp a b  c d  Replication
#> 1 03/08/2020 00:00 0 0  0 0 Replication1
#> 2 04/08/2020 01:00 0 2  0 0 Replication1
#> 3 03/09/2020 07:00 1 0  0 0 Replication2
#> 4 05/09/2020 14:00 1 7 40 0 Replication2

推荐阅读