首页 > 解决方案 > 根据所选阈值内的数字行名合并数据帧并保留不匹配的行

问题描述

如何在设置匹配阈值时根据它们的数字行名合并两个数据框?

df1 <- structure(list(c(4974622.505928, 170582.149747, 130545.004516, 
143528.819582, 49416.594892, 51879.515558, 52027.462651, 42491.317116, 
49173.145029, 44040.01261), c(4664319.00309, 266278.599338, 204772.412837, 
204819.210688, 77718.961761, 82742.852809, 79706.774944, 67123.603629, 
67264.401059, 66750.260768), c(5906075.502923, 385318.121061, 
296824.944672, 308432.753482, 113407.50333, 120352.400266, 122622.356104, 
98656.179336, 107669.002489, 100262.855064), c(5401712.020682, 
204595.653994, 163485.509823, 179567.339348, 62690.116298, 63790.0244, 
64660.971879, 52545.84055, 59080.66972, 54579.538267), c(5273676.522307, 
159130.126808, 129607.971309, 142279.787439, 45812.561022, 47230.447746, 
48367.405274, 39578.235275, 45489.065198, 43102.923417)), row.names = c("34.9816256", 
"35.0576674", "35.0898006", "35.1270264", "35.1738664", "35.1936282", 
"35.2043582", "35.2359934", "35.2716016", "35.2993064"), class = "data.frame")

df2 <- structure(list(c(5898584.48405, 302326.226264, 185567.968257, 
205617.778019, 84476.66928, 65505.560486, 68121.465276, 63221.947902, 
55028.866127, 36821.607091), c(3719350.766633, 108177.577417, 
68855.378083, 78201.248427, 17558.118703, 23387.078772, 25374.978916, 
18833.579115, 12761.529092, 11507.348928), c(3587498.99736, 96793.741428, 
59750.485295, 70217.309923, 26233.188472, 20200.080468, 22241.999451, 
20268.485836, 17330.391134, 12503.133961), c(3128479.008712, 
70298.795438, 45668.592667, 56013.453832, 20323.368372, 16795.27218, 
16358.208042, 15722.790712, 12276.726458, 9155.522864), c(3847005.494149, 
138762.296854, 94196.099405, 106888.964213, 36614.870588, 30856.787329, 
33880.704043, 31399.328936, 27819.255931, 18560.05768)), row.names = c("34.9815906", 
"35.0356588", "35.0897702", "35.1269978", "35.1535182", "35.1744048", 
"35.1952968", "35.3032464", "35.3207828", "35.3739834"), class = "data.frame")

数据帧的输出(第一行是rownames

> df1
34.9816256 4974622.51 4664319.00 5906075.50 5401712.02 5273676.52
35.0576674  170582.15  266278.60  385318.12  204595.65  159130.13
35.0898006  130545.00  204772.41  296824.94  163485.51  129607.97
35.1270264  143528.82  204819.21  308432.75  179567.34  142279.79
35.1738664   49416.59   77718.96  113407.50   62690.12   45812.56
35.1936282   51879.52   82742.85  120352.40   63790.02   47230.45
35.2043582   52027.46   79706.77  122622.36   64660.97   48367.41
35.2359934   42491.32   67123.60   98656.18   52545.84   39578.24
35.2716016   49173.15   67264.40  107669.00   59080.67   45489.07
35.2993064   44040.01   66750.26  100262.86   54579.54   43102.92

> df2
34.9815906 5898584.48 3719350.77 3587499.00 3128479.009 3847005.49
35.0356588  302326.23  108177.58   96793.74   70298.795  138762.30
35.0897702  185567.97   68855.38   59750.49   45668.593   94196.10
35.1269978  205617.78   78201.25   70217.31   56013.454  106888.96
35.1535182   84476.67   17558.12   26233.19   20323.368   36614.87
35.1744048   65505.56   23387.08   20200.08   16795.272   30856.79
35.1952968   68121.47   25374.98   22242.00   16358.208   33880.70
35.3032464   63221.95   18833.58   20268.49   15722.791   31399.33
35.3207828   55028.87   12761.53   17330.39   12276.726   27819.26
35.3739834   36821.61   11507.35   12503.13    9155.523   18560.06

如果行名中两个数字之间的差异在 [-0.02, 0.02] 之间,我想根据它们的行名合并这两个数据集

换句话说,df1 中的每个rowname 都应该与df2 中的每个rowname 进行比较,如果发现两个rowname 的差异在[-0.02, 0.02] 范围内,则可以将数据合并到同一行上。如果未找到匹配项,则将 NA 添加到没有来自其他 df 的匹配数据的位置(如在 full_join 中)。

标签: rjoinmergerowname

解决方案


  1. 您需要列名并添加rownames为列

    library(tibble)
    colnames(df1) <- c('a1', 'b1', 'c1', 'd1', 'e1')
    df1 <- rownames_to_column(df1, "rn1")
    
    colnames(df2) <- c('a2', 'b2', 'c2', 'd2', 'e2')
    df2 <- rownames_to_column(df2, "rn2")
    
  2. 连接两个数据框

    df3 <- cbind(df1, df2)
    
  3. 计算两者之间的差异rownames

    df3['diff'] <- as.numeric(df3$rn1) - as.numeric(df3$rn2)
    
  4. 过滤并删除不需要的列

    library(tidyverse)
    df4 <- df3 %>%
       filter(diff >= -0.02 & diff <= 0.02) %>%
       select(-c(rn1, rn2, diff))
    
    #          a1         b1        c1         d1         e1         a2         b2         c2         d2         e2
    #1 4974622.51 4664319.00 5906075.5 5401712.02 5273676.52 5898584.48 3719350.77 3587499.00 3128479.01 3847005.49
    #2  130545.00  204772.41  296824.9  163485.51  129607.97  185567.97   68855.38   59750.49   45668.59   94196.10
    #3  143528.82  204819.21  308432.8  179567.34  142279.79  205617.78   78201.25   70217.31   56013.45  106888.96
    #4   51879.52   82742.85  120352.4   63790.02   47230.45   65505.56   23387.08   20200.08   16795.27   30856.79
    #5   52027.46   79706.77  122622.4   64660.97   48367.41   68121.47   25374.98   22242.00   16358.21   33880.70
    

推荐阅读