r - 根据所选阈值内的数字行名合并数据帧并保留不匹配的行
问题描述
如何在设置匹配阈值时根据它们的数字行名合并两个数据框?
df1 <- structure(list(c(4974622.505928, 170582.149747, 130545.004516,
143528.819582, 49416.594892, 51879.515558, 52027.462651, 42491.317116,
49173.145029, 44040.01261), c(4664319.00309, 266278.599338, 204772.412837,
204819.210688, 77718.961761, 82742.852809, 79706.774944, 67123.603629,
67264.401059, 66750.260768), c(5906075.502923, 385318.121061,
296824.944672, 308432.753482, 113407.50333, 120352.400266, 122622.356104,
98656.179336, 107669.002489, 100262.855064), c(5401712.020682,
204595.653994, 163485.509823, 179567.339348, 62690.116298, 63790.0244,
64660.971879, 52545.84055, 59080.66972, 54579.538267), c(5273676.522307,
159130.126808, 129607.971309, 142279.787439, 45812.561022, 47230.447746,
48367.405274, 39578.235275, 45489.065198, 43102.923417)), row.names = c("34.9816256",
"35.0576674", "35.0898006", "35.1270264", "35.1738664", "35.1936282",
"35.2043582", "35.2359934", "35.2716016", "35.2993064"), class = "data.frame")
df2 <- structure(list(c(5898584.48405, 302326.226264, 185567.968257,
205617.778019, 84476.66928, 65505.560486, 68121.465276, 63221.947902,
55028.866127, 36821.607091), c(3719350.766633, 108177.577417,
68855.378083, 78201.248427, 17558.118703, 23387.078772, 25374.978916,
18833.579115, 12761.529092, 11507.348928), c(3587498.99736, 96793.741428,
59750.485295, 70217.309923, 26233.188472, 20200.080468, 22241.999451,
20268.485836, 17330.391134, 12503.133961), c(3128479.008712,
70298.795438, 45668.592667, 56013.453832, 20323.368372, 16795.27218,
16358.208042, 15722.790712, 12276.726458, 9155.522864), c(3847005.494149,
138762.296854, 94196.099405, 106888.964213, 36614.870588, 30856.787329,
33880.704043, 31399.328936, 27819.255931, 18560.05768)), row.names = c("34.9815906",
"35.0356588", "35.0897702", "35.1269978", "35.1535182", "35.1744048",
"35.1952968", "35.3032464", "35.3207828", "35.3739834"), class = "data.frame")
数据帧的输出(第一行是rownames)
> df1
34.9816256 4974622.51 4664319.00 5906075.50 5401712.02 5273676.52
35.0576674 170582.15 266278.60 385318.12 204595.65 159130.13
35.0898006 130545.00 204772.41 296824.94 163485.51 129607.97
35.1270264 143528.82 204819.21 308432.75 179567.34 142279.79
35.1738664 49416.59 77718.96 113407.50 62690.12 45812.56
35.1936282 51879.52 82742.85 120352.40 63790.02 47230.45
35.2043582 52027.46 79706.77 122622.36 64660.97 48367.41
35.2359934 42491.32 67123.60 98656.18 52545.84 39578.24
35.2716016 49173.15 67264.40 107669.00 59080.67 45489.07
35.2993064 44040.01 66750.26 100262.86 54579.54 43102.92
> df2
34.9815906 5898584.48 3719350.77 3587499.00 3128479.009 3847005.49
35.0356588 302326.23 108177.58 96793.74 70298.795 138762.30
35.0897702 185567.97 68855.38 59750.49 45668.593 94196.10
35.1269978 205617.78 78201.25 70217.31 56013.454 106888.96
35.1535182 84476.67 17558.12 26233.19 20323.368 36614.87
35.1744048 65505.56 23387.08 20200.08 16795.272 30856.79
35.1952968 68121.47 25374.98 22242.00 16358.208 33880.70
35.3032464 63221.95 18833.58 20268.49 15722.791 31399.33
35.3207828 55028.87 12761.53 17330.39 12276.726 27819.26
35.3739834 36821.61 11507.35 12503.13 9155.523 18560.06
如果行名中两个数字之间的差异在 [-0.02, 0.02] 之间,我想根据它们的行名合并这两个数据集
换句话说,df1 中的每个rowname 都应该与df2 中的每个rowname 进行比较,如果发现两个rowname 的差异在[-0.02, 0.02] 范围内,则可以将数据合并到同一行上。如果未找到匹配项,则将 NA 添加到没有来自其他 df 的匹配数据的位置(如在 full_join 中)。
解决方案
您需要列名并添加
rownames
为列library(tibble) colnames(df1) <- c('a1', 'b1', 'c1', 'd1', 'e1') df1 <- rownames_to_column(df1, "rn1") colnames(df2) <- c('a2', 'b2', 'c2', 'd2', 'e2') df2 <- rownames_to_column(df2, "rn2")
连接两个数据框
df3 <- cbind(df1, df2)
计算两者之间的差异
rownames
df3['diff'] <- as.numeric(df3$rn1) - as.numeric(df3$rn2)
过滤并删除不需要的列
library(tidyverse) df4 <- df3 %>% filter(diff >= -0.02 & diff <= 0.02) %>% select(-c(rn1, rn2, diff)) # a1 b1 c1 d1 e1 a2 b2 c2 d2 e2 #1 4974622.51 4664319.00 5906075.5 5401712.02 5273676.52 5898584.48 3719350.77 3587499.00 3128479.01 3847005.49 #2 130545.00 204772.41 296824.9 163485.51 129607.97 185567.97 68855.38 59750.49 45668.59 94196.10 #3 143528.82 204819.21 308432.8 179567.34 142279.79 205617.78 78201.25 70217.31 56013.45 106888.96 #4 51879.52 82742.85 120352.4 63790.02 47230.45 65505.56 23387.08 20200.08 16795.27 30856.79 #5 52027.46 79706.77 122622.4 64660.97 48367.41 68121.47 25374.98 22242.00 16358.21 33880.70
推荐阅读
- vba - 我可以通过 VBA 在 Outlook 中创建签名并将其保存为默认签名吗?
- yii2 - 如何在视图中显示多个 GridView,其中一个 DataProvider 按某个字段对其进行分组?
- node.js - 我无法安装 puppeteer 库
- python - Importerror:ubuntu中python2.7.6中没有名为time的模块
- ubuntu - 在构建 VTK 时,ccmake 没有链接到我的 ubuntu 上最新版本的 cmake(ccmake 说 cmake 是 3.5.1 版,我不是!)
- python-3.x - 在一个图中创建 3 个熊猫系列作为饼图
- excel - 查找并替换以“F*”开头的项目
- amazon-s3 - 如何配置 `Terraform` 以将 zip 文件上传到 `s3` 存储桶,然后将它们部署到 lambda
- data-visualization - Power BI 中的自定义可视化
- python - 对于大小为 0 Python 的轴 0,索引 0 超出范围