r - 唯一对(列)之间的共享变量在 R 中使用 widyr() 给出错误
问题描述
我正在尝试计算区域对之间共享了多少物种。因此,如果物种 A、B 和 C 都出现在美国和加拿大,但物种 D 只出现在加拿大,那么美国-加拿大对的共有物种数 == 3。我使用了 widyr( ) 包在 R 中对大型数据集(50 个国家/地区的 >10,000 个物种)执行此操作,但我遇到两个错误:
- 如果我分析整个数据集,则会丢失一些区域对,而如果我只是对这两个区域进行子集化,那么它就可以工作(给出该对的共享物种的数量)。2)我只尝试了区域的一个子集,与使用完整数据集相比,我得到了共享多少物种的不同答案。
我看过一些关于计算对之间距离和唯一对之间距离的类似帖子,但这并不是我想要做的。我以前从未使用过widyr,我无法弄清楚错误是什么 - 我想知道它是否与“第一个”和“最后一个”有关?
# sample.dat.sub dput below - it's long but I wasn't sure I coudl reproduce the problem with only a few rows
# sample.dat.sub has cols "species_id" and "rgn_id"
## get col names
spp_ids <- sample.dat.sub %>% distinct(species_id) %>% as.character()
rgn_ids <- sample.dat.sub %>% distinct(rgn_id)
rgn_ids <- rgn_ids$rgn_id
rgn_ids <- as.character(rgn_ids)
## find pairs
pairs.df <- sample.dat.sub %>%
group_by(species_id) %>%
arrange(rgn_id) %>%
# maybe this is the problem?
mutate(rgn1 = first(rgn_id), rgn2 = last(rgn_id))
result.df <- pairs.df %>%
group_by(rgn1, rgn2) %>%
# number of unique species that each region pair shares
summarize(n_spp = n_distinct(species_id))
# THE PROBLEM:
result.df %>% filter(rgn1 == 134) # says 134-136 have 27 shared species, but if I just analyzed those 2 reigons alone, it says they share 75 species...
## test the sample
sample.test <- sample.dat.sub %>% filter(rgn_id %in% c(134,136))
# get col names
test.spp_ids <- sample.test %>% distinct(species_id) %>% as.character()
test.rgn_ids <- sample.test %>% distinct(rgn_id)
test.rgn_ids <- test.rgn_ids$rgn_id
test.rgn_ids <- as.character(test.rgn_ids)
# pairs
test.pairs.df <- sample.test %>%
group_by(species_id) %>%
arrange(rgn_id) %>%
mutate(rgn1 = first(rgn_id), rgn2 = last(rgn_id))
test.pairs.df
test.result.df <- test.pairs.df %>%
group_by(rgn1, rgn2) %>%
summarize(n_spp = n_distinct(species_id))
test.result.df %>% arrange(desc(n_spp)) # now it says 75 species shared
## SAMPLE DATA
sample.dat.sub <- structure(list(species_id = c("Brc-378165", "Brc-378165", "Brc-378165",
"Chn-0721ac0c-7d19-46fa-9dcc-02da54310867", "Chn-0721ac0c-7d19-46fa-9dcc-02da54310867",
"Chn-0721ac0c-7d19-46fa-9dcc-02da54310867", "Chn-46f58c2c-1280-4f6a-a148-e854c19246fe",
"Chn-46f58c2c-1280-4f6a-a148-e854c19246fe", "Chn-4a9f789f-84e4-4da2-b520-c754b804a303",
"Chn-4a9f789f-84e4-4da2-b520-c754b804a303", "Chn-4a9f789f-84e4-4da2-b520-c754b804a303",
"Chn-5b98a280-b9a0-435b-8ed9-87b3f63fc29b", "Chn-5b98a280-b9a0-435b-8ed9-87b3f63fc29b",
"Chn-5b98a280-b9a0-435b-8ed9-87b3f63fc29b", "Chn-eff8756a-9d9f-4430-886b-4a0badfed55d",
"Chn-eff8756a-9d9f-4430-886b-4a0badfed55d", "Chn-eff8756a-9d9f-4430-886b-4a0badfed55d",
"Chn-f031838d-0a64-4288-8d25-04c0ab44ad67", "Chn-f41af29f-5e40-4cb4-8862-220e6e64b329",
"Chn-f41af29f-5e40-4cb4-8862-220e6e64b329", "Chn-f41af29f-5e40-4cb4-8862-220e6e64b329",
"Fis-10903", "Fis-10993", "Fis-10993", "Fis-10993", "Fis-112580",
"Fis-112580", "Fis-112580", "Fis-112580", "Fis-112582", "Fis-112582",
"Fis-112582", "Fis-112590", "Fis-112590", "Fis-112590", "Fis-112688",
"Fis-112688", "Fis-112688", "Fis-112689", "Fis-112689", "Fis-112689",
"Fis-112689", "Fis-112706", "Fis-112706", "Fis-112706", "Fis-112706",
"Fis-112822", "Fis-112822", "Fis-112822", "Fis-113904", "Fis-113904",
"Fis-114088", "Fis-114088", "Fis-114088", "Fis-114098", "Fis-114098",
"Fis-114098", "Fis-114124", "Fis-114124", "Fis-114124", "Fis-114187",
"Fis-11429", "Fis-11429", "Fis-11429", "Fis-114378", "Fis-114378",
"Fis-114378", "Fis-114615", "Fis-114615", "Fis-114615", "Fis-114618",
"Fis-114618", "Fis-114618", "Fis-114618", "Fis-114623", "Fis-114623",
"Fis-114623", "Fis-114937", "Fis-114937", "Fis-114937", "Fis-114940",
"Fis-114940", "Fis-114940", "Fis-115015", "Fis-115015", "Fis-115015",
"Fis-115342", "Fis-115352", "Fis-115352", "Fis-115352", "Fis-115623",
"Fis-115623", "Fis-115623", "Fis-115798", "Fis-115798", "Fis-115798",
"Fis-115798", "Fis-115821", "Fis-115821", "Fis-115821", "Fis-115864",
"Fis-115864", "Fis-115865", "Fis-115865", "Fis-115865", "Fis-115865",
"Fis-115865", "Fis-115873", "Fis-115873", "Fis-115873", "Fis-115873",
"Fis-115957", "Fis-115957", "Fis-115957", "Fis-115963", "Fis-115963",
"Fis-115963", "Fis-115986", "Fis-115986", "Fis-115986", "Fis-115989",
"Fis-115989", "Fis-115989", "Fis-116375", "Fis-116375", "Fis-116472",
"Fis-116479", "Fis-116479", "Fis-116479", "Fis-116737", "Fis-116737",
"Fis-116737", "Fis-116742", "Fis-116742", "Fis-116742", "Fis-116747",
"Fis-116747", "Fis-116747", "Fis-116901", "Fis-116901", "Fis-116901",
"Fis-116910", "Fis-116910", "Fis-116910", "Fis-116926", "Fis-116926",
"Fis-117119", "Fis-117121", "Fis-117126", "Fis-117126", "Fis-117126",
"Fis-117130", "Fis-117130", "Fis-117130", "Fis-117220", "Fis-117220",
"Fis-117220", "Fis-117350", "Fis-117350", "Fis-117350", "Fis-117350",
"Fis-117358", "Fis-117358", "Fis-117370", "Fis-117370", "Fis-117370",
"Fis-117375", "Fis-117375", "Fis-117375", "Fis-117773", "Fis-117773",
"Fis-117773", "Fis-117773", "Fis-118052", "Fis-118052", "Fis-118052",
"Fis-118058", "Fis-118058", "Fis-118058", "Fis-118058", "Fis-118106",
"Fis-118106", "Fis-118106", "Fis-118742", "Fis-118742", "Fis-120072",
"Fis-120072", "Fis-120072", "Fis-120428", "Fis-120428", "Fis-120470",
"Fis-120470", "Fis-120470", "Fis-120470", "Fis-120470", "Fis-120470",
"Fis-120669", "Fis-120908", "Fis-120908", "Fis-120908", "Fis-122362",
"Fis-122362", "Fis-122362", "Fis-123096", "Fis-123121", "Fis-123121",
"Fis-123121", "Fis-123123", "Fis-123123", "Fis-123123", "Fis-123128",
"Fis-123128", "Fis-123128", "Fis-123959", "Fis-123959", "Fis-123959",
"Fis-123975", "Fis-124168", "Fis-124168", "Fis-124204", "Fis-124204",
"Fis-124208", "Fis-124211", "Fis-124211", "Fis-124211", "Fis-124275",
"Fis-124275", "Fis-124324", "Fis-124324", "Fis-124376", "Fis-124376",
"Fis-124376", "Fis-124376", "Fis-124376", "Fis-124378", "Fis-124378",
"Fis-125137", "Fis-125137", "Fis-125137", "Fis-125137", "Fis-125186",
"Fis-125235", "Fis-125622", "Fis-125622", "Fis-125622", "Fis-125639",
"Fis-125639", "Fis-125642", "Fis-125642", "Fis-125886", "Fis-125888",
"Fis-125893", "Fis-125893", "Fis-125893", "Fis-126085", "Fis-126085",
"Fis-126140", "Fis-126296", "Fis-126299", "Fis-126299", "Fis-126299",
"Fis-126681", "Fis-126681", "Fis-126681", "Fis-126951", "Fis-127016",
"Fis-127016", "Fis-127016", "Fis-127124", "Fis-127433", "Fis-127433",
"Fis-127444", "Fis-127530", "Fis-127585", "Fis-127585", "Fis-128046",
"Fis-128046", "Fis-128046", "Fis-128063", "Fis-128093", "Fis-128093",
"Fis-128093", "Fis-128094", "Fis-128094", "Fis-128110", "Fis-128110",
"Fis-128236", "Fis-128236", "Fis-128376", "Fis-128376", "Fis-128376",
"Fis-128376", "Fis-128422", "Fis-128608", "Fis-128608", "Fis-128612",
"Fis-128612", "Fis-130029", "Fis-130029", "Fis-130029", "Fis-130702",
"Fis-130702", "Fis-130702", "Fis-130702", "Fis-130709", "Fis-130709",
"Fis-130709", "Fis-130709", "Fis-130771", "Fis-130806", "Fis-130806",
"Fis-130806", "Fis-130825", "Fis-130825", "Fis-130825", "Fis-130876",
"Fis-130876", "Fis-130876", "Fis-130917", "Fis-130917", "Fis-130917",
"Fis-130921", "Fis-130921", "Fis-131008", "Fis-131178", "Fis-131178",
"Fis-131312", "Fis-131312", "Fis-131316", "Fis-131316", "Fis-131323",
"Fis-131323", "Fis-131323", "Fis-131546", "Fis-131546", "Fis-131546",
"Fis-131546", "Fis-131590", "Fis-131590", "Fis-131590", "Fis-131936",
"Fis-132003", "Fis-132003", "Fis-132003", "Fis-132010", "Fis-132010",
"Fis-132010", "Fis-132018", "Fis-132018", "Fis-132018", "Fis-132022",
"Fis-132022", "Fis-132022", "Fis-132037", "Fis-132037", "Fis-132037",
"Fis-132059", "Fis-132059", "Fis-132059", "Fis-132062", "Fis-132062",
"Fis-132062", "Fis-132062", "Fis-132064", "Fis-132064", "Fis-132064",
"Fis-132076", "Fis-132076", "Fis-132076", "Fis-132087", "Fis-132087",
"Fis-132142", "Fis-132142", "Fis-132153", "Fis-132153", "Fis-132153",
"Fis-132157", "Fis-132157", "Fis-132159", "Fis-132159", "Fis-132159",
"Fis-132167", "Fis-132167", "Fis-132167", "Fis-132169", "Fis-132169",
"Fis-132169", "Fis-132243", "Fis-132243", "Fis-132243", "Fis-132269",
"Fis-132269", "Fis-132269", "Fis-132395", "Fis-132433", "Fis-132433",
"Fis-132433", "Fis-132442", "Fis-132442", "Fis-132442", "Fis-132477",
"Fis-132477", "Fis-132477", "Fis-132479", "Fis-132479", "Fis-132479",
"Fis-132487", "Fis-132487", "Fis-132487", "Fis-132488", "Fis-132488",
"Fis-132488", "Fis-132490", "Fis-132490", "Fis-132490", "Fis-132496",
"Fis-132597", "Fis-132597", "Fis-132597", "Fis-132597", "Fis-132605",
"Fis-132605", "Fis-132605", "Fis-132609", "Fis-132609", "Fis-132609",
"Fis-132643", "Fis-132643", "Fis-132754", "Fis-132754", "Fis-132754",
"Fis-132762", "Fis-132762", "Fis-132762", "Fis-132763", "Fis-132763",
"Fis-132763", "Fis-132766", "Fis-132770", "Fis-132808", "Fis-132808",
"Fis-132808", "Fis-132813", "Fis-132813", "Fis-132813", "Fis-132813",
"Fis-132814", "Fis-132814", "Fis-132814", "Fis-132966", "Fis-133336",
"Fis-133336", "Fis-133336", "Fis-133336", "Fis-133346", "Fis-133346",
"Fis-133350", "Fis-133350", "Fis-133350", "Fis-133356", "Fis-133356",
"Fis-133356", "Fis-133356", "Fis-133480", "Fis-133508", "Fis-133510",
"Fis-133512", "Fis-133521", "Fis-133521", "Fis-133521", "Fis-133526",
"Fis-133537", "Fis-133549", "Fis-133567", "Fis-133567", "Fis-133567",
"Fis-133571", "Fis-133571", "Fis-133571", "Fis-133572", "Fis-133572",
"Fis-133572", "Fis-133755", "Fis-133755", "Fis-133755", "Fis-133755",
"Fis-134025", "Fis-134025", "Fis-134025", "Fis-134029", "Fis-134029",
"Fis-134029", "Fis-134033", "Fis-134033", "Fis-134057", "Fis-134092",
"Fis-134103", "Fis-134108", "Fis-134125", "Fis-134130"), rgn_id = c("110",
"112", "136", "138", "136", "134", "136", "134", "134", "138",
"136", "134", "136", "138", "134", "138", "136", "138", "138",
"134", "136", "138", "138", "134", "136", "250", "136", "110",
"112", "250", "110", "112", "250", "112", "110", "112", "250",
"110", "112", "110", "136", "250", "250", "136", "112", "110",
"250", "110", "112", "136", "134", "136", "138", "134", "138",
"136", "134", "138", "136", "134", "138", "110", "250", "112",
"112", "110", "250", "112", "110", "250", "110", "250", "136",
"112", "250", "112", "110", "112", "110", "250", "112", "110",
"250", "110", "250", "112", "110", "136", "112", "110", "250",
"110", "112", "112", "250", "110", "136", "112", "110", "250",
"112", "110", "134", "250", "138", "136", "112", "250", "112",
"136", "110", "112", "250", "110", "110", "250", "112", "112",
"250", "110", "112", "250", "110", "136", "134", "138", "250",
"110", "112", "250", "112", "110", "138", "134", "136", "138",
"134", "136", "136", "134", "138", "134", "136", "138", "138",
"136", "138", "138", "250", "112", "110", "134", "138", "136",
"250", "112", "110", "112", "110", "250", "136", "136", "134",
"250", "110", "112", "112", "110", "250", "250", "110", "136",
"112", "112", "250", "110", "250", "136", "110", "112", "250",
"136", "112", "136", "134", "136", "134", "138", "136", "134",
"110", "112", "134", "250", "136", "138", "138", "136", "134",
"138", "110", "250", "112", "138", "134", "138", "136", "134",
"138", "136", "134", "136", "138", "138", "134", "136", "138",
"134", "136", "134", "136", "138", "134", "138", "136", "134",
"136", "136", "134", "134", "112", "136", "110", "250", "110",
"138", "112", "110", "136", "134", "138", "138", "136", "134",
"138", "136", "134", "136", "134", "138", "138", "112", "250",
"110", "134", "136", "138", "112", "250", "112", "110", "250",
"112", "110", "134", "134", "138", "136", "138", "134", "136",
"138", "138", "136", "134", "250", "112", "110", "138", "138",
"134", "136", "134", "136", "136", "134", "134", "136", "110",
"112", "250", "136", "138", "134", "136", "136", "134", "110",
"112", "250", "110", "250", "136", "112", "250", "112", "110",
"136", "138", "250", "112", "110", "112", "110", "250", "136",
"138", "134", "112", "110", "250", "136", "134", "112", "134",
"136", "134", "136", "134", "136", "136", "138", "134", "250",
"112", "138", "110", "112", "250", "110", "134", "112", "110",
"250", "136", "138", "134", "136", "134", "138", "250", "110",
"112", "134", "138", "136", "110", "112", "250", "136", "250",
"110", "112", "136", "134", "138", "134", "138", "136", "110",
"112", "110", "112", "112", "250", "110", "110", "112", "110",
"250", "112", "138", "134", "136", "250", "110", "112", "112",
"110", "250", "110", "250", "112", "138", "136", "138", "134",
"134", "136", "138", "138", "136", "134", "134", "136", "138",
"134", "136", "138", "136", "138", "134", "138", "136", "134",
"136", "110", "136", "112", "250", "250", "110", "112", "110",
"112", "250", "134", "136", "112", "250", "110", "250", "110",
"112", "134", "136", "138", "138", "138", "134", "138", "136",
"110", "250", "112", "136", "250", "110", "112", "138", "110",
"138", "250", "112", "136", "134", "250", "110", "112", "136",
"110", "112", "250", "138", "138", "138", "138", "138", "136",
"134", "138", "138", "138", "110", "250", "112", "112", "250",
"110", "112", "250", "110", "250", "112", "110", "136", "136",
"138", "134", "136", "138", "134", "136", "134", "138", "138",
"138", "138", "138", "134")), row.names = c(NA, -500L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x00000281a3641ef0>)
解决方案
以下代码应该可以回答您的问题。使用 self-join in 很容易解决tidyverse
,不需要额外的包widyr
(我不知道)
library(dplyr) #NB: use library(tidyverse) if you want full functionality
df_distinct <- sample.dat.sub %>% distinct(species_id, rgn_id)
inner_join(df_distinct, df_distinct , by="species_id", suffix=c("_1","_2")) %>%
filter(rgn_id_1 < rgn_id_2) %>%
group_by(rgn_id_1, rgn_id_2) %>%
summarise(nb_common_species = n())
推荐阅读
- java - 字符串 (19-Aug-2019 11:05) 到日期格式
- docker - 无法从 multiprocessing.semaphore_tracker import main;main(4) 加载配置
- salesforce - Salesforce 站点。在哪个用户的上下文中,在站点上执行操作?
- python - 从其他数据框中找到前三个最近的点
- java - 每次点击后活动都会自行打开
- node.js - 护照本地奇怪的错误:“预计‘长度’是一个非负有限数”
- android - 防止部分 Tapjacking - Android
- java - 在java中通过值克隆对象的方法——而不是通过引用
- azure - 需要使用 rest api 调用来获取 azure 自动化 Runbook 错误
- reactjs - 从 ReactQuill 命名空间导入接口的问题