首页 > 解决方案 > 将行扩大两年列

问题描述

我正在尝试通过begyrsand扩展我的数据endyrs。我的数据看起来像:

   pdpcols pdpvalues begyrs begyrsvalues gvkeys gvkeysvalues endyrs endyrsvalues
   <chr>       <int> <chr>         <int> <chr>         <int> <chr>         <int>
 1 pdpco1     138490 begyr1         1998 gvkey1       138490 endyr1         2001
 2 pdpco1     138490 begyr1         1998 gvkey1       138490 endyr2         2005
 3 pdpco1     138490 begyr2         2003 gvkey1       138490 endyr1         2001
 4 pdpco1     138490 begyr2         2003 gvkey1       138490 endyr2         2005
 5 pdpco2       6266 begyr1         1998 gvkey1       138490 endyr1         2001
 6 pdpco2       6266 begyr1         1998 gvkey1       138490 endyr2         2005
 7 pdpco2       6266 begyr2         2003 gvkey1       138490 endyr1         2001
 8 pdpco2       6266 begyr2         2003 gvkey1       138490 endyr2         2005
 9 pdpco1     138490 begyr1         1998 gvkey1       138490 endyr1         2001
10 pdpco1     138490 begyr1         1998 gvkey1       138490 endyr2         2005

我想做的是扩展数据,这样,对于每个观察,行将从 扩展begyrsvalues到。endyrsvalues所以第 1 行将从1998to扩展2001,第 2 行从1998to扩展2005。这会产生相当多的重复值。

我已经尝试过group_by(gvkeysvalues),然后尝试在没有运气的情况下扩展它们。

数据:

data <- structure(list(source = c("MTCH1", "MTCH1", "MTCH1", "MTCH1", 
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", 
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "m2006", "m2006", 
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "MTCH1", 
"MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", "MTCH1", 
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "m2006", 
"m2006", "m2006", "m2006", "m2006", "m2006", "m2006", "m2006", 
"m2006", "m2006", "PASS1", "PASS1", "PASS1", "PASS1", "PASS1", 
"PASS1", "PASS1", "PASS1", "PASS1"), id = c(8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 23L, 
23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L, 3786L, 
5432L), cod = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L
), .Label = c("01 Unassigned", "02 US corporation", "03 Foreign corp, incl. state-owned", 
"04 US individual", "05 Foreign individual", "06 US government", 
"07 Foreign government", "08 US local government", "09 US state government", 
"US university", "Foreign university", "US institute", "Foreign institute", 
"US hospital/med inst", "Foreign hospital/med inst"), class = "factor"), 
    cod_fix = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), pdpass = c(12233315L, 12233315L, 12233315L, 12233315L, 
    12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 
    12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 12233315L, 
    11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 
    11864228L, 11864228L, 12233315L, 12233315L, 12233315L, 12233315L, 
    12233315L, 12233315L, 12233315L, 12233315L, 11864228L, 11864228L, 
    11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 
    11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 11864228L, 
    11864228L, 11864228L, 11146104L, 11146104L, 11146104L, 11146104L, 
    11146104L, 11146104L, 11146104L, 11146104L, 11628725L), standard_name = c("3 DIMENSIONAL PHARM", 
    "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", 
    "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM", 
    "3 DIMENSIONAL PHARM", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", 
    "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", 
    "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", "3 DIMENSIONAL PHARM INC", 
    "3D LABS INC LTD", "3D LABS INC LTD", "3D LABS INC LTD", 
    "3D LABS INC LTD", "3D LABS INC LTD", "3D LABS INC LTD", 
    "3D LABS INC LTD", "3D LABS INC LTD", "3D PHARM INC", "3D PHARM INC", 
    "3D PHARM INC", "3D PHARM INC", "3D PHARM INC", "3D PHARM INC", 
    "3D PHARM INC", "3D PHARM INC", "3DLABS INC LED", "3DLABS INC LED", 
    "3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LED", 
    "3DLABS INC LED", "3DLABS INC LED", "3DLABS INC LTD", "3DLABS INC LTD", 
    "3DLABS INC LTD", "3DLABS INC LTD", "3DLABS INC LTD", "3DLABS INC LTD", 
    "3DLABS INC LTD", "3DLABS INC LTD", "CAMBRIDGE SOUNDWORKS INC", 
    "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", 
    "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", "CAMBRIDGE SOUNDWORKS INC", 
    "CAMBRIDGE SOUNDWORKS INC", "CREATIVE TECH LTD"), uspto_assignee = c(715707L, 
    715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 
    715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 715707L, 
    715707L, 727325L, 727325L, 727325L, 727325L, 727325L, 727325L, 
    727325L, 727325L, 715707L, 715707L, 715707L, 715707L, 715707L, 
    715707L, 715707L, 715707L, 727325L, 727325L, 727325L, 727325L, 
    727325L, 727325L, 727325L, 727325L, 727325L, 727325L, 727325L, 
    727325L, 727325L, 727325L, 727325L, 727325L, 85595L, 85595L, 
    85595L, 85595L, 85595L, 85595L, 85595L, 85595L, 721579L), 
    pdpcols = c("pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2", 
    "pdpco2", "pdpco2", "pdpco2", "pdpco1", "pdpco1", "pdpco1", 
    "pdpco1", "pdpco2", "pdpco2", "pdpco2", "pdpco2", "pdpco1", 
    "pdpco1", "pdpco1", "pdpco1", "pdpco2", "pdpco2", "pdpco2", 
    "pdpco2", "pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2", 
    "pdpco2", "pdpco2", "pdpco2", "pdpco1", "pdpco1", "pdpco1", 
    "pdpco1", "pdpco2", "pdpco2", "pdpco2", "pdpco2", "pdpco1", 
    "pdpco1", "pdpco1", "pdpco1", "pdpco2", "pdpco2", "pdpco2", 
    "pdpco2", "pdpco1", "pdpco1", "pdpco1", "pdpco1", "pdpco2", 
    "pdpco2", "pdpco2", "pdpco2", "pdpco1"), pdpvalues = c(138490L, 
    138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 138490L, 
    138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 64005L, 
    64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 138490L, 
    138490L, 138490L, 138490L, 6266L, 6266L, 6266L, 6266L, 64005L, 
    64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 64005L, 
    64005L, 64005L, 64005L, 25773L, 25773L, 25773L, 25773L, 30060L, 
    30060L, 30060L, 30060L, 25773L, 25773L, 25773L, 25773L, 25773L
    ), begyrs = c("begyr1", "begyr1", "begyr2", "begyr2", "begyr1", 
    "begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2", 
    "begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1", 
    "begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2", 
    "begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1", 
    "begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2", 
    "begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1", 
    "begyr1", "begyr2", "begyr2", "begyr1", "begyr1", "begyr2", 
    "begyr2", "begyr1", "begyr1", "begyr2", "begyr2", "begyr1", 
    "begyr1", "begyr2", "begyr2", "begyr1"), begyrsvalues = c(1998L, 
    1998L, 2003L, 2003L, 1998L, 1998L, 2003L, 2003L, 1998L, 1998L, 
    2003L, 2003L, 1998L, 1998L, 2003L, 2003L, 1995L, 1995L, 2002L, 
    2002L, 1995L, 1995L, 2002L, 2002L, 1998L, 1998L, 2003L, 2003L, 
    1998L, 1998L, 2003L, 2003L, 1995L, 1995L, 2002L, 2002L, 1995L, 
    1995L, 2002L, 2002L, 1995L, 1995L, 2002L, 2002L, 1995L, 1995L, 
    2002L, 2002L, 1993L, 1993L, 1998L, 1998L, 1993L, 1993L, 1998L, 
    1998L, 1992L), gvkeys = c("gvkey1", "gvkey1", "gvkey1", "gvkey1", 
    "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", 
    "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey1", 
    "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", 
    "gvkey2", "gvkey2", "gvkey1", "gvkey1", "gvkey1", "gvkey1", 
    "gvkey1", "gvkey1", "gvkey1", "gvkey1", "gvkey2", "gvkey2", 
    "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", 
    "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", 
    "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey2", 
    "gvkey2", "gvkey2", "gvkey2", "gvkey2", "gvkey1"), gvkeysvalues = c(138490L, 
    138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 
    138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 138490L, 
    138490L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 
    25773L, 25773L, 138490L, 138490L, 138490L, 138490L, 138490L, 
    138490L, 138490L, 138490L, 25773L, 25773L, 25773L, 25773L, 
    25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 
    25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 25773L, 
    25773L, 25773L, 25773L, 25773L, 25773L), endyrs = c("endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1", "endyr2", "endyr1", "endyr2", "endyr1", 
    "endyr2", "endyr1"), endyrsvalues = c(2001L, 2005L, 2001L, 
    2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L, 
    2001L, 2005L, 2001L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 
    2005L, 2000L, 2005L, 2001L, 2005L, 2001L, 2005L, 2001L, 2005L, 
    2001L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 
    2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L, 2000L, 2005L, 
    1997L, 2005L, 1997L, 2005L, 1997L, 2005L, 1997L, 2005L, 2006L
    )), row.names = c(NA, -57L), class = c("tbl_df", "tbl", "data.frame"
))

标签: rdplyr

解决方案


我们可以减去这两个值(差异)并diffs多次复制每一行。我们可以使用slice. 请注意,我们用 1 替换了负值(即 endyrs < begyrs),因此序列是有效的,即

data %>% 
 mutate(diffs = endyrsvalues - begyrsvalues, 
        diffs = replace(diffs, diffs < 0, 1)) %>% 
 slice(rep(seq(diffs), diffs)) %>% 
 select(-diffs)

推荐阅读