首页 > 解决方案 > 如何在 2 列的情况下将 1 添加到列的值?

问题描述

有一个名为 PERNO 的组,如果在第 i 行 col2=="a" 中的每个组,我想将 i+1 行添加到 col3 组的末尾,并带有 1

      PERNO     col2      col3
        1         b         3
        1         d         3
        1         a         4
        1         d         5
        2         v         2
        2         a         3
        2         a         4
        2         x         4
        2         h         5

输出

      PERNO     col2      col3
        1         b         3
        1         d         3
        1         a         4
        1         d         6
        2         v         2
        2         a         3
        2         a         4
        2         x         5
        2         h         6

在第一组中,col3 的第四行加了一个,因为它在 col2==a 行的旁边

在第二组中,最后一行 2 也加了一个

真实数据:

str(df)

Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   153008 obs. of  3 variables:
 $ PERNO: num  1 1 1 1 1 1 1 1 2 2 ...
 $ loop : num  1 2 2 2 2 2 2 2 1 2 ...
 $ TPURP: Factor w/ 23 levels "(1) Working at home (for pay)",..: 2 3 2 13 13 13 15 2 2 13 ...

输入(df)

structure(list(PERNO = c(1, 1, 1, 1, 1, 1), loop = c(2, 1, 2, 
2, 2, 2), TPURP = structure(c(2L, 2L, 22L, 15L, 15L, 15L), .Label = c("(1) Working at home (for pay)", 
"(2) All other home activities", "(3) Work/Job", "(4) All other activities at work", 
"(5) Attending class", "(6) All other activities at school", 
"(7) Change type of transportation/transfer", "(8) Dropped off passenger", 
"(9) Picked up passenger", "(10) Other, specify - transportation", 
"(11) Work/Business related", "(12) Service Private Vehicle", 
"(13) Routine Shopping", "(14) Shopping for major purchases", 
"(15) Household errands", "(16) Personal Business", "(17) Eat meal outside of home", 
"(18) Health care", "(19) Civic/Religious activities", "(20) Recreation/Entertainment", 
"(21) Visit friends/relative", "(24) Loop trip", "(97) Other, specify"
), class = "factor")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L))

我们在哪里

loop==col3,     TPURP==col2    "a"== (24) Loop trip

标签: rdataframe

解决方案


按'PERNO'分组后,得到cummax逻辑表达式(col2 == 'a'),返回lag它指定'n'为'col2'中'a'元素的个数,并+与'col3'相加()

library(dplyr)
df1 %>%
   group_by(PERNO) %>%
   mutate(col3 = col3 +lag(cummax(col2 == 'a'), n = sum(col2 == "a"), default = 0))
# A tibble: 9 x 3
# Groups:   PERNO [2]
#  PERNO col2   col3
#  <int> <chr> <dbl>
#1     1 b         3
#2     1 d         3
#3     1 a         4
#4     1 d         6
#5     2 v         2
#6     2 a         3
#7     2 a         4
#8     2 x         5
#9     2 h         6

或者另一种选择是找到最后一次出现 'a' 的位置,用于case_when在该位置之后添加 1

df1 %>% 
  group_by(PERNO) %>% 
  mutate(col3 = if('a' %in% col2) case_when(row_number() > 
         tail(which(col2 == 'a'), 1) ~
          col3 + 1L, TRUE ~ col3 ) else col3)

或避免有条件if/elsepmax

df1 %>% 
   group_by(PERNO) %>%
   mutate(col3 = case_when(row_number() > pmin(n(), 
     tail(which(col2 == 'a'), 1)[1], na.rm = TRUE) ~ col3 + 1L,
         TRUE ~ col3))

或使用data.table

library(data.table)
i1 <- setDT(df1)[,   .I[.I > tail(.I[col2 == 'a'], 1)], PERNO]$V1
df1[i1, col3 := col3 + 1L]

更新

使用 OP 的新数据集

df %>% 
    mutate_if(is.factor, as.character) %>%
    group_by(PERNO) %>%
    mutate(loop = case_when(row_number() > pmin(n(), 
         tail(which(TPURP == "(24) Loop trip"), 1)[1], na.rm = TRUE) ~ loop + 1,
           TRUE ~ loop))
# A tibble: 6 x 3
# Groups:   PERNO [1]
#  PERNO  loop TPURP                        
#  <dbl> <dbl> <chr>                        
#1     1     2 (2) All other home activities
#2     1     1 (2) All other home activities
#3     1     2 (24) Loop trip               
#4     1     3 (15) Household errands       
#5     1     3 (15) Household errands       
#6     1     3 (15) Household errands       

数据

df1 <- structure(list(PERNO = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), 
    col2 = c("b", "d", "a", "d", "v", "a", "a", "x", "h"), col3 = c(3L, 
    3L, 4L, 5L, 2L, 3L, 4L, 4L, 5L)), class = "data.frame", row.names = c(NA, 
-9L))


df9 %>% 
  group_by(PERNO) %>% 
  summarise(Sum = sum(grepl("(24) Loop trip", TPURP, fixed = TRUE)))

PERNO   Sum
  <dbl> <int>
1     1   483
2     2   268
3     3    60
4     4    39
5     5    16
6     6    11
7     7     0
8     8     0

如果我们用 OP 的完整数据检查输出

fileN <- 'df.csv'
df <- read.csv(fileN, stringsAsFactors = FALSE)

out <- df %>% 
     group_by(PERNO) %>%
     mutate(loop = case_when(row_number() > pmin(n(), 
                                                   tail(which(TPURP == "(24) Loop trip"), 1)[1],
                                                   na.rm = TRUE) ~ loop + 1L,
                          TRUE ~ loop))

- 从“loop”列中字符串的“last”匹配中检查第一个“PERNO”的输出

 df %>% 
    filter(PERNO == 1) %>% 
    select(TPURP, loop) %>% 
    filter(row_number() >= tail(which(TPURP == "(24) Loop trip"), 1)[1]) %>%
     pull(loop) %>% 
     head(10)
#[1] 2 2 2 2 1 2 2 1 2 2

out %>%
    ungroup %>% 
    filter(PERNO == 1) %>%
    select(TPURP, loop) %>%
    filter(row_number() >= tail(which(TPURP == "(24) Loop trip"), 1)[1]) %>%
    pull(loop) %>% 
    head(10)
#[1] 2 3 3 3 2 3 3 2 3 3

注意,这些值加 1


推荐阅读