首页 > 解决方案 > 根据 R 中的列的条件,为每条记录选择 12 个不同的列

问题描述

在此处输入图像描述

所以,我有上面的数据集,其中每一行代表一个顾问。Pol_Month 代表第一次交易的月份。从第一次交易的月份开始,我需要跟踪他的交易 12 个月。Ex - 对于 Adv 'S301' ,他的第一笔交易是在 1 月,因此从 January_APE 到 FY1819_December_APE,他的交易将在 Month1、Month2... 下捕获。Month12 第一个交易月份在 4 月之前,从 4 月开始计算 12 个月。所以,基本上,对于每条记录,我需要从 Pol_Month 开始遍历列并捕获到第 12 个月的值。如果 Pol_Month 是 3 月,则遍历从 4 月开始。

预期输出:

Adv_Code    Pol_Month   Month1_APE  Month2_APE  Month3_APE  Month4_APE  Month5_APE  Month6_APE  Month7_APE  Month8_APE  Month9_APE  Month10_APE Month11_APE Month12_APE
A299        March       0           0           0           0           0           0           0           0           0           0           0           0
A298        NA          0           0           0           0           0           0           0           0           0           0           0           0
S300        NA          0           0           0           0           0           0           0           0           0           0           0           0
S301        January     19101       0           0           0           0           0           19000       0           0           0           0           0
A299        March       0           0           0           0           0           0           0           0           0           0           0           0
A298        March       0           0           0           0           0           0           0           0           0           0           0           0
S300        March       0           0           0           0           0           0           0           0           0           0           0           0
S301        March       0           0           0           0           0           0           0           0           0           0           0           0
S300        March       0           0           0           0           0           0           0           0           0           0           0           0
S301        April       15413       29798       70010       20342       24521       20550       0           0           25920       0           0           0

有人可以帮我吗?

下面是重现df的代码:

df <- structure(list(Adv_Code = structure(c(2L, 1L, 3L,4L,2L,1L,3L,4L,3L,4L), .Label = c("A298","A299", "S300","S301","S302","S303","S304","S305","S309"), class = "factor"),
                     #Pol_Dt = structure(c(2L,3L, 2L,2L,2L,3L,2L,3L,2L,3L), .Label = c("03/31/2017", NA, NA,"1/30/2018","3/31/2017","3/31/2017","3/31/2017","3/31/2017","3/31/2017","4/25/2017"), class = "factor"), 
                     Pol_Month = structure(1:10, .Label = c("March",NA,NA,"January","March","March","March","March","March","April"), class = "factor"),
                     April_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,15413L), 
                     May_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,29798L), June_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,70010L), July_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,20342L),
                     August_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,24521L), September_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,20550L), October_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,0L),
                     November_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,0L),December_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,25920L), January_APE = c(0L, 0L, 0L,19101L,0L,0L,0L,0L,0L,0L),
                     February_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,0L), March_APE = c(0L, 0L, 0L,0L,0L,0L,0L,0L,0L,0L),
                     FY1819_April_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,16440L),FY1819_May_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,16440L),
                     FY1819_June_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),FY1819_July_APE = c(NA,NA,NA,19000L,NA,NA,NA,NA,NA,0L),
                     FY1819_August_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,15413L),FY1819_September_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),
                     FY1819_October_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),FY1819_November_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),FY1819_December_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),
                     FY1819_January_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L),FY1819_February_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,15413L),FY1819_March_APE = c(NA,NA,NA,0L,NA,NA,NA,NA,NA,0L)),
                class = "data.frame", row.names = c(NA, -10L))

有人可以帮忙吗!

标签: rconditional-statements

解决方案


这是使用apply. 对于每一行,如果Pol_Month列是NA,我们将返回固定的前 12 个值,如果不是,我们更改month_name"April"如果它在前 3 个月中存在,则else保持原样并选择month_name存在值的列的 12 个值。

outdf <- df[1:2]

outdf[paste0("Month", 1:12, "_APE")] <- t(apply(df, 1, function(x) {
       if (is.na(x[["Pol_Month"]])) 
             x[3:14] 
       else {
          month_name <- if (match(x[["Pol_Month"]], month.name) < 4) "April" else x[["Pol_Month"]]
          ind <- which.max(grepl(month_name, names(df)))
          x[ind : (ind + 11)]
         }
}))

outdf[1:7]

#   Adv_Code Pol_Month Month1_APE Month2_APE Month3_APE Month4_APE Month5_APE
#1      A299     March          0          0          0          0          0
#2      A298      <NA>          0          0          0          0          0
#3      S300      <NA>          0          0          0          0          0
#4      S301   January          0          0          0          0          0
#5      A299     March          0          0          0          0          0
#6      A298     March          0          0          0          0          0
#7      S300     March          0          0          0          0          0
#8      S301     March          0          0          0          0          0
#9      S300     March          0          0          0          0          0
#10     S301     April      15413      29798      70010      20342      24521

这是使用相同逻辑的dplyr/版本tidyr

library(dplyr)
library(tidyr)

df %>%
   mutate(row = row_number()) %>%
   gather(key, value, -c(1:2, row)) %>%
   group_by(row) %>%
   slice(if(is.na(first(Pol_Month))) 1:12 else {ind = max(match(first(Pol_Month), month.name), 4); ind : (ind + 11) }) %>%
   mutate(key = paste0("Month", 1:12, "_APE")) %>%
   spread(key, value) %>%
   select(-row)

推荐阅读