首页 > 解决方案 > 如何在循环中更新单个列中的多个条目

问题描述

我有两个数据框:其中一个存放正则表达式模式,用来在另一个数据框的文本中查找匹配的月份。

# Sample transactions: one free-text narration per serial number.
txn <- data.frame(
  S.No. = c(1, 2),
  NARRATION = c(
    "NET TXN: KBDEC17 SALARY DEC ",
    "NET TXN: KBJAN19 SALARY"
  )
)

# Lookup table of month-detection regexes (one row per pattern).
date_pattern <- data.frame(
  CODE = c("MONTH25", "MONTH34"),
  FORMAT = c("MmmYY", "Mmm"),
  ACTIVE = c("Y", "Y"),
  SEQNO = c(71, 76),
  REGEXPAT = c(
    "(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)[1][6-9](?=[[:punct:]]|[[:space:]]|[[:alpha:]])",
    " (?<=[[:punct:]]|[[:space:]])(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?=[[:punct:]]|[[:space:]])"
  )
)

现在,当我运行下面的循环时,它会把匹配到的月份替换为“$”;但在某些情况下,同一个字符串中有不止一个匹配值,而 MONTH 列只记录了其中的第一个。

# Try the patterns in ascending SEQNO order (lowest SEQNO first).
date_pattern$SEQNO <- as.numeric(date_pattern$SEQNO)
date_pattern <- date_pattern[order(date_pattern$SEQNO), ]

# MONTH stays NA until some pattern matches the row's narration.
txn$MONTH <- NA

# seq_along() (not 1:length()) so an empty pattern table runs zero iterations.
for (i in seq_along(date_pattern$REGEXPAT)) {
  pat <- date_pattern$REGEXPAT[i]
  codetype <- date_pattern$CODE[i]
  seq_no <- date_pattern$SEQNO[i]

  if (grepl("MONTH", codetype, ignore.case = TRUE, perl = TRUE)) {
    # Rows whose narration matches this pattern AND that have no MONTH yet.
    # Rows already filled by an earlier pattern are skipped here, so only the
    # first match per row is ever stored in MONTH (later matches are still
    # masked in NARRATION below, but not recorded).
    hit <- which(
      grepl(pat, txn$NARRATION, ignore.case = TRUE, perl = TRUE) &
        is.na(txn$MONTH)
    )

    # Index by row position instead of by S.No. value, so the assignment is
    # still correct when S.No. does not equal the row number.
    # StrExtract() comes from the DescTools package; it is namespace-qualified
    # because no library() call for it appears in this snippet.
    txn$MONTH[hit] <- as.character(
      DescTools::StrExtract(
        txn$NARRATION[hit], pat,
        ignore.case = TRUE, perl = TRUE
      )
    )

    # Mask every occurrence of the pattern, then collapse runs of whitespace.
    txn$NARRATION <- gsub(
      pat, " $ ", txn$NARRATION,
      perl = TRUE, ignore.case = TRUE
    )
    txn$NARRATION <- gsub("\\s+", " ", txn$NARRATION)
    print(paste(txn$NARRATION, pat, i, codetype, seq_no, sep = "--"))
  } else {
    print("Wrong Choice !!!!!!!")
  }
}

# Show rows without any detected month as an empty string rather than NA.
txn[is.na(txn)] <- ""

txn

现在,我得到的输出是:

   S.No.               NARRATION MONTH  
1     1 NET TXN: KB $ SALARY $  DEC17  
2     2    NET TXN: KB $ SALARY JAN19

但是,我需要像这样的输出:

   S.No.               NARRATION MONTH  
1     1 NET TXN: KB $ SALARY $  DEC17,DEC  
2     2    NET TXN: KB $ SALARY JAN19

标签: r, regex

解决方案


试试这种方法:

library(dplyr)
library(stringr)

# Replace every extracted month found in `text` with "$".
#   text   : a single narration string.
#   months : character vector of extracted months; may contain NA where a
#            pattern did not match.
# NA entries are dropped first: pasting them would put the literal text "NA"
# into the regex and mask any "NA" substring in the narration. Longer months
# are tried first so that e.g. "DEC" cannot clip "DEC17" down to "$17".
# When nothing was extracted the text is returned unchanged.
mask_months <- function(text, months) {
  months <- months[!is.na(months)]
  if (length(months) == 0) {
    return(text)
  }
  months <- months[order(-nchar(months))]
  str_replace_all(text, paste0(months, collapse = "|"), "$")
}

# Pair every transaction with every pattern, extract one match per pair,
# then collapse back to one row per transaction.
tidyr::crossing(txn, date_pattern) %>%
  mutate(MONTH = trimws(str_extract(NARRATION, REGEXPAT))) %>%
  group_by(S.No.) %>%
  summarise(
    NARRATION = mask_months(first(NARRATION), MONTH),
    MONTH = toString(na.omit(MONTH))
  )

#  S.No. NARRATION                MONTH     
#  <dbl> <chr>                    <chr>     
#1     1 "NET TXN: KB$ SALARY $ " DEC17, DEC
#2     2 "NET TXN: KB$ SALARY"    JAN19     

我们针对每个 NARRATION 值逐一检查 REGEXPAT 中的每个模式,并使用 str_extract 提取匹配到的内容;然后使用 str_replace_all 将提取到的内容替换为 $。


推荐阅读