首页 > 解决方案 > 如何从 R 中的多列中提取文本并粘贴到同一列中?

问题描述

我有一个数据集,其中包含以下格式的几列。

et_intro | et_ed_eng
(ID 1) 是的,我明白 | (ID -1) 以上都不是
(ID 2) 不,我打算打开一个不同的调查 | (ID 1) 运动

我想在 R 中编写一个简单的语句(最好是对 tidyverse 友好),它会产生以下内容。

et_intro et_ed_eng
1 -1
2 1

数据

# Example survey data for the question: an 11-row, 19-column object carrying
# the tibble classes ("tbl_df", "tbl", "data.frame"). The same object is bound
# to both `ets_e` and `ets_p` by the chained assignment.
# NOTE(review): the layout looks like dput() output -- confirm before editing
# values by hand.
#
# Column overview (as visible in the literal below):
#   * `Scheduled Time`, `Issued Time`, `Response Time`: timestamps stored as
#     character strings, not as date-time objects.
#   * `Duration (minutes)`: numeric.
#   * Location: character, "Unknown" in every row.
#   * et_* answer columns: character strings of the form "(ID n) label";
#     cells with several answers join them with " & ". Unanswered cells are NA.
#   * et_ed_eng_other / et_ed_whyb_other: all NA (logical).
#   * et_ed_bplan, et_ex_dc, et_ex_sw, et_ex_comp: plain numeric codes with NA.
ets_e <- ets_p <- structure(
        list(
                `Scheduled Time` = c(
                        "2020-11-10 11:31:09 EST",
                        "2020-11-12 19:49:06 EST",
                        "2020-11-13 21:07:44 EST",
                        "2020-11-11 14:05:48 EST",
                        "2020-11-12 17:14:14 EST",
                        "2020-11-12 21:22:09 EST",
                        "2020-11-11 15:26:42 EST",
                        "2020-11-25 13:42:15 EST",
                        "2020-11-25 16:18:16 EST",
                        "2020-11-25 16:20:17 EST",
                        "2020-11-27 15:47:27 EST"
                ),
                `Issued Time` = c(
                        "2020-11-10 11:31:09 EST",
                        "2020-11-12 19:49:06 EST",
                        "2020-11-13 21:07:44 EST",
                        "2020-11-11 14:05:48 EST",
                        "2020-11-12 17:14:14 EST",
                        "2020-11-12 21:22:09 EST",
                        "2020-11-11 15:26:42 EST",
                        "2020-11-25 13:42:15 EST",
                        "2020-11-25 16:18:16 EST",
                        "2020-11-25 16:20:17 EST",
                        "2020-11-27 15:47:27 EST"
                ),
                `Response Time` = c(
                        "2020-11-10 11:31:36 EST",
                        "2020-11-12 19:49:53 EST",
                        "2020-11-13 21:08:13 EST",
                        "2020-11-11 14:07:16 EST",
                        "2020-11-12 17:15:24 EST",
                        "2020-11-12 21:22:56 EST",
                        "2020-11-11 15:28:34 EST",
                        "2020-11-25 13:42:25 EST",
                        "2020-11-25 16:18:55 EST",
                        "2020-11-25 16:22:08 EST",
                        "2020-11-27 15:47:44 EST"
                ),
                `Duration (minutes)` = c(
                        26.52,
                        47.758,
                        29.752,
                        87.806,
                        69.313,
                        46.894,
                        111.483,
                        10.768,
                        38.319,
                        111.159,
                        17.287
                ),
                Location = c(
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown",
                        "Unknown"
                ),
                # Single-answer column: each cell is one "(ID n) label" string.
                et_intro = c(
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 1) Yes I understand",
                        "(ID 2) No I meant to open a different survey (you can exit the current survey)",
                        "(ID 1) Yes I understand",
                        "(ID 2) No I meant to open a different survey (you can exit the current survey)",
                        "(ID 2) No I meant to open a different survey (you can exit the current survey)"
                ),
                # Contains a negative ID ("(ID -1)") and NA for unanswered rows.
                et_ed_eng = c(
                        "(ID -1) None of the above",
                        "(ID 1) Binge eating",
                        "(ID 1) Binge eating",
                        "(ID 1) Binge eating",
                        "(ID 1) Binge eating",
                        "(ID 1) Binge eating",
                        "(ID 1) Binge eating",
                        NA,
                        "(ID 1) Binge eating",
                        NA,
                        NA
                ),
                et_ed_eng_other = c(NA, NA, NA, NA, NA, NA, NA, NA,
                                    NA, NA, NA),
                # Some cells hold several answers joined by " & ".
                et_ed_whyb = c(
                        NA,
                        "(ID 4) Because I don't know how to control my eating & (ID 5) Because binge eating feels good",
                        "(ID 5) Because binge eating feels good",
                        "(ID 3) To cope with my emotions & (ID 5) Because binge eating feels good",
                        "(ID 4) Because I don't know how to control my eating & (ID 5) Because binge eating feels good",
                        "(ID 4) Because I don't know how to control my eating & (ID 5) Because binge eating feels good",
                        "(ID 1) I went too long without eating or restricted my eating too much",
                        NA,
                        "(ID 1) I went too long without eating or restricted my eating too much",
                        NA,
                        NA
                ),
                et_ed_whyb_other = c(NA, NA, NA, NA, NA, NA, NA, NA,
                                     NA, NA, NA),
                et_ed_bplan = c(NA, 1, 1, 1, 0, 2, 1, NA, 0, NA,
                                NA),
                # Some cells hold several answers joined by " & ", in varying order.
                et_ed_rstr = c(
                        "(ID 1) Tried to limit the amount that you ate",
                        "(ID 2) Tried to avoid eating certain foods that you like & (ID 1) Tried to limit the amount that you ate",
                        "(ID 2) Tried to avoid eating certain foods that you like",
                        "(ID 1) Tried to limit the amount that you ate & (ID 2) Tried to avoid eating certain foods that you like",
                        "(ID 1) Tried to limit the amount that you ate & (ID 2) Tried to avoid eating certain foods that you like",
                        "(ID 2) Tried to avoid eating certain foods that you like & (ID 1) Tried to limit the amount that you ate",
                        "(ID 2) Tried to avoid eating certain foods that you like",
                        NA,
                        "(ID 3) Tried to delay eating & (ID 2) Tried to avoid eating certain foods that you like & (ID 1) Tried to limit the amount that you ate",
                        NA,
                        NA
                ),
                et_ed_rstr_lim = c(
                        "(ID 2) No",
                        "(ID 2) No",
                        NA,
                        "(ID 1) Yes",
                        "(ID 2) No",
                        "(ID 2) No",
                        NA,
                        NA,
                        "(ID 2) No",
                        NA,
                        NA
                ),
                et_ed_rstr_av = c(
                        NA,
                        "(ID 2) No",
                        "(ID 2) No",
                        "(ID 1) Yes",
                        "(ID 2) No",
                        "(ID 2) No",
                        "(ID 2) No",
                        NA,
                        "(ID 2) No",
                        NA,
                        NA
                ),
                et_ed_rstr_del = c(NA,
                                   NA, NA, NA, NA, NA, NA, NA, "(ID 2) No", NA, NA),
                et_ex_eng = c(
                        "(ID 1) Yes",
                        "(ID 2) No",
                        "(ID 1) Yes",
                        "(ID 2) No",
                        "(ID 2) No",
                        "(ID 2) No",
                        "(ID 2) No",
                        NA,
                        "(ID 2) No",
                        NA,
                        NA
                ),
                et_ex_dc = c(2, NA, 3,
                             NA, NA, NA, NA, NA, NA, NA, NA),
                et_ex_sw = c(3, NA, 3, NA, NA,
                             NA, NA, NA, NA, NA, NA),
                et_ex_comp = c(1, NA, 4, NA, NA, NA,
                               NA, NA, NA, NA, NA)
        ),
        # c(NA, -11L) is the compact form for automatic row names 1..11.
        row.names = c(NA,-11L),
        class = c("tbl_df",
                  "tbl", "data.frame")
)

标签: r, string, dataframe, tidyverse

解决方案


你可以这样做:

library(dplyr)

# Overwrite every column of `df` with the integer ID embedded in its strings
# (a leading minus sign is kept), converted to numeric.
df %>%
  mutate(
    across(
      everything(),
      function(col) as.numeric(gsub("^.*?(-?\\d+).*$", "\\1", col))
    )
  )
#>   et_intro et_ed_eng
#> 1        1        -1
#> 2        2         1

数据

# Minimal two-row example: each cell is a survey answer of the form
# "(ID n) label". Strings are kept as character (never factors).
df <- data.frame(
  et_intro = c(
    "(ID 1) Yes I understand",
    "(ID 2) No I meant to open a different survey"
  ),
  et_ed_eng = c(
    "(ID -1) None of the above",
    "(ID 1) Exercise"
  ),
  stringsAsFactors = FALSE
)

df
#>                                       et_intro                 et_ed_eng
#> 1                      (ID 1) Yes I understand (ID -1) None of the above
#> 2 (ID 2) No I meant to open a different survey           (ID 1) Exercise

编辑

使用问题中新添加的数据,很明显需要做一些工作来选择正确的列,而不是选择 everything()。

# Convert only the character columns whose names start with "et_" to their
# numeric ID; timestamp, Location, logical and already-numeric columns are left
# untouched. The pattern's lazy prefix means that a cell holding several
# "(ID n)" entries keeps only the first integer it contains.
ets_e %>%
  mutate(
    across(
      where(is.character) & starts_with("et_"),
      function(col) as.numeric(gsub("^.*?(-?\\d+).*$", "\\1", col))
    )
  )
#> # A tibble: 11 x 19
#>    `Scheduled Time` `Issued Time` `Response Time` `Duration (minu~ Location et_intro
#>    <chr>            <chr>         <chr>                      <dbl> <chr>       <dbl>
#>  1 2020-11-10 11:3~ 2020-11-10 1~ 2020-11-10 11:~             26.5 Unknown         1
#>  2 2020-11-12 19:4~ 2020-11-12 1~ 2020-11-12 19:~             47.8 Unknown         1
#>  3 2020-11-13 21:0~ 2020-11-13 2~ 2020-11-13 21:~             29.8 Unknown         1
#>  4 2020-11-11 14:0~ 2020-11-11 1~ 2020-11-11 14:~             87.8 Unknown         1
#>  5 2020-11-12 17:1~ 2020-11-12 1~ 2020-11-12 17:~             69.3 Unknown         1
#>  6 2020-11-12 21:2~ 2020-11-12 2~ 2020-11-12 21:~             46.9 Unknown         1
#>  7 2020-11-11 15:2~ 2020-11-11 1~ 2020-11-11 15:~            111.  Unknown         1
#>  8 2020-11-25 13:4~ 2020-11-25 1~ 2020-11-25 13:~             10.8 Unknown         2
#>  9 2020-11-25 16:1~ 2020-11-25 1~ 2020-11-25 16:~             38.3 Unknown         1
#> 10 2020-11-25 16:2~ 2020-11-25 1~ 2020-11-25 16:~            111.  Unknown         2
#> 11 2020-11-27 15:4~ 2020-11-27 1~ 2020-11-27 15:~             17.3 Unknown         2
#> # ... with 13 more variables: et_ed_eng <dbl>, et_ed_eng_other <lgl>, et_ed_whyb <dbl>,
#> #   et_ed_whyb_other <lgl>, et_ed_bplan <dbl>, et_ed_rstr <dbl>, et_ed_rstr_lim <dbl>,
#> #   et_ed_rstr_av <dbl>, et_ed_rstr_del <dbl>, et_ex_eng <dbl>, et_ex_dc <dbl>,
#> #   et_ex_sw <dbl>, et_ex_comp <dbl>


推荐阅读