首页 > 解决方案 > 从R中的长字符串中提取数据

问题描述

我想知道是否有一种方法可以从类似 JSON 的字符串中提取选定的数据并将它们插入到新列中。

我有一个这样的数据框(df):

id   created_at    rm
1    2020-07-03    {"cmd":3,"idx":0,"package":7019,"ts":1593929834,"more":0,"gps":[{"ts":1593929826,"lat":44.120545,"lon":-105.44116,"alt":1347.9,"d":1.37,"v":0,"eps":0,"a":0,"c":-0.6,"h":0,"sat":8,"flg":3}],":[{"ts":1593929795,"dt":11,"device":48119,"type":2,"psi":1390,"deg":579,"T":326,"bat":3100,"rssi":-68,"hPa":868,"atm":39},{"ts":1593929796,"dt":0,"device":40671,"type":2,"psi":1052,"deg":562,"T":562,"bat":3100,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929799,"dt":12,"device":268438536,"type":2,"psi":1422,"deg":579,"T":362,"bat":3100,"rssi":-71,"hPa":868,"atm":39},{"ts":1593929799,"dt":11,"device":40651,"type":2,"psi":1430,"deg":579,"T":355,"bat":3000,"rssi":-86,"hPa":868,"atm":39},{"ts":1593929800,"dt":0,"device":40706,"type":2,"psi":1462,"deg":346,"T":346,"bat":3100,"rssi":-96,"hPa":868,"atm":39},{"ts":1593929801,"dt":12,"device":48136,"type":2,"psi":1348,"deg":576,"T":576,"bat":3100,"rssi":-74,"hPa":868,"atm":39},{"ts":1593929801,"dt":11,"device":46512,"type":2,"psi":1417,"deg":576,"T":332,"bat":3000,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929802,"dt":0,"device":43640,"type":2,"psi":1364,"deg":11632,"T":11632,"bat":3000,"rssi":-60,"hPa":868,"atm":39},{"ts":1593929803,"dt":12,"device":47351,"type":2,"psi":1339,"deg":576,"T":335,"bat":3100,"rssi":-62,"hPa":868,"atm":39}}

我想从 rs 列中提取每个设备(例如“device”:48​​119)和 deg(例如“deg”:579)信息,并将它们插入每个设备的新列中。

预期的结果是这样的:

id   created_at    device1   deg1   device2   deg2   device3   deg3   device4   deg4 ...
1    2020-07-03    48119     579    40671     562    268438536 579    40651     579

有没有办法得到这些结果?非常感谢您的任何建议。

实际的 DF 如下所示:

request_id      created_at          request
159392982775726 05/07/2020 06:17    {"cmd":3,"idx":0,"package":8230,"ts":1593929826,"more":0,"gps":[{"ts":1593929796,"lat":-07.03077,"lon":-00.681861667,"alt":3887.5,"d":5.51,"v":5.42,"eps":6.01,"a":-0.03,"c":-0.3,"h":343.1,"sat":8,"flg":0},{"ts":1593929797,"lat":-o7.030726667,"lon":-00.681871667,"d":4.94,"v":4.71,"a":-0.71,"h":357.04,"flg":0},{"ts":1593929798,"lat":-07.030685,"lon":-00.681868333,"alt":3887.6,"d":4.65,"v":4.13,"a":-0.59,"c":0.1,"h":8.64,"flg":0},{"ts":1593929799,"lat":-07.030651667,"lon":-00.68186,"d":3.81,"v":3.31,"a":-0.81,"h":19.5,"flg":0},{"ts":1593929800,"lat":-07.030626667,"lon":-00.681851667,"alt":3887.7,"d":2.92,"v":2.29,"a":-1.02,"c":0.1,"h":17.76,"flg":0},{"ts":1593929802,"lat":-07.030608333,"lon":-00.681846667,"alt":3887.6,"d":2.11,"v":1.5,"a":-0.4,"c":-0.1,"h":14.9,"sat":7,"flg":0},{"ts":1593929803,"lat":-07.0306,"lon":-00.681843333,"alt":3887.5,"d":0.99,"v":0,"eps":0,"a":-1.5,"c":-0.1,"h":0,"flg":2},{"ts":1593929825,"lat":-07.030731667,"lon":-00.681828333,"alt":3885.9,"d":14.73,"c":-1.6,"flg":2}],"sensor":[{"ts":1593929797,"dt":11,"device":42387,"type":2,"psi":1318,"deg":565,"T":565,"bat":3100,"rssi":-77,"hPa":641,"atm":32},{"ts":1593929804,"dt":0,"device":268437294,"type":2,"psi":1364,"deg":498,"T":498,"bat":3100,"rssi":-83,"hPa":641,"atm":32},{"ts":1593929805,"dt":11,"device":268436490,"type":2,"psi":1371,"deg":530,"T":530,"bat":3100,"rssi":-72,"hPa":641,"atm":32},{"ts":1593929807,"dt":0,"device":268437307,"type":2,"psi":522,"deg":12048,"T":12048,"bat":3100,"rssi":-80,"hPa":641,"atm":32},{"ts":1593929809,"dt":12,"device":42387,"psi":1332,"rssi":-73},{"ts":1593929809,"dt":22,"device":268438879,"type":2,"psi":1349,"deg":496,"T":496,"bat":3100,"rssi":-78,"hPa":641,"atm":32},{"ts":1593929810,"dt":23,"device":268437144,"type":2,"psi":1346,"deg":522,"T":522,"bat":3100,"rssi":-78,"hPa":641,"atm":32},{"ts":1593929816,"dt":11,"device":268436490,"rssi":-78},{"ts":1593929818,"dt":0,"device":43753,"type":2,"psi":1362,"deg":574,"T":574,"bat":3100,"rssi":-97,"hPa":641,"atm":32},{"ts":1593929819,"dt":0,"device":268437307,"rssi":-86},{"ts":1593929820,"dt":11,"device":42387,"rssi":-77},{"ts":1593929820,"dt":11,"device":268438879,"psi":1352,"rssi":-83},{"ts":1593929821,"dt":0,"device":268435782,"type":2,"psi":1437,"deg":600,"T":600,"bat":3100,"rssi":-93,"hPa":641,"atm":32},{"ts":1593929821,"dt":11,"device":268437144,"psi":1342,"rssi":-83}],"accel":[{"ts":1593929801.07,"a":[-0.2,9.02,4.04]},{"ts":1593922349807.12,"a":[-0.0,0.0,0.0]},{"ts":1593929822345.36,"a":[-0.0,0.0,0.0]}]}
159392983317618 05/07/2020 06:17    {"cmd":3,"idx":0,"package":4,"ts":1593929832,"more":0,"sensor":[{"ts":1593929818,"dt":11,"device":268436777,"type":2,"psi":1403,"deg":636,"T":636,"bat":3100,"rssi":-86,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":41235,"type":2,"psi":1373,"deg":639,"T":639,"bat":2800,"rssi":-84,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":268438870,"type":2,"psi":528,"deg":5003,"T":5003,"bat":3200,"rssi":-76,"hPa":641,"atm":35},{"ts":1593929823,"dt":0,"device":41532,"type":2,"psi":1362,"deg":592,"T":592,"bat":3000,"rssi":-92,"hPa":641,"atm":35},{"ts":1593929823,"dt":11,"device":268438395,"type":2,"psi":1421,"deg":7226,"T":7226,"bat":3100,"rssi":-67,"hPa":641,"atm":35},{"ts":1593929825,"dt":0,"device":268437334,"type":2,"psi":1311,"deg":594,"T":594,"bat":3100,"rssi":-94,"hPa":641,"atm":35},{"ts":1593929826,"dt":11,"device":39592,"type":2,"psi":1375,"deg":587,"T":587,"bat":3100,"rssi":-74,"hPa":641,"atm":35},{"ts":1593929828,"dt":11,"device":43405,"type":2,"psi":1367,"deg":670,"T":670,"bat":2900,"rssi":-68,"hPa":641,"atm":35},{"ts":1593929829,"dt":11,"device":268436777,"rssi":-83},{"ts":1593929829,"dt":11,"device":41235,"deg":634,"T":634,"rssi":-93}],"accel":[{"ts":159392234818.75,"a":[-0.0,0.0,0.0]}]}

标签: rjsondataframe

解决方案


编辑

我花了很长时间,但我认为这可能是你想要的:

数据:

df <- data.frame(
  id = c(1,2),
  created_at = c("05/07/2020 06:17", "05/07/2020 06:17"),
  rm = c('{"cmd":3,"idx":0,"package":7019,"ts":1593929834,"more":0,"gps":[{"ts":1593929826,"lat":44.120545,"lon":-105.44116,"alt":1347.9,"d":1.37,"v":0,"eps":0,"a":0,"c":-0.6,"h":0,"sat":8,"flg":3}],":[{"ts":1593929795,"dt":11,"device":48119,"type":2,"psi":1390,"deg":579,"T":326,"bat":3100,"rssi":-68,"hPa":868,"atm":39},{"ts":1593929796,"dt":0,"device":40671,"type":2,"psi":1052,"deg":562,"T":562,"bat":3100,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929799,"dt":12,"device":268438536,"type":2,"psi":1422,"deg":579,"T":362,"bat":3100,"rssi":-71,"hPa":868,"atm":39},{"ts":1593929799,"dt":11,"device":40651,"type":2,"psi":1430,"deg":579,"T":355,"bat":3000,"rssi":-86,"hPa":868,"atm":39},{"ts":1593929800,"dt":0,"device":40706,"type":2,"psi":1462,"deg":346,"T":346,"bat":3100,"rssi":-96,"hPa":868,"atm":39},{"ts":1593929801,"dt":12,"device":48136,"type":2,"psi":1348,"deg":576,"T":576,"bat":3100,"rssi":-74,"hPa":868,"atm":39},{"ts":1593929801,"dt":11,"device":46512,"type":2,"psi":1417,"deg":576,"T":332,"bat":3000,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929802,"dt":0,"device":43640,"type":2,"psi":1364,"deg":11632,"T":11632,"bat":3000,"rssi":-60,"hPa":868,"atm":39},{"ts":1593929803,"dt":12,"device":47351,"type":2,"psi":1339,"deg":576,"T":335,"bat":3100,"rssi":-62,"hPa":868,"atm":39}}',
         '{"cmd":3,"idx":0,"package":4,"ts":1593929832,"more":0,"sensor":[{"ts":1593929818,"dt":11,"device":268436777,"type":2,"psi":1403,"deg":636,"T":636,"bat":3100,"rssi":-86,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":41235,"type":2,"psi":1373,"deg":639,"T":639,"bat":2800,"rssi":-84,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":268438870,"type":2,"psi":528,"deg":5003,"T":5003,"bat":3200,"rssi":-76,"hPa":641,"atm":35},{"ts":1593929823,"dt":0,"device":41532,"type":2,"psi":1362,"deg":592,"T":592,"bat":3000,"rssi":-92,"hPa":641,"atm":35},{"ts":1593929823,"dt":11,"device":268438395,"type":2,"psi":1421,"deg":7226,"T":7226,"bat":3100,"rssi":-67,"hPa":641,"atm":35},{"ts":1593929825,"dt":0,"device":268437334,"type":2,"psi":1311,"deg":594,"T":594,"bat":3100,"rssi":-94,"hPa":641,"atm":35},{"ts":1593929826,"dt":11,"device":39592,"type":2,"psi":1375,"deg":587,"T":587,"bat":3100,"rssi":-74,"hPa":641,"atm":35},{"ts":1593929828,"dt":11,"device":43405,"type":2,"psi":1367,"deg":670,"T":670,"bat":2900,"rssi":-68,"hPa":641,"atm":35},{"ts":1593929829,"dt":11,"device":268436777,"rssi":-83},{"ts":1593929829,"dt":11,"device":41235,"deg":634,"T":634,"rssi":-93}],"accel":[{"ts":159392234818.75,"a":[-0.0,0.0,0.0]}]}'),
  stringsAsFactors = F
)

让我们一步一步来。您有两个要匹配的模式。让我们从第一个开始,以下数字device

library(stringr)
matches <- str_extract_all(df$rm, '(?<="device":)\\d+(?=,)') # extract matches
ncols   <- max(sapply(matches, length)) # device:

为 设置新列device

library(dplyr)
df <- matches %>%
  lapply(function(x)  c(x, rep(NA, ncols - length(x)))) %>%
  do.call(rbind, .) %>%
  data.frame() %>%
  setNames(paste0("device", seq(ncols))) %>%
  cbind(df, .) %>%
  tibble()

现在重复第二个模式的整个过程,数字如下deg

matches <- str_extract_all(df$rm, '(?<="deg":)\\d+(?=,)') # extract matches
ncols   <- max(sapply(matches2, length)) # determine maximum number of columns

为 设置新列deg

df <- matches %>%
  lapply(function(x)  c(x, rep(NA, ncols - length(x)))) %>%
  do.call(rbind, .) %>%
  data.frame() %>%
  setNames(paste0("deg", seq(ncols))) %>%
  cbind(df, .) %>%
  tibble()

结果:

df[,-3]
# A tibble: 2 x 22
     id created_at device1 device2 device3 device4 device5 device6 device7 device8 device9 device10 deg1  deg2 
  <dbl> <chr>      <fct>   <fct>   <fct>   <fct>   <fct>   <fct>   <fct>   <fct>   <fct>   <fct>    <fct> <fct>
1     1 05/07/202… 48119   40671   268438… 40651   40706   48136   46512   43640   47351   <NA>     579   562  
2     2 05/07/202… 268436… 41235   268438… 41532   268438… 268437… 39592   43405   268436… 41235    636   639  
# … with 8 more variables: deg3 <fct>, deg4 <fct>, deg5 <fct>, deg6 <fct>, deg7 <fct>, deg8 <fct>, deg9 <fct>,
#   deg10 <fct>

推荐阅读