r - 从R中的长字符串中提取数据
问题描述
我想知道是否有一种方法可以从类似 JSON 的字符串中提取选定的数据并将它们插入到新列中。
我有一个这样的数据框(df):
id created_at rm
1 2020-07-03 {"cmd":3,"idx":0,"package":7019,"ts":1593929834,"more":0,"gps":[{"ts":1593929826,"lat":44.120545,"lon":-105.44116,"alt":1347.9,"d":1.37,"v":0,"eps":0,"a":0,"c":-0.6,"h":0,"sat":8,"flg":3}],":[{"ts":1593929795,"dt":11,"device":48119,"type":2,"psi":1390,"deg":579,"T":326,"bat":3100,"rssi":-68,"hPa":868,"atm":39},{"ts":1593929796,"dt":0,"device":40671,"type":2,"psi":1052,"deg":562,"T":562,"bat":3100,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929799,"dt":12,"device":268438536,"type":2,"psi":1422,"deg":579,"T":362,"bat":3100,"rssi":-71,"hPa":868,"atm":39},{"ts":1593929799,"dt":11,"device":40651,"type":2,"psi":1430,"deg":579,"T":355,"bat":3000,"rssi":-86,"hPa":868,"atm":39},{"ts":1593929800,"dt":0,"device":40706,"type":2,"psi":1462,"deg":346,"T":346,"bat":3100,"rssi":-96,"hPa":868,"atm":39},{"ts":1593929801,"dt":12,"device":48136,"type":2,"psi":1348,"deg":576,"T":576,"bat":3100,"rssi":-74,"hPa":868,"atm":39},{"ts":1593929801,"dt":11,"device":46512,"type":2,"psi":1417,"deg":576,"T":332,"bat":3000,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929802,"dt":0,"device":43640,"type":2,"psi":1364,"deg":11632,"T":11632,"bat":3000,"rssi":-60,"hPa":868,"atm":39},{"ts":1593929803,"dt":12,"device":47351,"type":2,"psi":1339,"deg":576,"T":335,"bat":3100,"rssi":-62,"hPa":868,"atm":39}}
我想从 rs 列中提取每个设备(例如“device”:48119)和 deg(例如“deg”:579)信息,并将它们插入每个设备的新列中。
预期的结果是这样的:
id created_at device1 deg1 device2 deg2 device3 deg3 device4 deg4 ...
1 2020-07-03 48119 579 40671 562 268438536 579 40651 579
有没有办法得到这些结果?非常感谢您的任何建议。
实际的 DF 如下所示:
request_id created_at request
159392982775726 05/07/2020 06:17 {"cmd":3,"idx":0,"package":8230,"ts":1593929826,"more":0,"gps":[{"ts":1593929796,"lat":-07.03077,"lon":-00.681861667,"alt":3887.5,"d":5.51,"v":5.42,"eps":6.01,"a":-0.03,"c":-0.3,"h":343.1,"sat":8,"flg":0},{"ts":1593929797,"lat":-o7.030726667,"lon":-00.681871667,"d":4.94,"v":4.71,"a":-0.71,"h":357.04,"flg":0},{"ts":1593929798,"lat":-07.030685,"lon":-00.681868333,"alt":3887.6,"d":4.65,"v":4.13,"a":-0.59,"c":0.1,"h":8.64,"flg":0},{"ts":1593929799,"lat":-07.030651667,"lon":-00.68186,"d":3.81,"v":3.31,"a":-0.81,"h":19.5,"flg":0},{"ts":1593929800,"lat":-07.030626667,"lon":-00.681851667,"alt":3887.7,"d":2.92,"v":2.29,"a":-1.02,"c":0.1,"h":17.76,"flg":0},{"ts":1593929802,"lat":-07.030608333,"lon":-00.681846667,"alt":3887.6,"d":2.11,"v":1.5,"a":-0.4,"c":-0.1,"h":14.9,"sat":7,"flg":0},{"ts":1593929803,"lat":-07.0306,"lon":-00.681843333,"alt":3887.5,"d":0.99,"v":0,"eps":0,"a":-1.5,"c":-0.1,"h":0,"flg":2},{"ts":1593929825,"lat":-07.030731667,"lon":-00.681828333,"alt":3885.9,"d":14.73,"c":-1.6,"flg":2}],"sensor":[{"ts":1593929797,"dt":11,"device":42387,"type":2,"psi":1318,"deg":565,"T":565,"bat":3100,"rssi":-77,"hPa":641,"atm":32},{"ts":1593929804,"dt":0,"device":268437294,"type":2,"psi":1364,"deg":498,"T":498,"bat":3100,"rssi":-83,"hPa":641,"atm":32},{"ts":1593929805,"dt":11,"device":268436490,"type":2,"psi":1371,"deg":530,"T":530,"bat":3100,"rssi":-72,"hPa":641,"atm":32},{"ts":1593929807,"dt":0,"device":268437307,"type":2,"psi":522,"deg":12048,"T":12048,"bat":3100,"rssi":-80,"hPa":641,"atm":32},{"ts":1593929809,"dt":12,"device":42387,"psi":1332,"rssi":-73},{"ts":1593929809,"dt":22,"device":268438879,"type":2,"psi":1349,"deg":496,"T":496,"bat":3100,"rssi":-78,"hPa":641,"atm":32},{"ts":1593929810,"dt":23,"device":268437144,"type":2,"psi":1346,"deg":522,"T":522,"bat":3100,"rssi":-78,"hPa":641,"atm":32},{"ts":1593929816,"dt":11,"device":268436490,"rssi":-78},{"ts":1593929818,"dt":0,"device":43753,"type":2,"psi":1362,"deg":574,"T":574,"bat":3100,"rssi":-97,"hPa":641,"atm":32},{"ts":1593929819,"dt":0,"device":268437307,"rssi":-86},{"ts":1593929820,"dt":11,"device":42387,"rssi":-77},{"ts":1593929820,"dt":11,"device":268438879,"psi":1352,"rssi":-83},{"ts":1593929821,"dt":0,"device":268435782,"type":2,"psi":1437,"deg":600,"T":600,"bat":3100,"rssi":-93,"hPa":641,"atm":32},{"ts":1593929821,"dt":11,"device":268437144,"psi":1342,"rssi":-83}],"accel":[{"ts":1593929801.07,"a":[-0.2,9.02,4.04]},{"ts":1593922349807.12,"a":[-0.0,0.0,0.0]},{"ts":1593929822345.36,"a":[-0.0,0.0,0.0]}]}
159392983317618 05/07/2020 06:17 {"cmd":3,"idx":0,"package":4,"ts":1593929832,"more":0,"sensor":[{"ts":1593929818,"dt":11,"device":268436777,"type":2,"psi":1403,"deg":636,"T":636,"bat":3100,"rssi":-86,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":41235,"type":2,"psi":1373,"deg":639,"T":639,"bat":2800,"rssi":-84,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":268438870,"type":2,"psi":528,"deg":5003,"T":5003,"bat":3200,"rssi":-76,"hPa":641,"atm":35},{"ts":1593929823,"dt":0,"device":41532,"type":2,"psi":1362,"deg":592,"T":592,"bat":3000,"rssi":-92,"hPa":641,"atm":35},{"ts":1593929823,"dt":11,"device":268438395,"type":2,"psi":1421,"deg":7226,"T":7226,"bat":3100,"rssi":-67,"hPa":641,"atm":35},{"ts":1593929825,"dt":0,"device":268437334,"type":2,"psi":1311,"deg":594,"T":594,"bat":3100,"rssi":-94,"hPa":641,"atm":35},{"ts":1593929826,"dt":11,"device":39592,"type":2,"psi":1375,"deg":587,"T":587,"bat":3100,"rssi":-74,"hPa":641,"atm":35},{"ts":1593929828,"dt":11,"device":43405,"type":2,"psi":1367,"deg":670,"T":670,"bat":2900,"rssi":-68,"hPa":641,"atm":35},{"ts":1593929829,"dt":11,"device":268436777,"rssi":-83},{"ts":1593929829,"dt":11,"device":41235,"deg":634,"T":634,"rssi":-93}],"accel":[{"ts":159392234818.75,"a":[-0.0,0.0,0.0]}]}
解决方案
编辑:
我花了很长时间,但我认为这可能是你想要的:
数据:
df <- data.frame(
id = c(1,2),
created_at = c("05/07/2020 06:17", "05/07/2020 06:17"),
rm = c('{"cmd":3,"idx":0,"package":7019,"ts":1593929834,"more":0,"gps":[{"ts":1593929826,"lat":44.120545,"lon":-105.44116,"alt":1347.9,"d":1.37,"v":0,"eps":0,"a":0,"c":-0.6,"h":0,"sat":8,"flg":3}],":[{"ts":1593929795,"dt":11,"device":48119,"type":2,"psi":1390,"deg":579,"T":326,"bat":3100,"rssi":-68,"hPa":868,"atm":39},{"ts":1593929796,"dt":0,"device":40671,"type":2,"psi":1052,"deg":562,"T":562,"bat":3100,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929799,"dt":12,"device":268438536,"type":2,"psi":1422,"deg":579,"T":362,"bat":3100,"rssi":-71,"hPa":868,"atm":39},{"ts":1593929799,"dt":11,"device":40651,"type":2,"psi":1430,"deg":579,"T":355,"bat":3000,"rssi":-86,"hPa":868,"atm":39},{"ts":1593929800,"dt":0,"device":40706,"type":2,"psi":1462,"deg":346,"T":346,"bat":3100,"rssi":-96,"hPa":868,"atm":39},{"ts":1593929801,"dt":12,"device":48136,"type":2,"psi":1348,"deg":576,"T":576,"bat":3100,"rssi":-74,"hPa":868,"atm":39},{"ts":1593929801,"dt":11,"device":46512,"type":2,"psi":1417,"deg":576,"T":332,"bat":3000,"rssi":-73,"hPa":868,"atm":39},{"ts":1593929802,"dt":0,"device":43640,"type":2,"psi":1364,"deg":11632,"T":11632,"bat":3000,"rssi":-60,"hPa":868,"atm":39},{"ts":1593929803,"dt":12,"device":47351,"type":2,"psi":1339,"deg":576,"T":335,"bat":3100,"rssi":-62,"hPa":868,"atm":39}}',
'{"cmd":3,"idx":0,"package":4,"ts":1593929832,"more":0,"sensor":[{"ts":1593929818,"dt":11,"device":268436777,"type":2,"psi":1403,"deg":636,"T":636,"bat":3100,"rssi":-86,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":41235,"type":2,"psi":1373,"deg":639,"T":639,"bat":2800,"rssi":-84,"hPa":641,"atm":35},{"ts":1593929818,"dt":11,"device":268438870,"type":2,"psi":528,"deg":5003,"T":5003,"bat":3200,"rssi":-76,"hPa":641,"atm":35},{"ts":1593929823,"dt":0,"device":41532,"type":2,"psi":1362,"deg":592,"T":592,"bat":3000,"rssi":-92,"hPa":641,"atm":35},{"ts":1593929823,"dt":11,"device":268438395,"type":2,"psi":1421,"deg":7226,"T":7226,"bat":3100,"rssi":-67,"hPa":641,"atm":35},{"ts":1593929825,"dt":0,"device":268437334,"type":2,"psi":1311,"deg":594,"T":594,"bat":3100,"rssi":-94,"hPa":641,"atm":35},{"ts":1593929826,"dt":11,"device":39592,"type":2,"psi":1375,"deg":587,"T":587,"bat":3100,"rssi":-74,"hPa":641,"atm":35},{"ts":1593929828,"dt":11,"device":43405,"type":2,"psi":1367,"deg":670,"T":670,"bat":2900,"rssi":-68,"hPa":641,"atm":35},{"ts":1593929829,"dt":11,"device":268436777,"rssi":-83},{"ts":1593929829,"dt":11,"device":41235,"deg":634,"T":634,"rssi":-93}],"accel":[{"ts":159392234818.75,"a":[-0.0,0.0,0.0]}]}'),
stringsAsFactors = F
)
让我们一步一步来。您有两个要匹配的模式。让我们从第一个开始,以下数字device
:
library(stringr)
matches <- str_extract_all(df$rm, '(?<="device":)\\d+(?=,)') # extract matches
ncols <- max(sapply(matches, length)) # device:
为 设置新列device
:
library(dplyr)
df <- matches %>%
lapply(function(x) c(x, rep(NA, ncols - length(x)))) %>%
do.call(rbind, .) %>%
data.frame() %>%
setNames(paste0("device", seq(ncols))) %>%
cbind(df, .) %>%
tibble()
现在重复第二个模式的整个过程,数字如下deg
:
matches <- str_extract_all(df$rm, '(?<="deg":)\\d+(?=,)') # extract matches
ncols <- max(sapply(matches2, length)) # determine maximum number of columns
为 设置新列deg
:
df <- matches %>%
lapply(function(x) c(x, rep(NA, ncols - length(x)))) %>%
do.call(rbind, .) %>%
data.frame() %>%
setNames(paste0("deg", seq(ncols))) %>%
cbind(df, .) %>%
tibble()
结果:
df[,-3]
# A tibble: 2 x 22
id created_at device1 device2 device3 device4 device5 device6 device7 device8 device9 device10 deg1 deg2
<dbl> <chr> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 1 05/07/202… 48119 40671 268438… 40651 40706 48136 46512 43640 47351 <NA> 579 562
2 2 05/07/202… 268436… 41235 268438… 41532 268438… 268437… 39592 43405 268436… 41235 636 639
# … with 8 more variables: deg3 <fct>, deg4 <fct>, deg5 <fct>, deg6 <fct>, deg7 <fct>, deg8 <fct>, deg9 <fct>,
# deg10 <fct>
推荐阅读
- typescript - 对象解构:字符串没有匹配的索引签名
- javascript - 如何在firebase实时数据库中使用子键查找父键?
- laravel - 是否可以在一行中存储不同的 id(相同类型)?
- javascript - 没有降价编辑器的 html 页面中的 SimpleMDE 预览
- neo4j - Cypher:如何更新 Neo4j 数据库中存在的节点中的属性值?
- ios - OperationQueue 等待异步任务完成
- arrays - 为什么外循环中的malocs数量为10 * sizeof,内循环中为2 * sizeof?
- java - 回收站视图未显示任何没有错误的项目
- c# - 如何在 WPF 应用程序中停止右键单击 TtitleBar
- reactjs - 如何在 next-auth 中添加自定义“检查您的电子邮件”页面?(验证请求)