首页 > 解决方案 > 在 scala 中替代 groupBy 和 collect_list

问题描述

有没有一种方法可以在不使用 groupBy 和 collect_list 的情况下从给定的输入数据帧创建以下输出数据帧?

输入数据框架构:-

root
 |-- GPS: struct (nullable = false)
 |    |-- requestid: string (nullable = true)
 |    |-- timestamp: double (nullable = true)
 |    |-- GPSLatitude: double (nullable = true)
 |    |-- GPSLongitude: double (nullable = true)
 |-- requestid: string (nullable = true)

输入数据框:-

GPS requestid
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368673,"GPSLatitude":40.13587319463796,"GPSLongitude":-75.15846220892067} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368674,"GPSLatitude":40.135924326024096,"GPSLongitude":-75.15865512908896} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368675,"GPSLatitude":40.13599278802667,"GPSLongitude":-75.15876733291171} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368676,"GPSLatitude":40.136083261014484,"GPSLongitude":-75.15885143441842} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368677,"GPSLatitude":40.13616687156273,"GPSLongitude":-75.1589827147081} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368833,"GPSLatitude":40.14496631033691,"GPSLongitude":-75.17361394861283} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368834,"GPSLatitude":40.14509290798243,"GPSLongitude":-75.17385201087406} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368835,"GPSLatitude":40.145218343731,"GPSLongitude":-75.17407707132585} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368836,"GPSLatitude":40.145350564938425,"GPSLongitude":-75.17430271187274} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368837,"GPSLatitude":40.14548270568285,"GPSLongitude":-75.17452650958782} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2","timestamp":1596368838,"GPSLatitude":40.14560747391316,"GPSLongitude":-75.17474553105055} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2
{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2","timestamp":1596368839,"GPSLatitude":40.14560753483339,"GPSLongitude":-75.17474563799348} 2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2

输出数据帧:-

requestid GPS
2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0 [{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368673,"GPSLatitude":40.13587319463796,"GPSLongitude":-75.15846220892067},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368674,"GPSLatitude":40.135924326024096,"GPSLongitude":-75.15865512908896},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368675,"GPSLatitude":40.13599278802667,"GPSLongitude":-75.15876733291171},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368676,"GPSLatitude":40.136083261014484,"GPSLongitude":-75.15885143441842},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_0","timestamp":1596368677,"GPSLatitude":40.13616687156273,"GPSLongitude":-75.1589827147081}]
2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1 [{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368833,"GPSLatitude":40.14496631033691,"GPSLongitude":-75.17361394861283},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368834,"GPSLatitude":40.14509290798243,"GPSLongitude":-75.17385201087406},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368835,"GPSLatitude":40.145218343731,"GPSLongitude":-75.17407707132585},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368836,"GPSLatitude":40.145350564938425,"GPSLongitude":-75.17430271187274},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_1","timestamp":1596368837,"GPSLatitude":40.14548270568285,"GPSLongitude":-75.17452650958782}]
2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2 [{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2","timestamp":1596368838,"GPSLatitude":40.14560747391316,"GPSLongitude":-75.17474553105055},{"requestid":"2b7bfbd4cf7c3124a517a112851ec6015b1ef85b_2","timestamp":1596368839,"GPSLatitude":40.14560753483339,"GPSLongitude":-75.17474563799348}]

输出数据框架构:-

root
 |-- requestid: string (nullable = true)
 |-- GPS: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- requestid: string (nullable = true)
 |    |    |-- timestamp: double (nullable = true)
 |    |    |-- GPSLatitude: double (nullable = true)
 |    |    |-- GPSLongitude: double (nullable = true)

版本:(Spark 2.4.5 和 Scala 2.11)

标签: scala, dataframe, apache-spark

解决方案


推荐阅读