r - 优化嵌套列表的子集操作
问题描述
是否可以提高此代码中最后一个子集操作的速度?此代码获取一小部分 OpenStreetMap 数据,搜索所有带有名称的道路,并创建一个仅包含这些道路的新 osmar 对象。我想优化的是代码的最后一步:
highway_subset <- subset(muc, ids = highway_subset_ids)
class(muc)
[1] "osmar" "list"
muc 是一个由列表组成的列表,并且其中每个元素都有一个用于创建子集的 id。
这是完整的示例:
# Download a small OpenStreetMap extract and reduce it to the named highways.
library("osmar")
src <- osmsource_api(url = "https://api.openstreetmap.org/api/0.6/")

# Bounding box built from a centre point plus a 1000 x 1000 extent
# (presumably lon, lat, width, height in metres -- confirm in the osmar docs).
muc_bbox <- center_bbox(11.575278, 48.137222, 1000, 1000)
muc <- get_osm(muc_bbox, src)

# Step 1: ids of every way carrying a "highway" tag, then the matching subset.
highway_way_ids <- find(muc, way(tags(k == "highway")))
highway_ways <- subset(muc, way_ids = highway_way_ids)

# Step 2: among those ways, keep the ids of the ones that also have a "name".
named_way_ids <- find(highway_ways, way(tags(k == "name")))

# Step 3: expand the way ids into the full id set passed to `ids =` below.
highway_subset_ids <- find_down(muc, way(named_way_ids))

# Step 4: the subset call whose speed the question is about.
highway_subset <- subset(muc, ids = highway_subset_ids)
提前非常感谢。
更新
如果您在使用 ssl 时遇到问题,请尝试复制粘贴以下代码示例。这是我能给出的最小示例。
我要优化的行是这一行:
final_subset <- subset(highway_subset, ids = highway_subset_ids)
# Self-contained reproduction: instead of downloading data, a tiny `osmar`
# object is written out as a literal `structure()` call. It has three
# elements (nodes, ways, relations); only `ways` contains any rows.
library("osmar")
highway_subset <-
# --- nodes: `attrs` and `tags` data frames, both with zero rows -------------
structure(list(nodes = structure(list(
attrs = structure(
list(
id = numeric(0),
visible = character(0),
timestamp = structure(
list(
sec = numeric(0),
min = integer(0),
hour = integer(0),
mday = integer(0),
mon = integer(0),
year = integer(0),
wday = integer(0),
yday = integer(0),
isdst = integer(0),
zone = character(0),
gmtoff = integer(0)
),
class = c("POSIXlt", "POSIXt")
),
version = numeric(0),
changeset = numeric(0),
user = structure(integer(0), .Label = character(0), class = "factor"),
uid = structure(
integer(0),
.Label = c("2455020", "2590140", "367380"),
class = "factor"
),
lat = numeric(0),
lon = numeric(0)
),
row.names = integer(0),
class = "data.frame"
),
tags = structure(
list(
id = numeric(0),
k = structure(integer(0), .Label = character(0), class = "factor"),
v = structure(integer(0), .Label = character(0), class = "factor")
),
row.names = integer(0),
class = "data.frame"
)
),
class = c("nodes", "osmar_element", "list")
),
# --- ways: two ways (ids 105071009 and 366457476) ---------------------------
# `attrs` has one row per way, `tags` one row per (way id, key, value),
# `refs` one row per (way id, referenced id).
ways = structure(
list(
attrs = structure(
list(
id = c(105071009, 366457476),
visible = c("true", "true"),
# POSIXlt stored field by field (`year` counts from 1900, `mon` is 0-based).
timestamp = structure(
list(
sec = c(10, 48),
min = c(54L, 15L),
hour = c(13L, 20L),
mday = c(4L, 15L),
mon = c(2L, 4L),
year = 117:116,
wday = c(6L, 0L),
yday = c(62L, 135L),
isdst = 0:1,
zone = c("CET", "CEST"),
gmtoff = c(NA_integer_, NA_integer_)
),
class = c("POSIXlt", "POSIXt")
),
version = c(15, 5),
changeset = c(46573027, 39338422),
user = structure(
2:1,
.Label = c("bjoern262", "saerdnaer"),
class = "factor"
),
uid = structure(
4:3,
.Label = c("367380",
"64536", "651621", "6998"),
class = "factor"
)
),
row.names = c(2L,
4L),
class = "data.frame"
),
# 16 tag rows: 11 for way 105071009, 5 for way 366457476. `k` and `v` are
# factors, so the integer codes index into the `.Label` vectors below.
tags = structure(
list(
id = c(
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
105071009,
366457476,
366457476,
366457476,
366457476,
366457476
),
k = structure(
c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 5L, 6L, 7L, 11L),
.Label = c(
"conveying",
"description",
"highway",
"incline",
"indoor",
"layer",
"level",
"oneway",
"operator",
"ref",
"tunnel"
),
class = "factor"
),
v = structure(
c(6L,
9L, 10L, 4L, 11L, 3L, 2L, 11L, 8L, 7L, 11L, 5L, 11L, 1L, 1L,
11L),
.Label = c(
"-3",
"-3;-4",
"-4",
"down",
"footway",
"forward",
"MP19",
"MVG",
"Rolltreppe MP19",
"steps",
"yes"
),
class = "factor"
)
),
row.names = 4:19,
class = "data.frame"
),
# Way -> referenced-id table (5 rows).
# NOTE(review): the `ref` values are presumably node ids, yet the `nodes`
# element above has no rows -- confirm this is intended for the example.
refs = structure(
list(
id = c(105071009, 105071009, 366457476,
366457476, 366457476),
ref = c(3270556979, 1211172719, 3270556979,
3704371485, 3704371444)
),
row.names = c(20L, 21L, 68L, 69L,
70L),
class = "data.frame"
)
),
class = c("ways", "osmar_element",
"list")
),
# --- relations: `attrs`, `tags` and `refs` data frames, all with zero rows --
relations = structure(
list(
attrs = structure(
list(
id = numeric(0),
visible = character(0),
timestamp = structure(
list(
sec = numeric(0),
min = integer(0),
hour = integer(0),
mday = integer(0),
mon = integer(0),
year = integer(0),
wday = integer(0),
yday = integer(0),
isdst = integer(0),
zone = character(0),
gmtoff = integer(0)
),
class = c("POSIXlt", "POSIXt")
),
version = numeric(0),
changeset = numeric(0),
user = structure(integer(0), .Label = character(0), class = "factor"),
uid = structure(
integer(0),
.Label = c(
"137242",
"161619",
"2455020",
"2590140",
"531886",
"72235",
"8748",
"9451067"
),
class = "factor"
)
),
row.names = integer(0),
class = "data.frame"
),
tags = structure(
list(
id = numeric(0),
k = structure(integer(0), .Label = character(0), class = "factor"),
v = structure(integer(0), .Label = character(0), class = "factor")
),
row.names = integer(0),
class = "data.frame"
),
refs = structure(
list(
id = numeric(0),
type = structure(integer(0), .Label = character(0), class = "factor"),
ref = numeric(0),
role = structure(integer(0), .Label = character(0), class = "factor")
),
row.names = integer(0),
class = "data.frame"
)
),
class = c("relations",
"osmar_element", "list")
)
),
class = c("osmar", "list")
)
# Build the id set to keep, starting from every way id in the object
# (find_down() presumably also gathers the ids the ways refer to -- confirm
# against the osmar documentation).
highway_subset_ids <- find_down(highway_subset, way(highway_subset$ways$attrs$id))
# This is the call the question asks to speed up.
final_subset <- subset(highway_subset, ids = highway_subset_ids)
谢谢!
解决方案
我分析了你的代码
# Time each step of the question's pipeline on its own. The number after each
# call is the timing recorded by the answer's author (presumably seconds as
# printed by system.time() -- confirm).
library("osmar")
src <- osmsource_api(url = "https://api.openstreetmap.org/api/0.6/")
muc_bbox <- center_bbox(11.575278, 48.137222, 1000, 1000)
muc <- get_osm(muc_bbox, src)

# Step 1: find the ways tagged "highway" and subset to them.
system.time({
  highway_ways <- subset(muc, way_ids = find(muc, way(tags(k == "highway"))))
})
# 0.157

# Step 2: within those ways, find the ones that also carry a "name" tag.
system.time({
  named_way_ids <- find(highway_ways, way(tags(k == "name")))
})
# 0.001

# Step 3: expand the way ids into the id set used for the final subset.
system.time({
  highway_subset_ids <- find_down(muc, way(named_way_ids))
})
# 0.008

# Step 4: the final subset the question wants to optimise.
system.time({
  highway_subset <- subset(muc, ids = highway_subset_ids)
})
# 0.025
如您所见,在我这里最后一个 subset 并不是瓶颈,第一个才是(耗时约为前者的 6 倍)。
内部数据并不是很大:
- nodes:15157 行
- ways:2938 行
- tags:11966 行
- relations:350 行
- 另外 tags:3270 行
您提到您需要多次执行子集操作。解决办法可能是尝试“矢量化”您的代码。我指的不是显而易见的 lapply,而是提取内部的 data.frames,将它们绑定在一起,然后只做一次子集操作,如果需要再把它们拆分开。这里可以使用 data.table 来带来额外的速度提升。这比仅仅在 15000 行的循环中使用 data.table 取子集更有益,后者的收益会小得多。
要了解如何“矢量化”该代码,您需要了解 osmar 的 subset 的工作原理。如果您查看源代码 https://github.com/cran/osmar/blob/master/R/osmar-subsetting.R ,这并不难:
- 尝试从所有要取子集的对象中取出 data.frames
- 用 rbindlist 将它们合并
- 用 [.data.table 对它们取子集
- 如果需要,再拆分
- 如果需要,转换回原始对象
另请注意,osmar 软件包相当旧(日期为 2013 年),并且它间接依赖于 sp 这类开发非常活跃的软件包。过去 7 年中依赖项里可能引入了一些重大更改,因此您在使用 osmar 时可能会遇到一些与之相关的问题。
推荐阅读
- flutter - Lottie 动画的颤振性能问题
- java - 在没有临时列表、队列或其他数据结构的情况下更改 Queue 的元素
- reactjs - 通过动作负载将导航道具从组件级别解析到 saga 生成器函数
- typescript - 如何构建干净的 microsoft/vscode 源代码树?
- kubernetes - gRPC keepalive 在阿里巴巴 ACK 上不起作用
- python - Python:复制文件夹时shutil.copy2方法创建一个不在原始文件夹中的.db文件,复制文件时得到NotADirectoryError
- javascript - javascript async await for function 在继续之前
- python - 无法从另一个包导入包
- javascript - Plotly Graphs - Javascript 和 Jinja 之间的冲突
- python - 如何抓取没有不同类的表数据?