r - R:在 R 中添加进度条
问题描述
我正在使用 R 编程语言。我正在尝试学习如何添加“进度条”来估计函数运行时剩余的时间(https://www.rdocumentation.org/packages/progress/versions/1.2.2/topics/progress_bar)。
例如:
library(progress)
pb <- progress_bar$new(total = 100)
for (i in 1:100) {
pb$tick()
Sys.sleep(1 / 100)
}
假设我有一个名为“grid_function”的函数和一个名为“DF_1”的数据集。我从“DF_1”中取出每一行并将这一行输入“grid_function”。“grid_function”使用这一行执行一些计算,并将其存储到一个名为“resultdf1”的“列表”中。最后,“resultdf1”被转换成一个名为“final_output”的数据框。“喂食过程”如下图所示:
resultdf1 <- apply(DF_1,1, # 1 means rows
FUN=function(x){
do.call(
# Call Function grid_function with the arguments in
# a list
grid_function,
# force list type for the arguments
c(list(train_data_new), as.list(
# make the row to a named vector
unlist(x)
)
))
}
)
l = resultdf1
final_output = rbindlist(l, fill = TRUE)
问题:我想在上面的代码中添加一个“进度条”。
我尝试了什么:我尝试按如下方式执行此操作:
library(doParallel)
library(future)
cl <- makePSOCKcluster(6) # 6 cpu cores out of 8
registerDoParallel(cl)
pb <- progress_bar$new(total = 100)
for (i in 1:100) {
resultdf1 <- apply(DF_1,1, # 1 means rows
FUN=function(x){
do.call(
# Call Function grid_function2 with the arguments in
# a list
grid_function,
# force list type for the arguments
c(list(train_data_new), as.list(
# make the row to a named vector
unlist(x)
)
))
}
)
l = resultdf1
final_output = rbindlist(l, fill = TRUE)
pb$tick()
Sys.sleep(1 / 100)
}
stopCluster(cl)
这似乎有效,但我不确定我是否正确地完成了所有操作。有人可以告诉我我是否正确地做到了这一点?添加此“进度条”是否有可能实际上会导致该功能需要更多时间才能运行?
谢谢
更新: @Serkan:这是完整的代码:
library(dplyr)
library(data.table)
results_table <- data.frame()
grid_function <- function(train_data, random_1, random_2, random_3, random_4, split_1, split_2, split_3) {
#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c")))
train_data$cat = as.factor(train_data$cat)
#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)
b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)
c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)
#calculate random quantile ("quant") for each bin
table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_1)))
table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_2)))
table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = split_3)))
#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
table_a$diff = ifelse(table_a$quant > table_a$c1,1,0)
table_b$diff = ifelse(table_b$quant > table_b$c1,1,0)
table_c$diff = ifelse(table_c$quant > table_c$c1,1,0)
#group all tables
final_table = rbind(table_a, table_b, table_c)
#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>%
group_by(cat) %>%
summarize(
mean = mean(diff)
))
#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table$diff)))
#format this table: add the random criteria to this table for reference
final_table_2$random_1 = random_1
final_table_2$random_2 = random_2
final_table_2$random_3 = random_3
final_table_2$random_4 = random_4
final_table_2$split_1 = split_1
final_table_2$split_2 = split_2
final_table_2$split_3 = split_3
results_table <- rbind(results_table, final_table_2)
final_results = dcast(setDT(results_table), random_1 + random_2 + random_3 + random_4 + split_1 + split_2 + split_3 ~ cat, value.var = 'mean')
}
# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,5)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)
#grid
random_1 <- seq(80,100,5)
random_2 <- seq(85,120,5)
random_3 <- seq(85,120,5)
random_4 <- seq(90,120,5)
split_1 = seq(0,1,0.1)
split_2 = seq(0,1,0.1)
split_3 = seq(0,1,0.1)
DF_1 <- expand.grid(random_1 , random_2, random_3, random_4, split_1, split_2, split_3)
#reduce the size of the grid for this example
DF_1 = DF_1[1:100,]
colnames(DF_1) <- c("random_1" , "random_2", "random_3", "random_4", "split_1", "split_2", "split_3")
train_data_new <- copy(train_data)
resultdf1 <- apply(DF_1,1, # 1 means rows
FUN=function(x){
do.call(
# Call Function grid_function2 with the arguments in
# a list
grid_function,
# force list type for the arguments
c(list(train_data_new), as.list(
# make the row to a named vector
unlist(x)
)
))
}
)
l = resultdf1
final_output = rbindlist(l, fill = TRUE)
这是最终输出的样子:
head(final_output)
random_1 random_2 random_3 random_4 split_1 split_2 split_3 b c total a
1: 80 85 85 90 0 0 0 0 0 0 NA
2: 85 85 85 90 0 0 0 0 0 0 NA
3: 90 85 85 90 0 0 0 0 0 0 NA
4: 95 85 85 90 0 0 0 0 0 0 NA
5: 100 85 85 90 0 0 0 0 0 0 NA
6: 80 90 85 90 0 0 0 0 0 0 NA
解决方案
我会选择.progress = TRUE
in furrr:future_map
,下面是一个例子R Script
;
library(furrr)
library(magrittr)
plan(multisession, workers = 2)
1:10 %>% future_map(
~ Sys.sleep(2),
.progress = TRUE
)
它会progress
自动添加一个-bar,
Progress: ─────────────────────────────────────── 100%
如果没有您的data.frame
特定功能,我无法复制您的问题来向您展示它是如何实现的,但是apply
-family 很容易转换为map
-family 功能。
推荐阅读
- ios - 在 SceneDelegate.swift 的 sceneWillResignActive 方法中使用 UserDefaults
- python - 是否有一种干净的 Pythonic 方法来覆盖 Python 子类中的默认方法 args?
- bash - 将标准输出/标准错误重定向到 bash 脚本
- r - 使用`dplyr`有条件地改变列值
- emacs - 如何在 org-tables (orgmode) 中使用逐元素操作
- visual-studio-code - 如何在 git push 之前在 UI 中的 PR 中显示更改的代码
- reactjs - 使用 Docusign Clickwrap API 做出反应
- python - Codeforces 练习 71
- firebase - 展开时非 Nil 用户崩溃 - Firebase,SwiftUI
- javascript - 在 Vue.js v2.x 中使用 v-for 指令时,如何指定范围的起始值?