首页 > 解决方案 > r中数据框列表的总和行

问题描述

我有一个数据框列表。每个数据框都是一个股票报价,其行名是日期,列名是买入价、卖出价、股票和 PL。

我想获得一个列,其中包含每个正 PL 贡献占每日总 PL 的百分比。

为了简化问题,我准备了以下数据:

# Sample data: a named list ("1", "2", "3") with one data frame per stock.
# Each data frame has a character Date column and a numeric Income column.
# The first date of stock "1" is "2009-01-01", matching the printed data
# below (a stray integer here would be coerced to a non-date string).
mylist <- list(
  `1` = data.frame(
    Date = c("2009-01-01", "2009-01-03", "2009-01-04", "2009-01-05"),
    Income = c(100, 200, 300, 400),
    stringsAsFactors = FALSE
  ),
  `2` = data.frame(
    Date = c("2009-01-02", "2009-01-03", "2009-01-04", "2009-01-05"),
    Income = c(500, -600, 700, 800),
    stringsAsFactors = FALSE
  ),
  `3` = data.frame(
    Date = c("2009-01-02", "2009-01-03", "2009-01-04"),
    Income = c(100, 200, 300),
    stringsAsFactors = FALSE
  )
)

看起来像这样:

$`1`
        Date Income
1 2009-01-01    100
2 2009-01-03    200
3 2009-01-04    300
4 2009-01-05    400

$`2`
        Date Income
1 2009-01-02    500
2 2009-01-03   -600
3 2009-01-04    700
4 2009-01-05    800

$`3`
        Date Income
1 2009-01-02    100
2 2009-01-03    200
3 2009-01-04    300

我想获得如下所示的东西:

$`1`
        Date Income Perc
1 2009-01-01    100 1.00
2 2009-01-03    200 0.50
3 2009-01-04    300 0.23
4 2009-01-05    400 0.33

$`2`
        Date Income  Perc
1 2009-01-02    500  0.83
2 2009-01-03   -600 -1.50
3 2009-01-04    700  0.54
4 2009-01-05    800  0.67

$`3`
        Date Income Perc
1 2009-01-02    100 0.17
2 2009-01-03    200 0.50
3 2009-01-04    300 0.23

标签: r

解决方案


对于您的问题,我有两个解决方案。我强烈建议将您的数据框组合到一个主数据框中,以尽可能降低代码的复杂性。我确信对于“分离数据框”问题有更好的解决方案,但其中大多数将涉及多个循环,从而对性能产生负面影响。

数据

# Sample data: a named list ("1", "2", "3") of data frames, each with a
# character Date column and a numeric Income column.
mylist <- list(
  `1` = data.frame(
    Date = c("2009-01-02", "2009-01-03", "2009-01-04", "2009-01-05"),
    Income = c(100, 200, 300, 400),
    stringsAsFactors = FALSE
  ),
  `2` = data.frame(
    Date = c("2009-01-02", "2009-01-03", "2009-01-04", "2009-01-05"),
    Income = c(500, -600, 700, 800),
    stringsAsFactors = FALSE
  ),
  `3` = data.frame(
    Date = c("2009-01-02", "2009-01-03", "2009-01-04"),
    Income = c(100, 200, 300),
    stringsAsFactors = FALSE
  )
)

组合数据框

library(dplyr)

# Tag every data frame with the name of its list element so each row can
# still be traced back to its stock once everything is stacked.
# seq_along() is used instead of 1:length(): on an empty list 1:0 would
# iterate over 1 and 0, whereas seq_along() skips the loop.
for (i in seq_along(mylist)) {
  mylist[[i]] <- cbind(mylist[[i]], stock_id = names(mylist)[i])
}

# Stack all observations into a single data frame and reset the row names
# that rbind() generates.
my_data_frame <- do.call(rbind, mylist)
rownames(my_data_frame) <- NULL

# Per date: each Income divided by the sum of that date's positive Incomes.
# The printed result is still grouped by Date.
my_data_frame %>%
  group_by(Date) %>%
  mutate(Perc = Income / sum(Income[Income > 0]))
# A tibble: 11 x 4
# Groups:   Date [4]
   Date       Income stock_id   Perc
   <chr>       <dbl> <chr>     <dbl>
 1 2009-01-02    100 1         0.143
 2 2009-01-03    200 1         0.5  
 3 2009-01-04    300 1         0.231
 4 2009-01-05    400 1         0.333
 5 2009-01-02    500 2         0.714
 6 2009-01-03   -600 2        -1.5  
 7 2009-01-04    700 2         0.538
 8 2009-01-05    800 2         0.667
 9 2009-01-02    100 3         0.143
10 2009-01-03    200 3         0.5  
11 2009-01-04    300 3         0.231

单独的数据框

library(dplyr)

# NOTE: start from the original mylist. The tagging loop below appends a
# second stock_id column if the list has already been tagged.

# Every distinct date that occurs in any of the data frames.
all_dates <- unique(unlist(lapply(mylist, function(x) unique(x$Date))))

# Tag each data frame with the name of its list element; stock_id is one of
# the join keys used further down. seq_along() skips the loop for an empty
# list, where 1:length() would iterate over 1 and 0.
for (i in seq_along(mylist)) {
  mylist[[i]] <- cbind(mylist[[i]], stock_id = names(mylist)[i])
}

# For each date, collect that day's rows from every data frame and divide
# each Income by the sum of the day's positive Incomes.
perc_all <- lapply(all_dates, function(current_date) {
  same_day <- lapply(mylist, function(x) x[x$Date == current_date, ])
  all_obs_date <- do.call(rbind, same_day)
  positive_total <- sum(all_obs_date$Income[all_obs_date$Income > 0])
  all_obs_date$Perc <- all_obs_date$Income / positive_total
  all_obs_date
})
perc_final <- do.call(rbind, perc_all)

# Attach Perc to each original data frame. The keys are spelled out (they
# are exactly the columns shared by both tables) so the join does not depend
# on implicit natural-join matching. stock_id is dropped again afterwards.
lapply(mylist, function(x) {
  left_join(x, perc_final, by = c("Date", "Income", "stock_id")) %>%
    select(-stock_id)
})

$`1`
        Date Income      Perc
1 2009-01-02    100 0.1428571
2 2009-01-03    200 0.5000000
3 2009-01-04    300 0.2307692
4 2009-01-05    400 0.3333333

$`2`
        Date Income       Perc
1 2009-01-02    500  0.7142857
2 2009-01-03   -600 -1.5000000
3 2009-01-04    700  0.5384615
4 2009-01-05    800  0.6666667

$`3`
        Date Income      Perc
1 2009-01-02    100 0.1428571
2 2009-01-03    200 0.5000000
3 2009-01-04    300 0.2307692

推荐阅读