首页 > 解决方案 > 如何不使用嵌套的 for 循环并改进我的 R 代码?

问题描述

我在下面的代码中有一个嵌套的 for 循环。

这个循环会遍历每一列和每一行——有没有简单的方法可以将它向量化?

补充说明:循环体检查每个条目中的列表是否只包含 NA;如果某一列的所有条目都只包含 NA,就可以删除整列。


# Packages used below (install once if they are missing):
#   install.packages("rtweet")
#   install.packages("janitor")
library(rtweet)
library(janitor)

# Fetch the Rbloggers timeline (n = 10000 requested) and open it in the viewer.
rbloggers <- get_timeline(user = "Rbloggers", n = 10000)
View(rbloggers)

# remove_empty() removes the columns with NA or blank that are not stored in
# lists; the result is then converted to a plain data.frame.
# Goal for later: readr::write_csv, which cannot handle vectors of type list.
rbloggers <- as.data.frame(
  janitor::remove_empty(rbloggers, which = "cols")
)

# Flag each column in which every entry holds nothing but NA, then drop the
# flagged columns in one step after the scan. Deleting a column inside the
# loop would shift the remaining columns left, so the column after a deleted
# one would be skipped and later values of j would point past the last column.
n_rows <- nrow(rbloggers)
drop_col <- logical(ncol(rbloggers))

for (j in seq_len(ncol(rbloggers))) {
  # Count the rows of column j whose entry is entirely NA; [[1]] unwraps the
  # entry when the column is a list-column.
  n_all_na <- 0
  for (i in seq_len(n_rows)) {
    n_all_na <- n_all_na + all(is.na(rbloggers[i, j][[1]]))
  }

  # If every entry is NA, mark the column for removal. A data frame without
  # rows is left untouched.
  drop_col[j] <- n_rows > 0 && n_all_na == n_rows
}

rbloggers <- rbloggers[, !drop_col, drop = FALSE]

# Other ways to remove a single column (here column 2):
#   Data[2] <- NULL
#   Data[[2]] <- NULL
#   Data <- Data[, -2]
#   Data <- Data[-2]


仅供参考 - 我试图了解以下参考资料:

标签: r vectorization

解决方案


library(rtweet)
library(janitor)

# Fetch the timeline, then let janitor discard the columns it considers empty.
rbloggers <- janitor::remove_empty(
  get_timeline(user = "Rbloggers", n = 10000),
  which = "cols"
)

# Number of NA entries in each column (output shown below).
colSums(is.na(rbloggers))
#>                user_id              status_id             created_at 
#>                      0                      0                      0 
#>            screen_name                   text                 source 
#>                      0                      0                      0 
#>     display_text_width               is_quote             is_retweet 
#>                      0                      0                      0 
#>         favorite_count          retweet_count               hashtags 
#>                      0                      0                      0 
#>               urls_url              urls_t.co      urls_expanded_url 
#>                      0                      0                      0 
#>       mentions_user_id   mentions_screen_name                   lang 
#>                   3175                   3175                      0 
#>             geo_coords          coords_coords            bbox_coords 
#>                      0                      0                      0 
#>             status_url                   name               location 
#>                      0                      0                      0 
#>            description                    url              protected 
#>                      0                      0                      0 
#>        followers_count          friends_count           listed_count 
#>                      0                      0                      0 
#>         statuses_count       favourites_count     account_created_at 
#>                      0                      0                      0 
#>               verified            profile_url   profile_expanded_url 
#>                      0                      0                      0 
#>           account_lang profile_background_url      profile_image_url 
#>                      0                      0                      0

library(dplyr)

# Remove the columns that consist of NA only. The columns are detected from
# the data instead of being typed in by name, so the code keeps working when
# the timeline changes. unlist() flattens list-columns, which lets the check
# look inside every entry; a column that is only partly NA is kept.
is_all_na <- vapply(
  rbloggers,
  function(col) all(is.na(unlist(col, use.names = FALSE))),
  logical(1)
)

rbloggers_clean <- rbloggers %>%
  select(all_of(names(is_all_na)[!is_all_na]))

推荐阅读