首页 > 解决方案 > 如何转换类似于 mtcars 数据集的数据集

问题描述

我有下面的数据集,我想把它转换成类似于 R 中 mtcars 数据集的形式。我的意思是,mtcars 的第一列是没有列名的汽车名称,也就是说它使用的是行名。我下面的数据集的行名是数字,我想把行名改为作者姓名。有没有办法在 R 中做到这一点?

在约翰的评论之后编辑

# dput() output of the question's sample data: a data.frame with six rows
# and automatic integer row names (row.names = c(NA, 6L)).
# Columns: two factors (`author`, `filename`) followed by ten numeric
# columns named after common English words (a, all, also, an, and, any,
# are, as, at, be). All six rows belong to the factor level "dispt".
# NOTE(review): the numeric columns look like per-document word
# frequencies -- confirm against the data source.
# The expression is not assigned here; the answer's code operates on an
# object named `df`.
structure(list(author = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("dispt", 
    "Hamilton", "HM", "Jay", "Madison"), class = "factor"), filename = structure(1:6, .Label = c("dispt_fed_49.txt", 
    "dispt_fed_50.txt", "dispt_fed_51.txt", "dispt_fed_52.txt", "dispt_fed_53.txt", 
    "dispt_fed_54.txt", "dispt_fed_55.txt", "dispt_fed_56.txt", "dispt_fed_57.txt", 
    "dispt_fed_62.txt", "dispt_fed_63.txt", "Hamilton_fed_1.txt", 
    "Hamilton_fed_11.txt", "Hamilton_fed_12.txt", "Hamilton_fed_13.txt", 
    "Hamilton_fed_15.txt", "Hamilton_fed_16.txt", "Hamilton_fed_17.txt", 
    "Hamilton_fed_21.txt", "Hamilton_fed_22.txt", "Hamilton_fed_23.txt", 
    "Hamilton_fed_24.txt", "Hamilton_fed_25.txt", "Hamilton_fed_26.txt", 
    "Hamilton_fed_27.txt", "Hamilton_fed_28.txt", "Hamilton_fed_29.txt", 
    "Hamilton_fed_30.txt", "Hamilton_fed_31.txt", "Hamilton_fed_32.txt", 
    "Hamilton_fed_33.txt", "Hamilton_fed_34.txt", "Hamilton_fed_35.txt", 
    "Hamilton_fed_36.txt", "Hamilton_fed_59.txt", "Hamilton_fed_6.txt", 
    "Hamilton_fed_60.txt", "Hamilton_fed_61.txt", "Hamilton_fed_65.txt", 
    "Hamilton_fed_66.txt", "Hamilton_fed_67.txt", "Hamilton_fed_68.txt", 
    "Hamilton_fed_69.txt", "Hamilton_fed_7.txt", "Hamilton_fed_70.txt", 
    "Hamilton_fed_71.txt", "Hamilton_fed_72.txt", "Hamilton_fed_73.txt", 
    "Hamilton_fed_74.txt", "Hamilton_fed_75.txt", "Hamilton_fed_76.txt", 
    "Hamilton_fed_77.txt", "Hamilton_fed_78.txt", "Hamilton_fed_79.txt", 
    "Hamilton_fed_8.txt", "Hamilton_fed_80.txt", "Hamilton_fed_81.txt", 
    "Hamilton_fed_82.txt", "Hamilton_fed_83.txt", "Hamilton_fed_84.txt", 
    "Hamilton_fed_85.txt", "Hamilton_fed_9.txt", "HM_fed_18.txt", 
    "HM_fed_19.txt", "HM_fed_20.txt", "Jay_fed_2.txt", "Jay_fed_3.txt", 
    "Jay_fed_4.txt", "Jay_fed_5.txt", "Jay_fed_64.txt", "Madison_fed_10.txt", 
    "Madison_fed_14.txt", "Madison_fed_37.txt", "Madison_fed_38.txt", 
    "Madison_fed_39.txt", "Madison_fed_40.txt", "Madison_fed_41.txt", 
    "Madison_fed_42.txt", "Madison_fed_43.txt", "Madison_fed_44.txt", 
    "Madison_fed_45.txt", "Madison_fed_46.txt", "Madison_fed_47.txt", 
    "Madison_fed_48.txt", "Madison_fed_58.txt"), class = "factor"), 
        a = c(0.28, 0.177, 0.339, 0.27, 0.303, 0.245), all = c(0.052, 
        0.063, 0.09, 0.024, 0.054, 0.059), also = c(0.009, 0.013, 
        0.008, 0.016, 0.027, 0.007), an = c(0.096, 0.038, 0.03, 0.024, 
        0.034, 0.067), and = c(0.358, 0.393, 0.301, 0.262, 0.404, 
        0.282), any = c(0.026, 0.063, 0.008, 0.056, 0.04, 0.052), 
        are = c(0.131, 0.051, 0.068, 0.064, 0.128, 0.111), as = c(0.122, 
        0.139, 0.203, 0.111, 0.148, 0.252), at = c(0.017, 0.114, 
        0.023, 0.056, 0.013, 0.015), be = c(0.411, 0.393, 0.474, 
        0.365, 0.344, 0.297)), .Names = c("author", "filename", "a", 
    "all", "also", "an", "and", "any", "are", "as", "at", "be"), row.names = c(NA, 
    6L), class = "data.frame")

提前致谢!

标签: r

解决方案


每个 author 名称都对应多行数据。由于 data.frame 的行名必须唯一,不能直接把 author 名称用作行名。您需要在名称后添加下标(1、2、3……)以使其唯一。

方法是先按 author 分组,再把组内行号拼接到作者姓名之后,使其唯一。最后使用 column_to_rownames 将 newAuthor 列设置为行名。

library(tidyverse)


# Replace the numeric row names of `df` with unique author-based labels.
# Row names must be unique, and each author owns several rows, so the label
# is the author name followed by the row's running index within that author
# (e.g. "dispt1", "dispt2", ...).
df <- df %>%
  group_by(author) %>%
  mutate(newAuthor = paste0(author, row_number())) %>%
  # Drop the grouping while the object is still a grouped tibble; calling
  # ungroup() after as.data.frame() would have nothing left to undo.
  ungroup() %>%
  select(-author) %>%
  # Convert to a plain data.frame and set the row names as the very last
  # step, so no later verb can touch them. The conversion yields automatic
  # row names, so no remove_rownames() call is needed before this point.
  as.data.frame() %>%
  column_to_rownames("newAuthor")

# Display of result. Just included first 3 columns as data.frame is very big
head(df[, 1:3])
#                filename     a   all
# dispt1 dispt_fed_49.txt 0.280 0.052
# dispt2 dispt_fed_50.txt 0.177 0.063
# dispt3 dispt_fed_51.txt 0.339 0.090
# dispt4 dispt_fed_52.txt 0.270 0.024
# dispt5 dispt_fed_53.txt 0.303 0.054
# dispt6 dispt_fed_54.txt 0.245 0.059

推荐阅读