首页 > 解决方案 > ggplot 将 geom_histogram 缩放到 100%

问题描述

使用此代码

# Stacked histogram of run distances (1 km bins), filled by pace category,
# with each segment's count printed in white at the middle of the segment.
ggplot(dfrunning, aes(x = distance / 1000)) +
  geom_histogram(aes(fill = catpace), binwidth = 1) +
  # Bin the data a second time with the same binwidth to get the counts that
  # are drawn as text; the labels are grouped by catpace like the bar fill.
  stat_bin(
    binwidth = 1, geom = "text", colour = "white", size = 3.5,
    aes(label = ..count.., group = catpace),
    position = position_stack(vjust = 0.5)
  ) +
  # x is plotted as distance / 1000, so the tick positions have to be computed
  # on that same scale rather than from the raw distance column.
  scale_x_continuous(breaks = seq(0, max(dfrunning$distance) / 1000, 1)) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Count",
    fill = "Pace in sec/km"
  )

我制作了这张图片: 在此处输入图像描述

我不希望看到每个 bin 的计数,而是将每个 bin 缩放到 100%。然后也应相应地重新调整填充和文本。数据框如下所示:

> dfrunning
# A tibble: 2,201 x 11
   date       time   type    distance duration paceInMin paceInSec latitude longitude catpace     catdistance      
   <date>     <time> <chr>      <dbl>    <dbl> <time>        <int>    <dbl>     <dbl> <fct>       <fct>            
 1 2012-04-16 10:24  running    13680     4192 05:06           306     50.8      6.10 (300,330]   (1.3e+04,1.4e+04]
 2 2012-04-18 10:47  running     7239     2115 04:52           292     50.8      6.10 (270,300]   (7e+03,8e+03]    
 3 2012-04-22 14:09  running    28536    10571 06:10           370     50.8      6.10 (360,1e+04] (2.8e+04,2.9e+04]
 4 2012-05-05 13:11  running    16168     7308 07:31           451     50.7      6.08 (360,1e+04] (1.6e+04,1.7e+04]
 5 2012-05-06 13:39  running    25033     9180 06:06           366     50.8      6.10 (360,1e+04] (2.5e+04,2.6e+04]

IMO,这个问题与“创建堆叠条形图,其中每个堆栈被缩放为总和为 100% ”不同,因为我没有明确地知道每个组的计数。

编辑:正如 jaySf 所指出的,下面是 dput(head(dfrunning, 50)) 的输出:

structure(list(date = structure(c(15446, 15448, 15452, 15465, 
15466, 15483, 15506, 15506, 15561, 15566, 15566, 15591, 15598, 
15599, 15602, 15605, 15606, 15608, 15611, 15612, 15613, 15614, 
15615, 15616, 15617, 15618, 15618, 15619, 15747, 15621, 15621, 
15622, 15623, 15627, 15752, 15769, 15770, 15772, 15774, 15775, 
15776, 15778, 15780, 15781, 15782, 15782, 15783, 15783, 15785, 
15785), class = "Date"), time = structure(c(37475, 38822, 50949, 
47508, 49193, 55739, 56611, 59442, 56185, 69657, 72709, 36513, 
64961, 51622, 49059, 59999, 50660, 72043, 65558, 41359, 38752, 
68144, 70312, 68611, 64509, 61189, 68135, 34764, 63827, 59209, 
69285, 69202, 69029, 41600, 31455, 61002, 61247, 58883, 47413, 
63764, 64603, 60993, 34642, 37138, 62160, 65013, 61298, 63556, 
61877, 65543), class = c("hms", "difftime"), units = "secs"), 
    type = c("running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running", "running", "running", "running", 
    "running", "running", "running"), distance = c(13680, 7238.54607310699, 
    28535.7961841139, 16168.3259995435, 25033, 10714.1336730768, 
    2363.75157921817, 8432.38615603382, 13261.9315631379, 14914.0942764589, 
    4064.49780742219, 8958.01394358889, 8416.04241820714, 27086.4898199381, 
    8454.4109033314, 13078.5350969731, 9976.05213811295, 7522.5914405498, 
    20333.7962161682, 13657.8724960625, 19632.1287324509, 19161.0107979676, 
    10175, 18363.9547260094, 45.9799995422363, 17249.8509124987, 
    19079.0453104679, 9720.46644444582, 13963.9852885433, 19564.2037933423, 
    9233.03482250782, 14047.3764062267, 17000.9929225885, 19796.3029324504, 
    7986.10636548276, 16579.832919954, 9793.81660451401, 11942.1530615798, 
    19049.4113915166, 8852.04351847768, 7891.32210952351, 5995.21855763869, 
    1301.91869595747, 13996.0654474524, 2100.09008789062, 9869.70660238926, 
    2028.71832491649, 7433.78777880617, 15151.3936450139, 11741.0316277532
    ), duration = c(4192, 2115, 10571, 7308, 9180, 3292, 760, 
    2640, 4436, 6646, 1371, 3405, 2438, 8477, 2588, 3968, 3271, 
    2826, 5652, 4330, 6410, 6255, 3682, 6193, 13, 83077, 6689, 
    3954, 4963, 106805, 2846, 6340, 5348, 6123, 3605, 88125, 
    3166, 3518, 14506, 3118, 2780, 86832, 633, 4388, 662, 2796, 
    581, 2191, 86195, 3629), paceInMin = structure(c(18360, 17520, 
    22200, 27060, 21960, 18420, 19260, 18780, 20040, 26700, 20220, 
    22800, 17340, 18720, 18360, 18180, 19620, 22500, 16620, 19020, 
    19560, 19560, 21660, 20220, 16920, 72960, 21000, 24360, 21300, 
    NA, 18480, 27060, 18840, 18540, 27060, NA, 19380, 17640, 
    45660, 21120, 21120, 4980, 29160, 18780, 18900, 16980, 17160, 
    17640, NA, 18540), class = c("hms", "difftime"), units = "secs"), 
    paceInSec = c(306L, 292L, 370L, 451L, 366L, 307L, 321L, 313L, 
    334L, 445L, 337L, 380L, 289L, 312L, 306L, 303L, 327L, 375L, 
    277L, 317L, 326L, 326L, 361L, 337L, 282L, 1216L, 350L, 406L, 
    355L, 1859L, 308L, 451L, 314L, 309L, 451L, 1715L, 323L, 294L, 
    761L, 352L, 352L, 83L, 486L, 313L, 315L, 283L, 286L, 294L, 
    2088L, 309L), latitude = c(50.78088236, 50.78210075, 50.77468025, 
    50.74850298, 50.77482007, 50.78003285, 50.78238624, 50.78864819, 
    51.33017446, 50.77988517, 50.74921084, 51.32995008, 51.32999836, 
    51.33013314, 51.32992619, 48.71562467, 48.71430603, 48.7143813, 
    48.714214, 48.71429463, 48.717048, 48.71553859, 48.7142808, 
    48.71094162, 48.71536257, 48.71526475, 48.71454718, 48.71594159, 
    48.7144186, 48.71531186, 48.71480333, 48.70992154, 48.70286641, 
    48.71461113, 48.71444383, 48.71446428, 48.7146807, 48.71469336, 
    48.72847723, 48.71530138, 48.70127678, 48.70118726, 48.7013119, 
    48.73173444, 48.71487223, 48.72272649, 48.71518764, 48.72266807, 
    48.71527171, 48.71515763), longitude = c(6.09665447, 6.09782727, 
    6.09629815, 6.08372496, 6.09631483, 6.10046044, 6.09614795, 
    6.07149736, 7.86466297, 6.10240906, 6.08444153, 7.86465433, 
    7.86348933, 7.86398814, 7.86355178, 11.48824135, 11.48822618, 
    11.48825015, 11.48808285, 11.48821939, 11.4908933, 11.49234362, 
    11.48826499, 11.49619108, 11.49016634, 11.49094141, 11.48822375, 
    11.49247371, 11.48828653, 11.48861426, 11.48820028, 11.49726229, 
    11.5088289, 11.48817371, 11.48823674, 11.48811696, 11.48788319, 
    11.48822375, 11.56643034, 11.48987599, 11.50984076, 11.50986775, 
    11.50983674, 11.56601745, 11.48822492, 11.51100417, 11.48894744, 
    11.51097458, 11.48799551, 11.48825208), catpace = c("(300,330]", 
    "(270,300]", "(360,1e+04]", "(360,1e+04]", "(360,1e+04]", 
    "(300,330]", "(300,330]", "(300,330]", "(330,360]", "(360,1e+04]", 
    "(330,360]", "(360,1e+04]", "(270,300]", "(300,330]", "(300,330]", 
    "(300,330]", "(300,330]", "(360,1e+04]", "(270,300]", "(300,330]", 
    "(300,330]", "(300,330]", "(360,1e+04]", "(330,360]", "(270,300]", 
    "(360,1e+04]", "(330,360]", "(360,1e+04]", "(330,360]", "(360,1e+04]", 
    "(300,330]", "(360,1e+04]", "(300,330]", "(300,330]", "(360,1e+04]", 
    "(360,1e+04]", "(300,330]", "(270,300]", "(360,1e+04]", "(330,360]", 
    "(330,360]", "(0,180]", "(360,1e+04]", "(300,330]", "(300,330]", 
    "(270,300]", "(270,300]", "(270,300]", "(360,1e+04]", "(300,330]"
    ), catdistance = c("(1.3e+04,1.4e+04]", "(7e+03,8e+03]", 
    "(2.8e+04,2.9e+04]", "(1.6e+04,1.7e+04]", "(2.5e+04,2.6e+04]", 
    "(1e+04,1.1e+04]", "(2e+03,3e+03]", "(8e+03,9e+03]", "(1.3e+04,1.4e+04]", 
    "(1.4e+04,1.5e+04]", "(4e+03,5e+03]", "(8e+03,9e+03]", "(8e+03,9e+03]", 
    "(2.7e+04,2.8e+04]", "(8e+03,9e+03]", "(1.3e+04,1.4e+04]", 
    "(9e+03,1e+04]", "(7e+03,8e+03]", "(2e+04,2.1e+04]", "(1.3e+04,1.4e+04]", 
    "(1.9e+04,2e+04]", "(1.9e+04,2e+04]", "(1e+04,1.1e+04]", 
    "(1.8e+04,1.9e+04]", "(0,1e+03]", "(1.7e+04,1.8e+04]", "(1.9e+04,2e+04]", 
    "(9e+03,1e+04]", "(1.3e+04,1.4e+04]", "(1.9e+04,2e+04]", 
    "(9e+03,1e+04]", "(1.4e+04,1.5e+04]", "(1.7e+04,1.8e+04]", 
    "(1.9e+04,2e+04]", "(7e+03,8e+03]", "(1.6e+04,1.7e+04]", 
    "(9e+03,1e+04]", "(1.1e+04,1.2e+04]", "(1.9e+04,2e+04]", 
    "(8e+03,9e+03]", "(7e+03,8e+03]", "(5e+03,6e+03]", "(1e+03,2e+03]", 
    "(1.3e+04,1.4e+04]", "(2e+03,3e+03]", "(9e+03,1e+04]", "(2e+03,3e+03]", 
    "(7e+03,8e+03]", "(1.5e+04,1.6e+04]", "(1.1e+04,1.2e+04]"
    )), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))

用这些数据会生成下面这张图: 在此处输入图像描述

标签: r ggplot2 data-visualization

解决方案


这可以通过使用 position = "fill" 参数来完成。要让文本标签位于正确的位置,您还需要更改 stat_bin 图层中的 position 参数。下面的代码应该有效:

library(scales) # provides percent() for the percentage y-axis labels

# Same histogram as in the question, but every 1 km bin is rescaled to a total
# height of 1 so the fill shows each pace category's share of the bin.
ggplot(dfrunning, aes(x = distance / 1000)) +
  geom_histogram(aes(fill = catpace), binwidth = 1, position = "fill") +
  # The text layer uses position_fill() as well so the count labels are placed
  # on the rescaled segments; vjust = 0.5 centres each label in its segment.
  stat_bin(
    binwidth = 1, geom = "text", colour = "white", size = 3.5,
    aes(label = ..count.., group = catpace),
    position = position_fill(vjust = 0.5)
  ) +
  # x is plotted as distance / 1000, so compute the ticks on that same scale.
  scale_x_continuous(breaks = seq(0, max(dfrunning$distance) / 1000, 1)) +
  # Pass the formatting function itself rather than a fixed vector of five
  # strings, so the labels stay valid whatever breaks the scale ends up using.
  scale_y_continuous(labels = percent) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Percentage",
    fill = "Pace in sec/km"
  )

这会生成下面这张图:

图 1


编辑:百分比标签而不是计数

我不知道如何使用上述方法将百分比纳入图表。这很可能是不可能的,但同样可能我只是不知道。无论如何,如果您想要图表中的百分比标签,我建议您对数据进行预处理,例如:

# Pre-aggregate the data so that percentage labels can be drawn directly:
# one row per (rounded km distance, pace category) with its share of that
# distance bin, then plot the shares as stacked bars with centred labels.
dfrunning %>%
  mutate(dist = round(distance / 1000)) %>%
  # Number of runs per distance bin and pace category (column `n`).
  count(dist, catpace) %>%
  # Share of each pace category within its distance bin, in percent.
  group_by(dist) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ungroup() %>%
  ggplot(aes(x = dist, y = pct)) +
  geom_col(aes(fill = catpace)) +
  # group = catpace makes the text layer stack by the same variable as the
  # bar fill; without a grouping aesthetic the labels are stacked in row
  # order and can end up in a different segment than the one they describe.
  geom_text(
    aes(label = paste0(round(pct, 0), "%"), group = catpace),
    colour = "white", size = 3.5, angle = 90,
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Percentage",
    fill = "Pace in sec/km"
  )

它会生成下面这张图(我使用 angle 参数旋转了标签,以便它们能放进各自的色块中):

图 2


推荐阅读