首页 > 解决方案 > R 不能映射到 ggplot 并同时使用 facet wrap

问题描述

这是我原始数据框的一部分,它是调查数据。

structure(list(Ages = c(30L, 30L, 30L, 30L, 30L, 33L, 33L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 28L, 28L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 38L, 
38L, 38L, NA, NA, NA, NA, 31L, 31L, 31L, 31L, 31L, 31L, 33L, 
33L, 33L, 33L, 33L, 33L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 
34L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 36L, 36L, 
36L, 36L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 28L, 28L, 28L, 28L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 30L, 30L, 30L, 30L, 26L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), value = c("Response Eight", 
"Response Twelve", "Response Eleven", "Response Three", "Response Two", 
"Response Seven", "Response Seven", "Response Eight", "Response Nine", 
"Response Twelve", "Response Eleven", "Response Three", "Response Ten", 
"Response Two", "Response One", "Response Four", "Response Five", 
"Response Six", "Response Eight", "Response Nine", "Response Twelve", 
"Response Eleven", "Response Three", "Response Ten", "Response Two", 
"Response One", "Response Four", "Response Five", "Response Six", 
"Response Eight", "Response Nine", "Response Twelve", "Response Eleven", 
"Response Three", "Response Ten", "Response Two", "Response One", 
"Response Four", "Response Five", "Response Six", "Response Eight", 
"Response Nine", "Response Twelve", "Response Eleven", "Response Three", 
"Response Ten", "Response Two", "Response One", "Response Four", 
"Response Five", "Response Six", "Response Seven", "Response Seven", 
"Response Three", "Response One", "Response Three", "Response One", 
"Response Three", "Response One", "Response Three", "Response One", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Twelve", "Response Three", "Response Twelve", "Response Three", 
"Response Seven", "Response Seven", "Response Seven", "Response Eight", 
"Response Three", "Response Two", "Response One", "Response Ten", 
"Response Two", "Response Ten", "Response Two", "Response Ten", 
"Response Two", "Response Three", "Response One", "Response Three", 
"Response One", "Response Three", "Response One", "Response Eight", 
"Response Nine", "Response Three", "Response Ten", "Response Two", 
"Response One", "Response Eight", "Response Nine", "Response Three", 
"Response Ten", "Response Two", "Response One", "Response Eight", 
"Response Nine", "Response Three", "Response Ten", "Response Two", 
"Response One", "Response Eight", "Response Nine", "Response Three", 
"Response Ten", "Response Two", "Response One", "Response Eight", 
"Response Nine", "Response Three", "Response Ten", "Response Two", 
"Response One", "Response Eight", "Response Nine", "Response Three", 
"Response Ten", "Response Two", "Response One", "Response Eight", 
"Response Three", "Response Ten", "Response Eight", "Response Three", 
"Response Ten", "Response Eight", "Response Three", "Response Ten", 
"Response Eight", "Response Three", "Response Ten", "Response Eight", 
"Response Nine", "Response Three", "Response Two", "Response Six", 
"Response Eight", "Response Nine", "Response Three", "Response Two", 
"Response Six", "Response Eight", "Response Nine", "Response Three", 
"Response Two", "Response Six", "Response Twelve", "Response One", 
"Response Twelve", "Response One", "Response Twelve", "Response One", 
"Response Twelve", "Response One", "Response Seven", "Response Seven", 
"Response Seven", "Response Seven", "Response Eight", "Response Nine", 
"Response Twelve", "Response Eleven", "Response Ten", "Response Two", 
"Response One", "Response Four", "Response Five", "Response Six", 
"Response Eight", "Response Nine", "Response Ten", "Response One", 
"Response Eight", "Response Nine", "Response Ten", "Response One", 
"Response Eight", "Response Nine", "Response Ten", "Response One", 
"Response Eight", "Response Nine", "Response Ten", "Response One", 
"Response Eight", "Response Nine", "Response Ten", "Response One", 
"Response Seven", "Response Seven", "Response Seven", "Response Seven", 
"Response Eight", "Response Nine", "Response Twelve", "Response Ten", 
"Response Eight", "Response Nine", "Response Twelve", "Response Ten", 
"Response Eight", "Response Three", "Response Ten", "Response Eight", 
"Response Three", "Response Ten", "Response Eight", "Response Three", 
"Response Ten", "Response One", "Response One", "Response One", 
"Response One", "Response Seven", "Response Eight", "Response Twelve", 
"Response Ten", "Response Eight", "Response Twelve", "Response Ten", 
"Response Eight", "Response Twelve", "Response Ten", "Response Eight", 
"Response Twelve", "Response Ten", "Response Eight", "Response Twelve", 
"Response Ten"), n = c(3210L, 4658L, 1271L, 4453L, 2834L, 2526L, 
2526L, 3210L, 4098L, 4658L, 1271L, 4453L, 2975L, 2834L, 3833L, 
916L, 1221L, 1208L, 3210L, 4098L, 4658L, 1271L, 4453L, 2975L, 
2834L, 3833L, 916L, 1221L, 1208L, 3210L, 4098L, 4658L, 1271L, 
4453L, 2975L, 2834L, 3833L, 916L, 1221L, 1208L, 3210L, 4098L, 
4658L, 1271L, 4453L, 2975L, 2834L, 3833L, 916L, 1221L, 1208L, 
2526L, 2526L, 4453L, 3833L, 4453L, 3833L, 4453L, 3833L, 4453L, 
3833L, 4658L, 4453L, 4658L, 4453L, 4658L, 4453L, 4658L, 4453L, 
4658L, 4453L, 4658L, 4453L, 4658L, 4453L, 4658L, 4453L, 4658L, 
4453L, 4658L, 4453L, 4658L, 4453L, 4658L, 4453L, 2526L, 2526L, 
2526L, 3210L, 4453L, 2834L, 3833L, 2975L, 2834L, 2975L, 2834L, 
2975L, 2834L, 4453L, 3833L, 4453L, 3833L, 4453L, 3833L, 3210L, 
4098L, 4453L, 2975L, 2834L, 3833L, 3210L, 4098L, 4453L, 2975L, 
2834L, 3833L, 3210L, 4098L, 4453L, 2975L, 2834L, 3833L, 3210L, 
4098L, 4453L, 2975L, 2834L, 3833L, 3210L, 4098L, 4453L, 2975L, 
2834L, 3833L, 3210L, 4098L, 4453L, 2975L, 2834L, 3833L, 3210L, 
4453L, 2975L, 3210L, 4453L, 2975L, 3210L, 4453L, 2975L, 3210L, 
4453L, 2975L, 3210L, 4098L, 4453L, 2834L, 1208L, 3210L, 4098L, 
4453L, 2834L, 1208L, 3210L, 4098L, 4453L, 2834L, 1208L, 4658L, 
3833L, 4658L, 3833L, 4658L, 3833L, 4658L, 3833L, 2526L, 2526L, 
2526L, 2526L, 3210L, 4098L, 4658L, 1271L, 2975L, 2834L, 3833L, 
916L, 1221L, 1208L, 3210L, 4098L, 2975L, 3833L, 3210L, 4098L, 
2975L, 3833L, 3210L, 4098L, 2975L, 3833L, 3210L, 4098L, 2975L, 
3833L, 3210L, 4098L, 2975L, 3833L, 2526L, 2526L, 2526L, 2526L, 
3210L, 4098L, 4658L, 2975L, 3210L, 4098L, 4658L, 2975L, 3210L, 
4453L, 2975L, 3210L, 4453L, 2975L, 3210L, 4453L, 2975L, 3833L, 
3833L, 3833L, 3833L, 2526L, 3210L, 4658L, 2975L, 3210L, 4658L, 
2975L, 3210L, 4658L, 2975L, 3210L, 4658L, 2975L, 3210L, 4658L, 
2975L)), row.names = c(NA, -250L), class = c("tbl_df", "tbl", 
"data.frame"))

然后我创建一个新的数据框,它是原始数据框中每个值/响应的中位年龄的自举置信区间。我使用 infer 包来创建 ci。这是数据框的代码和 dput。

# Build one bootstrapped confidence interval per response.
# The data frame is split by `value`; each piece is run through the infer
# pipeline below and the results are row-bound by map_df(), with the name of
# each piece stored in a `value` column (.id = 'value').
bootstrapped_ci_df <- data_frame_responses %>% 
  split(.$value) %>% 
  map_df(~.x %>% 
  # Ages is the variable being resampled.
  specify(response = Ages) %>%
  # 1000 bootstrap replicates per response.
  generate(reps = 1000, type = 'bootstrap') %>%
  # Median of each replicate.
  calculate(stat = 'median') %>%
  # Interval computed from the replicate medians at the requested level.
  get_ci(level = 0.99999), .id = 'value')

structure(list(value = c("Response One", "Response Two", "Response Three", 
"Response Four", "Response Five", "Response Six", "Response Seven", 
"Response Eight", "Response Nine", "Response Ten", "Response Eleven", 
"Response Twelve"), lower_ci = c(28.5, 28, 29, 28, 30, 29, 31, 
29, 28, 29, 28, 28), upper_ci = c(29, 29, 30, 29, 30.995005, 
30, 31.5, 30, 29, 30, 29, 29)), row.names = c(NA, -12L), class = c("tbl_df", 
"tbl", "data.frame"))

问题是当我用 ggplot2 绘制它时。

在第一种情况下,我可以创建一个看起来不错的图表。我将响应最多的值放在底部。

# Histogram of Ages with one panel per response.
# Both the fill and the facet variable are fct_reorder(value, n), so the
# panels follow the ordering of `value` by `n`.
ggplot(data_frame_responses, aes(x = Ages, fill = fct_reorder(value, n))) + 
  geom_histogram() +
  facet_wrap(~ fct_reorder(value, n)) +
  # The legend is hidden.
  theme(legend.position="none") +
  labs(title = "Title") +
  labs(subtitle = "Subtitle") +
  # hjust = 0.5 for title and subtitle; the title is also bold.
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  theme(plot.subtitle = element_text(hjust = 0.5)) 

在此处输入图像描述

我还可以在数据框上映射置信区间,这看起来也不错。

# Histogram of Ages faceted by the plain `value` column, with the lower and
# upper confidence bounds drawn as dashed vertical lines.
# The vline layers use bootstrapped_ci_df as their own data; that table also
# has a `value` column, which is the column used for faceting here.
ggplot(data_frame_responses, aes(x = Ages, fill = value)) + 
  geom_histogram() +
  facet_wrap(~value) +
  theme(legend.position="none") +
  labs(title = "Title") +
  labs(subtitle = "Subtitle") +
  # Lower bound of the interval.
  geom_vline(mapping = aes(xintercept = lower_ci), bootstrapped_ci_df, linetype = 'dashed') +
  # Upper bound of the interval.
  geom_vline(mapping = aes(xintercept = upper_ci), bootstrapped_ci_df, linetype = 'dashed') +
  labs(caption = "dashed lines = bootstrapped confidence interval for median") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  theme(plot.subtitle = element_text(hjust = 0.5)) 

在此处输入图像描述

问题是,我不知道如何同时做到这两点。如何在按 n 对分面(facet)重新排序的同时,让置信区间的映射保持正确?这是我的尝试,你可以看到它是如何出错的。

# Attempt to combine the ordered facets with the confidence-interval lines.
# NOTE(review): the facet expression fct_reorder(value, n) needs an `n`
# column, but bootstrapped_ci_df only has value / lower_ci / upper_ci, so the
# geom_vline layers presumably cannot be matched to individual panels —
# confirm against the rendered plot.
ggplot(data_frame_responses, aes(x = Ages, fill = value)) + 
  geom_histogram() +
  facet_wrap(~ fct_reorder(value, n)) + 
  theme(legend.position="none") +
  labs(title = "Title") +
  labs(subtitle = "Subtitle") +
  geom_vline(mapping = aes(xintercept = lower_ci), bootstrapped_ci_df, linetype = 'dashed') +
  geom_vline(mapping = aes(xintercept = upper_ci), bootstrapped_ci_df, linetype = 'dashed') +
  labs(caption = "dashed lines = bootstrapped confidence interval for median") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  theme(plot.subtitle = element_text(hjust = 0.5)) 

在此处输入图像描述

标签: r, ggplot2

解决方案


两个数据框中的 value 变量需要保持一致。在您的代码中,您只对 geom_histogram 所用的 data_frame_responses 中的 value 做了重新排序,却没有对 geom_vline 所用的 bootstrapped_ci_df 做同样的更改。

在调用 ggplot 之前先更改两个 data.frame 中该变量的数据类型会更容易。请注意:我使用了不同的数据,只是为了回答您的问题。


# Store the facet ordering in the data itself: `value` becomes a factor whose
# levels are ordered by `n`.
data_frame_responses <- mutate(
  data_frame_responses,
  value = fct_reorder(value, n)
)

# Give the confidence-interval table a `value` factor with exactly the same
# levels, so both data frames agree on the facet variable.
response_levels <- levels(data_frame_responses$value)
bootstrapped_ci_df <- mutate(
  bootstrapped_ci_df,
  value = factor(value, levels = response_levels)
)

# Histogram per response, faceted by the shared `value` factor, with the
# lower and upper confidence bounds drawn as dashed vertical lines.
ggplot(data_frame_responses, aes(x = Ages, fill = value)) +
  geom_histogram() +
  geom_vline(
    aes(xintercept = lower_ci),
    data = bootstrapped_ci_df,
    linetype = "dashed"
  ) +
  geom_vline(
    aes(xintercept = upper_ci),
    data = bootstrapped_ci_df,
    linetype = "dashed"
  ) +
  facet_wrap(~value) +
  labs(
    title = "Title",
    subtitle = "Subtitle",
    caption = "dashed lines = bootstrapped confidence interval for median"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )

在此处输入图像描述


推荐阅读