首页 > 解决方案 > 删除R中表格中的重复日期

问题描述

你能帮我解决下面这个问题吗?在我生成的表格中,05/07 这一天出现了两次,但我希望它只显示一次。我该如何调整代码?

非常感谢!

library(purrr)
library(dplyr)
library(tidyverse)
library(lubridate)

# Example data: 11 rows. `date1` is the same reference date on every row,
# `date2` is the per-row date (the last two rows share 2021-07-05), `Week`
# holds weekday names, and DR01..DR07 are numeric measurements.
# Dates are stored as "YYYY-MM-DD" character strings, not Date objects.
df1 <- data.frame(
  date1 = rep("2021-06-28", 11),
  date2 = c(
    "2021-04-02", "2021-04-03", "2021-04-08", "2021-04-09", "2021-04-10",
    "2021-07-01", "2021-07-02", "2021-07-03", "2021-07-04", "2021-07-05",
    "2021-07-05"
  ),
  Week = c(
    "Friday", "Saturday", "Thursday", "Friday", "Saturday", "Thursday",
    "Friday", "Saturday", "Sunday", "Monday", "Monday"
  ),
  DR01 = c(14, 11, 14, 13, 13, 14, 13, 16, 15, 11, 13),
  DR02 = c(14, 12, 16, 17, 13, 12, 17, 14, 13, 15, 18),
  DR03 = c(19, 15, 14, 13, 13, 12, 11, 15, 13, 13, 18),
  DR04 = c(15, 14, 13, 13, 16, 12, 11, 19, 11, 12, 11),
  DR05 = c(15, 14, 15, 13, 16, 12, 11, 19, 14, 15, 18),
  DR06 = c(21, 14, 13, 13, 15, 16, 17, 18, 12, 12, 18),
  DR07 = c(12, 15, 14, 14, 19, 14, 17, 18, 14, 13, 18),
  stringsAsFactors = FALSE # keep the date/weekday columns as character
)


# Values of date2 that come after date1. Both columns are "YYYY-MM-DD"
# strings, so `>` compares them as text. A date that occurs on several rows
# of df1 appears several times in `dates`.
dates <- df1$date2[which(df1$date2 > df1$date1)]

# One row per element of `dates`: fit Numbers ~ b1 * Days^2 + b2 to the
# summed DR columns of that date and keep the second coefficient (b2).
map_dfr(dates, function(target_date) {
  # Column-wise totals of every DR* column over the rows with this date.
  totals <- summarize(
    filter(df1, date2 == ymd(target_date)),
    across(starts_with("DR"), sum)
  )

  # Long format: the text after "DR" becomes the numeric predictor `Days`,
  # the summed values become the response `Numbers`.
  long_totals <- totals %>%
    pivot_longer(
      everything(),
      names_to = "Days",
      names_pattern = "DR(.+)",
      values_to = "Numbers"
    ) %>%
    mutate(Days = as.numeric(Days))

  mod <- nls(
    Numbers ~ b1*Days^2+b2,
    data = long_totals,
    start = list(b1 = 47, b2 = 0)
  )

  tibble(dates = target_date, coef = coef(mod)[2])
}) %>%
  # Show the dates as day/month/year text.
  mutate(dates = format(ymd(dates), "%d/%m/%Y"))
# A tibble: 6 x 2
  dates       coef
  <chr>      <dbl>
1 01/07/2021  12.2
2 02/07/2021  12.4
3 03/07/2021  15.6
4 04/07/2021  13.3
5 05/07/2021  27.9
6 05/07/2021  27.9

标签: r

解决方案


我们可以在管道的最后添加 distinct():

# One row per element of `dates`: fit Numbers ~ b1 * Days^2 + b2 to the
# summed DR columns of that date and keep the second coefficient (b2).
# Fully identical result rows are collapsed at the end with distinct().
map_dfr(dates, function(target_date) {
  # Column-wise totals of every DR* column over the rows with this date.
  totals <- summarize(
    filter(df1, date2 == ymd(target_date)),
    across(starts_with("DR"), sum)
  )

  # Long format: the text after "DR" becomes the numeric predictor `Days`,
  # the summed values become the response `Numbers`.
  long_totals <- totals %>%
    pivot_longer(
      everything(),
      names_to = "Days",
      names_pattern = "DR(.+)",
      values_to = "Numbers"
    ) %>%
    mutate(Days = as.numeric(Days))

  mod <- nls(
    Numbers ~ b1*Days^2+b2,
    data = long_totals,
    start = list(b1 = 47, b2 = 0)
  )

  tibble(dates = target_date, coef = coef(mod)[2])
}) %>%
  # Show the dates as day/month/year text.
  mutate(dates = format(ymd(dates), "%d/%m/%Y")) %>%
  # Drop rows that repeat an earlier row in every column.
  distinct()
# A tibble: 5 × 2
  dates       coef
  <chr>      <dbl>
1 01/07/2021  12.2
2 02/07/2021  12.4
3 03/07/2021  15.6
4 04/07/2021  13.3
5 05/07/2021  27.9

或者,如果只需要根据 dates 这一列去重(保留每个日期的第一行):

# One row per element of `dates`: fit Numbers ~ b1 * Days^2 + b2 to the
# summed DR columns of that date and keep the second coefficient (b2).
# Duplicates are then removed by the `dates` column alone.
map_dfr(dates, function(target_date) {
  # Column-wise totals of every DR* column over the rows with this date.
  totals <- summarize(
    filter(df1, date2 == ymd(target_date)),
    across(starts_with("DR"), sum)
  )

  # Long format: the text after "DR" becomes the numeric predictor `Days`,
  # the summed values become the response `Numbers`.
  long_totals <- totals %>%
    pivot_longer(
      everything(),
      names_to = "Days",
      names_pattern = "DR(.+)",
      values_to = "Numbers"
    ) %>%
    mutate(Days = as.numeric(Days))

  mod <- nls(
    Numbers ~ b1*Days^2+b2,
    data = long_totals,
    start = list(b1 = 47, b2 = 0)
  )

  tibble(dates = target_date, coef = coef(mod)[2])
}) %>%
  # Show the dates as day/month/year text.
  mutate(dates = format(ymd(dates), "%d/%m/%Y")) %>%
  # Keep the first row for each date; .keep_all retains the other columns.
  distinct(dates, .keep_all = TRUE)

推荐阅读