# dmy_hms() comes from lubridate; load it so the snippet runs on its own.
library(lubridate)

# Timestamps (day/month/year) that delimit the observation intervals, listed
# in chronological order. dmy_hms() already returns a date-time vector, so no
# c() wrapper is needed; on older R versions an extra c() can also drop the
# time-zone attribute of the parsed values.
myintervals <- dmy_hms(c(
  "01/01/2000 08:00:00",
  "25/02/2000 09:00:00",
  "01/03/2000 10:00:00",
  "30/04/2000 11:00:00",
  "01/05/2000 12:00:00",
  "30/06/2000 13:00:00",
  "01/07/2000 14:00:00",
  "30/08/2000 15:00:00",
  "01/09/2000 16:00:00",
  "30/10/2000 17:00:00"
))

# Station label for each timestamp, in the same order as `myintervals`.
mystations <- c("A", "B", "C", "A", "B", "C", "A", "B", "C", "D")

# One row per (timestamp, station) pair; columns are named after the vectors.
mydata <- data.frame(myintervals, mystations)

           myintervals mystations
 |1  2000-01-01 08:00:00          A
 |2  2000-02-25 09:00:00          B
 |3  2000-03-01 10:00:00          C
 |4  2000-04-30 11:00:00          A
 |5  2000-05-01 12:00:00          B
 |6  2000-06-30 13:00:00          C
 |7  2000-07-01 14:00:00          A
 |8  2000-08-30 15:00:00          B
 |9  2000-09-01 16:00:00          C
 |10 2000-10-30 17:00:00          D


# Detection timestamps (day/month/year), parsed with lubridate's dmy_hms().
# The parser already returns a date-time vector, so it is not wrapped in c().
date.time <- dmy_hms(c(
  "31/12/1999 08:00:00",
  "24/02/2000 09:00:00",
  "25/02/2000 08:00:00",
  "26/02/2000 10:00:00",
  "27/02/2000 11:00:00",
  "01/03/2000 10:00:00",
  "10/03/2000 22:00:00",
  "20/03/2000 23:00:00",
  "01/04/2000 10:00:00",
  "20/04/2000 20:00:00",
  "25/04/2000 08:00:00",
  "30/04/2000 10:00:00",
  "01/05/2000 12:00:00",
  "10/05/2000 20:00:00",
  "20/05/2000 08:00:00",
  "30/06/2000 13:00:00",
  "10/07/2000 10:00:00",
  "20/07/2000 20:00:00",
  "30/08/2000 15:00:00",
  "01/09/2000 16:00:00"
))

# `mystations` holds 10 labels but there are 20 detections. Repeat the labels
# explicitly instead of relying on data.frame() recycling the shorter column
# silently; the resulting station sequence is the same.
mydetections <- data.frame(
  date.time = date.time,
  mystations = rep(mystations, length.out = length(date.time))
)

             date.time mystations
 |1  1999-12-31 08:00:00          A
 |2  2000-02-24 09:00:00          B
 |3  2000-02-25 08:00:00          C
 |4  2000-02-26 10:00:00          A
 |5  2000-02-27 11:00:00          B
 |6  2000-03-01 10:00:00          C
 |7  2000-03-10 22:00:00          A
 |8  2000-03-20 23:00:00          B
 |9  2000-04-01 10:00:00          C
 |10 2000-04-20 20:00:00          D
 |11 2000-04-25 08:00:00          A
 |12 2000-04-30 10:00:00          B
 |13 2000-05-01 12:00:00          C
 |14 2000-05-10 20:00:00          A
 |15 2000-05-20 08:00:00          B
 |16 2000-06-30 13:00:00          C
 |17 2000-07-10 10:00:00          A
 |18 2000-07-20 20:00:00          B
 |19 2000-08-30 15:00:00          C
 |20 2000-09-01 16:00:00          D


# One artificial "origin" timestamp per distinct station in `mydata`, so that
# each station's first real timestamp has an earlier instant to pair with.
# The parsed dates and the unique() result are used directly: wrapping them in
# c() adds nothing and can strip attributes (time zone, factor levels) on
# older R versions.
myorigins <- data.frame(
  myintervals = dmy_hms(c(
    "01/01/1970 00:00:00",
    "01/04/1970 00:00:00",
    "01/08/1970 00:00:00",
    "01/12/1970 00:00:00"
  )),
  mystations = unique(mydata$mystations)
)


                                        myintervals mystation value
1  1970-01-01 00:00:00 UTC--2000-01-01 08:00:00 UTC         A     1
2  2000-01-01 08:00:00 UTC--2000-04-30 11:00:00 UTC         A     3
3  2000-04-30 11:00:00 UTC--2000-07-01 14:00:00 UTC         A     1
4  1970-04-01 00:00:00 UTC--2000-02-25 09:00:00 UTC         B     1
5  2000-02-25 09:00:00 UTC--2000-05-01 12:00:00 UTC         B     3
6  2000-05-01 12:00:00 UTC--2000-08-30 15:00:00 UTC         B     2
7  1970-08-01 00:00:00 UTC--2000-03-01 10:00:00 UTC         C     2
8  2000-03-01 10:00:00 UTC--2000-06-30 13:00:00 UTC         C     3
9  2000-06-30 13:00:00 UTC--2000-09-01 16:00:00 UTC         C     1
10 1970-12-01 00:00:00 UTC--2000-10-30 17:00:00 UTC         D     1


# Per-station attempt: split both tables by station, then test one station's
# detections against that station's intervals.
library(dplyr)
library(lubridate)

# Append the origin rows, then order by station and time.
mydata <- bind_rows(mydata, myorigins)
mydata <- arrange(mydata, mystations, myintervals)

# One plain data frame per station. The list is named from the grouping keys
# themselves so every name is guaranteed to match the piece it labels.
DF <- group_split(mydata, mystations)
Y <- lapply(DF, as.data.frame)
names(Y) <- as.character(group_keys(group_by(mydata, mystations))$mystations)
# Publishes one global object per station (e.g. `A`); the lines below use it.
list2env(Y, envir = .GlobalEnv)

# Splitting the detections the same way. Names come from the stations present
# in `mydetections` (not `mydata`), suffixed with "det" (e.g. `Adet`).
DFD <- group_split(mydetections, mystations)
Z <- lapply(DFD, as.data.frame)
names(Z) <- paste0(
  group_keys(group_by(mydetections, mystations))$mystations,
  "det"
)
list2env(Z, envir = .GlobalEnv)

# Consecutive intervals between station A's timestamps.
Aint <- int_diff(A$myintervals)

# NOTE(review): with an Interval vector on the right-hand side, %within%
# appears to compare element-wise (recycling `Aint`), so this is not a count
# of detections per interval - confirm against the lubridate documentation.
myresA <- Adet$date.time %within% Aint

显然，我想避免为每个站点的数据框（例如 A）“手动”构建时间间隔。与往常一样，如能就如何获得所需输出提供任何帮助或提示，我将不胜感激。对于帖子最初的混乱，我深表歉意。

Here are a couple of options to consider - hope this may be helpful.

Using tidyverse you can add your myorigins to mydata, then after sorting with arrange make time intervals (start-end).

You can use fuzzy_left_join to add the events table, matching on mystations and where the date.time falls between the interval start and end.

Then, after grouping, you can count the number of rows. You will get something close to your result, depending on how you want to handle edge cases.


library(dplyr)
library(fuzzyjoin)

# Count detections per station and interval.
# Build (start, end] intervals from consecutive timestamps of each station,
# attach every detection of the same station that falls inside the interval,
# then count the matches.
bind_rows(mydata, myorigins) %>%
  arrange(myintervals) %>%
  group_by(mystations) %>%
  transmute(start = myintervals, end = lead(myintervals)) %>%
  # The last timestamp of each station opens no interval.
  filter(!is.na(end)) %>%
  ungroup() %>%
  # Match on equal station and start < date.time <= end.
  fuzzy_left_join(
    mydetections,
    by = c("mystations", "start" = "date.time", "end" = "date.time"),
    match_fun = c(`==`, `<`, `>=`)
  ) %>%
  group_by(start, end, mystations.x) %>%
  # A left join keeps intervals without detections as one row with NA in
  # `date.time`; count only real matches so such intervals report 0, not 1.
  summarise(count = sum(!is.na(date.time)), .groups = "drop") %>%
  arrange(mystations.x, start)


   start               end                 mystations.x count
   <dttm>              <dttm>              <chr>        <int>
 1 1970-01-01 00:00:00 2000-01-01 08:00:00 A                1
 2 2000-01-01 08:00:00 2000-04-30 11:00:00 A                3
 3 2000-04-30 11:00:00 2000-07-01 14:00:00 A                1
 4 1970-04-01 00:00:00 2000-02-25 09:00:00 B                1
 5 2000-02-25 09:00:00 2000-05-01 12:00:00 B                3
 6 2000-05-01 12:00:00 2000-08-30 15:00:00 B                2
 7 1970-08-01 00:00:00 2000-03-01 10:00:00 C                2
 8 2000-03-01 10:00:00 2000-06-30 13:00:00 C                3
 9 2000-06-30 13:00:00 2000-09-01 16:00:00 C                1
10 1970-12-01 00:00:00 2000-10-30 17:00:00 D                2

An alternative to consider is using data.table which would be faster. One function that may be helpful here is foverlaps to find overlap between the event dates and date ranges.


library(data.table)

# Stack origins and observed timestamps as data.tables (copies, so `mydata`
# and `myorigins` themselves are left untouched) and order them by station
# and time.
dt <- rbind(as.data.table(myorigins), as.data.table(mydata))
setorder(dt, mystations, myintervals)

# Each timestamp opens an interval that the station's next timestamp closes.
# shift(type = "lead") is data.table's own lead, so dplyr is not required.
dt[
  ,
  c("start", "end") := list(myintervals, shift(myintervals, type = "lead")),
  by = mystations
]
# The last timestamp of each station has no successor; drop that row.
dt <- na.omit(dt, cols = "end")

# foverlaps() needs its second table keyed, with an interval as the last two
# key columns, so every detection becomes the interval [date.time, date.time].
setDT(mydetections)
mydetections[, date.time.copy := copy(date.time)]
setkey(mydetections, mystations, date.time, date.time.copy)

# Overlap join of intervals (x) with detections (y) within the same station.
# NOTE(review): overlaps look inclusive at both ends, so a detection exactly
# on a boundary is matched to both neighbouring intervals - confirm this is
# the edge-case handling you want.
dt_ovlp <- foverlaps(
  dt, mydetections,
  by.x = c("mystations", "start", "end"),
  by.y = c("mystations", "date.time", "date.time.copy")
)

# Intervals without any detection come back as one row with NA `date.time`;
# count only real matches so they report 0 rather than 1.
dt_ovlp[
  ,
  .(value = sum(!is.na(date.time))),
  by = c("mystations", "start", "end")
][order(mystations, start)]
