首页 > 解决方案 > R函数检索个人最后一次看到的信息

问题描述

我有一个数据集,其中包含 2015 年至 2020 年间每天对个体的观察记录。我有一个函数,可以针对所选日期给出“假定存活”的个体列表(即在该日期之前 12 个月内被观察到的个体),并列出它们的 ID 和最后一次被观察到的日期。

#' List individuals presumed alive on a reference date
#'
#' An individual is presumed alive when its most recent sighting is strictly
#' later than one year before `date` (i.e. seen < 12 months ago).
#'
#' @param data A data frame with one row per observation, containing an `ID`
#'   column and a `Date` column that `as.Date()` can convert.
#' @param date A `Date` giving the reference day.
#' @return An ungrouped tibble with one row per retained individual and the
#'   columns `ID` and `last_seen` (a `Date`).
alive_birds <- function(data, date) {
  # `date - years(1)` is NA when `date` is 29 February, which would make the
  # filter drop every row; rolling back to the last valid day of the month
  # (28 February) keeps the cutoff well-defined.
  cutoff <- lubridate::add_with_rollback(date, -lubridate::years(1))

  data %>%
    dplyr::group_by(ID) %>%
    dplyr::mutate(last_seen = as.Date(max(Date))) %>%
    dplyr::filter(last_seen > cutoff) %>%
    dplyr::select(ID, last_seen) %>%
    dplyr::distinct() %>%
    # Do not leak the per-ID grouping to callers.
    dplyr::ungroup()
}

# Individuals presumed alive on 30 September 2020.
reference_date <- as.Date("2020-09-30")
possible_population <- alive_birds(FODYdataVERTICAL, reference_date)

# Possible population size: the result holds one row per individual.
nrow(possible_population)

有没有办法修改这个函数,让它同时返回每个个体最后一次被观察到时记录的性别?

这是我的数据集结构的示例:

# Example sightings table: 20 observations, one row per sighting.
# Built with structure() so the snippet does not need the tibble package to
# reconstruct the object; values are listed in row order.
BIRD_data <- structure(
  list(
    ID = c(
      "FB37324", "FB37519", "FB37552", "FA72407", "FA42378",
      "FB37452", "FA42242", "FB37402", "FB37352", "FC10654",
      "FB37330", "FB37643", "FB37530", "FB37594", "FB37474",
      "FB37421", "FA42236", "FB37593", "FC10729", "FC10666"
    ),
    Name = c(
      "Rocket", "Dirkie", "Zumba", "Cake", "Hula",
      "Prickles", "Corsair", "Tigger", "Amarula", "Clapham",
      "Bidou", "Justin", "Pluto", "Sang", "Fawkes",
      "Cherry", "Hurricane", "Tan?", "Doobidoo", "Lucifer"
    ),
    # Ring codes on the left / right leg; empty string when none is recorded.
    Lring = c(
      "", "", "", "", "LB/ID",
      "ID", "", "ID", "DB/ID", "ID/WT",
      "MV", "LG", "", "DB/ID", "",
      "", "", "", "ID", "ID/DB"
    ),
    Rring = c(
      "", "", "", "", "RD",
      "MV/LB", "", "O/BK", "WT", "O",
      "O/ID", "ID/MV", "", "YL", "",
      "", "", "", "MV/DG", "DB"
    ),
    sex = c(
      "M", "F", "M", "F", "F",
      "F", "F", "U", "M", "J",
      "M", "F", "M", "F", "F",
      "U", "F", "U", "J", "U"
    ),
    month = c(
      2L, 1L, 4L, 12L, 6L,
      3L, 9L, 8L, 8L, 6L,
      9L, 7L, 2L, 4L, 8L,
      10L, 9L, 9L, 11L, 8L
    ),
    year = c(
      2017L, 2017L, 2015L, 2015L, 2017L,
      2020L, 2016L, 2017L, 2017L, 2018L,
      2018L, 2020L, 2016L, 2018L, 2016L,
      2016L, 2016L, 2016L, 2018L, 2020L
    ),
    # Every row in this sample is a positive sighting.
    seen = rep(1L, 20L),
    Freq = c(
      3L, 9L, 9L, 11L, 10L,
      3L, 19L, 14L, 8L, 4L,
      9L, 9L, 6L, 7L, 9L,
      6L, 14L, 17L, 10L, 5L
    ),
    # Day of month is stored as character in this data set.
    day = c(
      "21", "8", "18", "1", "24",
      "5", "6", "26", "18", "5",
      "5", "1", "26", "19", "13",
      "20", "9", "30", "20", "5"
    ),
    location = c(
      "A", "S", "A", "S", "S",
      "A", "A", "A", "A", "S",
      "A", "S", "A", "A", "A",
      "A", "S", "A", "S", "SF"
    ),
    # Sighting timestamps as seconds since the epoch (POSIXct).
    Date = structure(
      c(
        1487620800, 1483819200, 1429300800, 1448913600, 1498248000,
        1583352000, 1473105600, 1503691200, 1.503e+09, 1528142400,
        1536091200, 1593547200, 1456430400, 1524081600, 1471032000,
        1476907200, 1473364800, 1475179200, 1542657600, 1596571200
      ),
      tzone = "",
      class = c("POSIXct", "POSIXt")
    )
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)

亲切的问候

标签: r

解决方案


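您可以在 `select()` 中加入想要保留的列(此处为 `sex`)。先按 `Date` 降序排列,再用 `distinct(ID, .keep_all = TRUE)` 为每个个体只保留最近一次观察的那一行,这样得到的 `sex` 就是最后一次被观察到时记录的性别。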

#' List individuals presumed alive, with the sex recorded at the last sighting
#'
#' An individual is presumed alive when its most recent sighting is strictly
#' later than one year before `date`.
#'
#' @param data A data frame with one row per observation, containing the
#'   columns `ID`, `sex` and `Date` (`Date` must be convertible by `as.Date()`).
#' @param date A `Date` giving the reference day.
#' @return An ungrouped tibble with one row per retained individual and the
#'   columns `ID`, `last_seen` (a `Date`) and `sex` (taken from the most recent
#'   observation of that individual).
alive_birds <- function(data, date) {
  # `date - years(1)` is NA when `date` is 29 February, which would make the
  # filter drop every row; rolling back to the last valid day of the month
  # (28 February) keeps the cutoff well-defined.
  cutoff <- lubridate::add_with_rollback(date, -lubridate::years(1))

  data %>%
    dplyr::group_by(ID) %>%
    dplyr::mutate(last_seen = as.Date(max(Date))) %>%
    dplyr::filter(last_seen > cutoff) %>%
    # Newest observation first, so distinct() below keeps the latest row.
    dplyr::arrange(ID, dplyr::desc(Date)) %>%
    dplyr::select(ID, last_seen, sex) %>%
    dplyr::distinct(ID, .keep_all = TRUE) %>%
    # Do not leak the per-ID grouping to callers.
    dplyr::ungroup()
}

# Example: individuals presumed alive on 30 September 2020, with their sex.
alive_birds(data = BIRD_data, date = as.Date("2020-09-30"))

推荐阅读