首页 > 解决方案 > 如何在 R 中创建一个函数来计算纵向研究参与者的每周调查?

问题描述

我是 R 新手,正在尝试为一些纵向数据编写一个函数。我有一个数据集,其中每位参与者每周完成一次调查,共持续 25 周。我想新建一列,用来标明每一行的数据属于第几周。请注意,每位参与者开始和结束的日期各不相同。例如,参与者 111 有 25 行数据,我希望 weeknum 列在其第 2 周对应的行中显示“Week 2”(第 2 周),依此类推。我已经为 1 名参与者写出了实现这一点的代码,但我想要一个适用于整个数据集的函数。这是我到目前为止的代码。

# Label each weekly survey row of participant 111 with its study week.
#
# The previous version built 25 separate date variables and 25 `if` statements
# that used `=` instead of `==` (a syntax error inside `if`) and all tested the
# same condition. The week index is now derived arithmetically from the number
# of days elapsed since the participant's first weekly survey.

# Load the combined survey export; the first row holds the column names.
df <- read_csv("cppdatacombined2.csv", col_names = TRUE)
view(df)

# Keep only the rows that belong to participant 111.
p111 <- filter(df, RecipientLastName == 111)
view(p111)

# Intake timestamp: the first intake value recorded for this participant.
intake_time <- p111$RecordedDate.x[1]

# Weekly dates ----
weekly <- p111$RecordedDate.y
stopifnot(any(!is.na(weekly)))

# na.rm = TRUE: one missing survey date must not turn the baseline into NA.
week1date <- as.Date(min(weekly, na.rm = TRUE))

# Start date of each study week; week k begins 7 * (k - 1) days after week 1.
desired_length <- 25 # or whatever length you want
weeknum <- week1date + 7 * (seq_len(desired_length) - 1)

# Whole days since the first survey -> 1-based week index (days 0-6 = week 1).
# Rows with a missing survey date stay NA.
days_since_start <- as.integer(as.Date(weekly) - week1date)
week_index <- days_since_start %/% 7L + 1L

# Per-row start date of the week the survey falls in.
p111$weeknum <- week1date + 7 * (week_index - 1L)

# Per-row label, e.g. "Week 2"; NA where the survey date is missing.
week_labels <- sprintf("Week %d", week_index)
week_labels[is.na(week_index)] <- NA_character_
p111$list <- week_labels
view(p111)

这段代码对参与者 111 实现了我想要的效果;下面是我开始尝试编写的函数。我的主要问题是:我不知道如何让 R 针对 RecipientLastName(参与者 ID)中的每个唯一值,分别取出数据集中与之匹配的数据并创建相应的变量。

# Distinct participant IDs found in the RecipientLastName column.
# NOTE(review): this binding is named `unique`, the same as the base function;
# calls such as unique(x) still resolve to the function, but the name is
# confusing and worth renaming once its readers are updated.
unique <- unique(df[["RecipientLastName"]])
#' Add a per-participant study-week label to a longitudinal survey table
#'
#' For every participant (identified by `RecipientLastName`) the earliest
#' non-missing `RecordedDate.y` is taken as the start of week 1. Each row is
#' then assigned the 7-day window its own `RecordedDate.y` falls into, counted
#' from that start date.
#'
#' @param df A data frame (or tibble) with the columns `RecipientLastName`
#'   and `RecordedDate.y`; the latter must be coercible with `as.Date()`.
#' @return `df` with one extra character column `Weeknum` holding labels such
#'   as `"Week 1"`. Rows with a missing date or a missing participant ID, and
#'   rows of participants without any valid date, get `NA`.
make_vars <- function(df) {
  stopifnot(
    is.data.frame(df),
    all(c("RecipientLastName", "RecordedDate.y") %in% names(df))
  )

  ids <- df[["RecipientLastName"]]
  # Work in whole days so that time-of-day differences do not shift a week.
  day_number <- as.numeric(as.Date(df[["RecordedDate.y"]]))

  # Earliest survey day per participant; NA when a participant has no dates
  # (avoids the Inf + warning that min(..., na.rm = TRUE) gives on empty input).
  first_by_id <- tapply(day_number, ids, function(days) {
    if (all(is.na(days))) NA_real_ else min(days, na.rm = TRUE)
  })
  first_day <- unname(first_by_id[as.character(ids)])

  # Days 0-6 after the first survey are week 1, days 7-13 are week 2, ...
  week_index <- as.integer((day_number - first_day) %/% 7) + 1L

  labels <- sprintf("Week %d", week_index)
  labels[is.na(week_index)] <- NA_character_

  df$Weeknum <- labels
  df
}

# Run the per-participant processing over the full data set.
# NOTE(review): the return value is discarded here; if make_vars() returns the
# augmented data frame, capture it (e.g. df <- make_vars(df)) - confirm intent.
make_vars(df = df)

# Print a reproducible text representation of the first ten rows.
dput(head(df, n = 10L))

我想要的 df 输出

RecipientLastName     ...othercols....    Recorded Date       Weeknum
1                          *week 1 data*       July 7           Week1
1                          *week 2 data*       July 14          Week2
.
(weeks 3-24)
.
1                          *week 25 data*     Dec 29            Week25
2                          *week 1 data*      Aug 14            Week1
2                          *week 2 data*      Aug 21            Week2
.
(weeks 3- 24)
2                          *week 25 data*     Feb 5             Week25
3                          *week 1 data*      Jan 3             Week 1
3                          *week 2 data*      Jan 10            Week 2
etc.

以下是我目前针对 1 名参与者得到的输出(已截短;数据集很大,所以无法全部贴出)

"paremail@gmail.com", "paremail@gmail.com"), Q112 = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1), Q98 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 
1), Q99 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), name = c("Joyee", 
"Shane", "Shane", "Shane", "Shane", "Shane", "Shane", "Shane", 
"Shane", "Shane"), date_one_days = c(20, 18, 18, 18, 18, 18, 
18, 18, 18, 18), date_one_weeks = c(3, 2.5, 2.5, 2.5, 2.5, 2.5, 
2.5, 2.5, 2.5, 2.5), date_one_months = c(1, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5), `Need for Cognition_1` = c(4, 7, 7, 
7, 7, 7, 7, 7, 7, 7), `Need for Cognition_2` = c(4, 1, 1, 1, 
1, 1, 1, 1, 1, 1), `Need for Cognition_3` = c(4, 2, 2, 2, 2, 
2, 2, 2, 2, 2), `Need for Cognition_4` = c(4, 1, 1, 1, 1, 1, 
1, 1, 1, 1), `Need for Cognition_5` = c(4, 7, 7, 7, 7, 7, 7, 
7, 7, 7), `Need for Cognition_6` = c(4, 1, 1, 1, 1, 1, 1, 1, 
1, 1), `Need for Cognition_7` = c(4, 7, 7, 7, 7, 7, 7, 7, 7, 
7), `Need for Cognition_8` = c(4, 7, 7, 7, 7, 7, 7, 7, 7, 7), 
   `Need for Cognition_9` = c(4, 2, 2, 2, 2, 2, 2, 2, 2, 2), 
   `Need for Cognition_10` = c(4, 2, 2, 2, 2, 2, 2, 2, 2, 2), RecordedDate.y = structure(c(NA, 
       1544891009, 1544891009, 1544891009, 1544891009, 1544891009, 
       1544891009, 1544891009, 1544891009, 1544891009), tzone = "UTC", class = c("POSIXct", 
       "POSIXt")),

标签: r

解决方案


尝试以下操作:

library(dplyr)

# Number each participant's surveys in their existing row order and label them
# "Week1", "Week2", ... . mutate() is used instead of summarise() so that every
# original row and column is kept; ungroup() stops the grouping from leaking
# into later steps.
df <- df %>%
  group_by(RecipientLastName) %>%
  mutate(Weeknum = paste0("Week", row_number())) %>%
  ungroup()

这也可以用 base R 和 data.table 来完成

#Base R
# Running counter within each participant (in row order), prefixed with "Week".
# local() keeps the helper variable out of the global environment.
df$Weeknum <- local({
  ids <- df[["RecipientLastName"]]
  paste0("Week", ave(ids, ids, FUN = seq_along))
})

#data.table
library(data.table)
# Convert df to a data.table by reference, then add Weeknum in place:
# within each participant, rows are numbered 1..N in their current order.
setDT(df)
df[, Weeknum := paste0("Week", seq_len(.N)), by = RecipientLastName]

推荐阅读