首页 > 解决方案 > 在 R 中根据另一个变量的值变化生成一个新变量

问题描述

我问了一些非常相似的问题[在此处输入链接描述][1],但我现在对我的问题有了更好的理解。我会尽量问清楚。

我有一个示例数据集,如下所示:

    # Sample data: one row per attempt. Values are grouped by student
    # (3, 3, 2, 2, 4, 3, 1, 2, 2, 2 attempts for students 1..10).
    # Student identifier, repeated once per attempt (kept as double).
    id <- rep(as.numeric(1:10), times = c(3, 3, 2, 2, 4, 3, 1, 2, 2, 2))
    # Question answered on each attempt.
    item.id <- c(
      1, 1, 2,  1, 1, 1,  1, 1,  1, 2,  1, 2, 2, 2,
      1, 1, 1,  1,  1, 2,  1, 1,  1, 1
    )
    # Attempt number within each (id, item.id) pair.
    sequence <- c(
      1, 2, 1,  1, 2, 3,  1, 2,  1, 1,  1, 1, 2, 3,
      1, 2, 3,  1,  1, 1,  1, 2,  1, 2
    )
    # Score of each attempt: 0, 1 or 2.
    score <- c(
      0, 0, 0,  0, 0, 1,  2, 0,  1, 1,  1, 0, 1, 1,
      0, 0, 0,  1,  0, 2,  1, 2,  2, 1
    )

    # Assemble the four vectors into the data frame used by the code below.
    data <- data.frame(
      id = id,
      item.id = item.id,
      sequence = sequence,
      score = score
    )
> data
   id item.id sequence score
1   1       1        1     0
2   1       1        2     0
3   1       2        1     0
4   2       1        1     0
5   2       1        2     0
6   2       1        3     1
7   3       1        1     2
8   3       1        2     0
9   4       1        1     1
10  4       2        1     1
11  5       1        1     1
12  5       2        1     0
13  5       2        2     1
14  5       2        3     1
15  6       1        1     0
16  6       1        2     0
17  6       1        3     0
18  7       1        1     1
19  8       1        1     0
20  8       2        1     2
21  9       1        1     1
22  9       1        2     2
23 10       1        1     2
24 10       1        2     1

id 代表每个学生,item.id 代表学生回答的问题,sequence 是每个 item.id 的尝试次数,score 是每次尝试的分数,取 0、1 或 2。学生可以更改他们的答案。

对于每个 id 内的每个 item.id,我通过查看最后两次尝试(即答案的更改)来创建一个变量(status)。status 的重新编码规则如下:

1-If there is only one attempt for each question: 
    a) assign "BTW" (Blank to Wrong) if the item score is 0.
    b) assign "BTR" (Blank to Right) if the item score is 1.
2-If there are multiple attempts for each question:
    a) assign "BTW" (Blank to Wrong) if the first item attempt score is 0.
    b) assign "BTR" (Blank to Right) if the first item attempt score is 1.
    c) assign "WW" for those who changed from wrong to wrong (0 to 0),
    d) assign "WR" for those who changed to increasing score (0 to 1, or 1 to 2),
    e) assign "RW" for those who changed to decreasing score (2 to 1, 2 to 0, or 1 to 0 ), and
    f) assign "RR" for those who changed from right to right (1 to 1, 2 to 2).

分数从 0 到 1 或 0 到 2 或 1 到 2 被认为是正确的(正确)变化,而分数从 1 到 0 或 2 到 0 或 2 到 1 被认为是不正确(错误)的变化。

如果某个 item.id 只有一次尝试(如 id = 7),那么 status 应该是 "BTR";如果该次 score 为 0,则应该是 "BTW"。总体逻辑是:如果分数增加,应该是 WR;如果分数减少,应该是 RW。

a) from 1 to 2 as WR, instead, they were coded as RR,
b) from 2 to 1 as RW, instead, they were coded as WW.

我使用了这段代码。有些事情没有解决,例如 id=1。状态应该是 {BTW, WW}。

# Asker's attempt: within each (id, item.id) group, derive `status` from the
# change in score relative to the previous row.
# The pipeline result is not assigned, so `data` itself is left unchanged here.
library(dplyr)
data %>%   group_by(id,item.id) %>%
 # `diff` is forced to 0 on the first row of every group, so a first attempt
 # looks exactly like "score unchanged" to the branches below.
 mutate(diff = c(0, diff(score)),
                status = case_when(
                  # BTW/BTR are only produced for single-row groups; a lone
                  # score of 2 matches neither line and falls through.
                  n() == 1 & score == 0 ~ "BTW",
                  n() == 1 & score == 1 ~ "BTR",
                  # Unchanged score. The first row of a multi-row group also
                  # lands here (diff == 0), which yields WW/RR, not BTW/BTR.
                  diff == 0 & score == 0 ~ "WW",
                  diff == 0 & score > 0 ~ "RR",
                  # Score went up / went down relative to the previous row.
                  diff > 0 ~ "WR",
                  diff < 0 ~ "RW",
                  # Fallback marker for rows matched by no rule above.
                  TRUE ~ "oops")) 

> data
   id item.id sequence score diff status
1   1       1        1     0    0     WW
2   1       1        2     0    0     WW
3   1       2        1     0    0    BTW
4   2       1        1     0    0     WW
5   2       1        2     0    0     WW
6   2       1        3     1    1     WR
7   3       1        1     2    0     RR
8   3       1        2     0   -2     RW
9   4       1        1     1    0    BTR
10  4       2        1     1    0    BTR
11  5       1        1     1    0    BTR
12  5       2        1     0    0     WW
13  5       2        2     1    1     WR
14  5       2        3     1    0     RR
15  6       1        1     0    0     WW
16  6       1        2     0    0     WW
17  6       1        3     0    0     WW
18  7       1        1     1    0    BTR
19  8       1        1     0    0    BTW
20  8       2        1     2    0     RR
21  9       1        1     1    0     RR
22  9       1        2     2    1     WR
23 10       1        1     2    0     RR
24 10       1        2     1   -1     RW

所需的输出将是案例:

  > desired

   id item.id sequence score  status
1   1       1        1     0   BTW
2   1       1        2     0   WW
3   1       2        1     0   BTW
4   2       1        1     0   BTW
5   2       1        2     0   WW
6   2       1        3     1   WR
7   3       1        1     2   BTR
8   3       1        2     0   RW
9   4       1        1     1   BTR
10  4       2        1     1   BTR
11  5       1        1     1   BTR
12  5       2        1     0   BTW
13  5       2        2     1   WR
14  5       2        3     1   RR
15  6       1        1     0   BTW
16  6       1        2     0   WW
17  6       1        3     0   WW
18  7       1        1     1   BTR
19  8       1        1     0   BTW
20  8       2        1     2   BTR
21  9       1        1     1   BTR
22  9       1        2     2   RR
23 10       1        1     2   BTR
24 10       1        2     1   RW

有什么意见吗?谢谢!

标签: r, recode

解决方案


为了解决这个问题,我将问题分解为两个步骤。首先确定空白回答行。然后,一旦确定了第一次尝试,就将答案的变化分配给剩余的行。

# Step 1: label the first attempt of every (id, item.id) pair.
# An opening score of 0 is "BTW" (blank to wrong); any positive opening score
# is "BTR" (blank to right). Every later attempt gets a real missing value
# (NA_character_) rather than the string "NA", so rows that are still
# unlabelled can be detected with is.na() and never leak out as literal text.
# No grouping is needed here: the conditions only look at the current row.
test <- data %>%
  mutate(status = case_when(
    sequence == 1 & score == 0 ~ "BTW",
    sequence == 1 & score > 0 ~ "BTR",
    TRUE ~ NA_character_
  ))

# Step 2: label every later attempt by comparing its score with the previous
# attempt on the same item by the same student.
answer <- test %>%
  group_by(id, item.id) %>%
  # order_by = sequence makes "previous" mean the preceding attempt number,
  # so the result does not depend on the physical row order of `data`.
  mutate(previous = lag(score, order_by = sequence)) %>%
  # transmute() keeps the grouping columns (id, item.id) plus the columns
  # named here; the helper column `previous` is dropped.
  transmute(
    sequence, score,
    status = case_when(
      !is.na(status) ~ status,                  # first attempt: keep BTW/BTR
      score == 0 & score == previous ~ "WW",    # 0 -> 0
      score >= 1 & score == previous ~ "RR",    # 1 -> 1, 2 -> 2
      score > 0 & score > previous ~ "WR",      # score increased
      score < previous ~ "RW",                  # score decreased
      TRUE ~ status                             # nothing to compare: stays NA
    )
  ) %>%
  # Drop the grouping so later code does not silently operate per group.
  ungroup()


head(answer, 20)
tail(answer, 4)

状态列与除第 20 行之外的所有行的样本数据匹配,请仔细检查计算。


推荐阅读