首页 > 解决方案 > 在 SAS 中创建复杂的逻辑计数器变量

问题描述

我必须在数据集中创建复杂的(对我而言)计数器变量。我试图尽可能清楚地解释。如果有任何不清楚的地方,请告诉我。希望在您的帮助下,我可以实现我的期望。

我需要创建三个变量:Probation_Count、Probation_Flag 和 Cure_Count。

这三个变量都需要针对每个 CID 分别计算(即按 CID 分组)。

Probation_Count 和 Probation_Flag 条件

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

Cure_count 条件

请在下面找到示例数据。

下面示例数据中的 probation_count、probation_flag 和 cure_count 是我手动计算出的期望值。

    data sample;
  /*
   * Sample data from the question: rows are grouped by CID, one row per date.
   * Each data line is '#'-delimited and holds, in order:
   *   CID, date (dd/mm/yyyy), DPD, Default_Flag,
   * followed by the asker's hand-computed expected values for
   *   Probation_Count, probation_Flag and Cure_count.
   */
  infile datalines dlm='#';
  /*
   * The ':' modifier reads the date as a delimited field and then applies the
   * informat, instead of always consuming exactly 10 columns. Plain formatted
   * input only worked because every date here happens to be 10 characters.
   */
  input CID date :ddmmyy10. DPD Default_Flag $ Probation_Count probation_Flag $ Cure_count;
  format date ddmmyy10.;
datalines;
111#04/04/2021#87#N#00# #0
111#05/04/2021#88#N#00# #0
111#06/04/2021#89#N#00# #0
111#07/04/2021#90#Y#00# #0
111#08/04/2021#91#Y#00# #0
111#09/04/2021#92#Y#00# #0
111#10/04/2021#93#Y#00# #0
111#11/04/2021#00#N#01#Y#0
111#12/04/2021#00#N#02#Y#0
111#13/04/2021#00#N#03#Y#0
111#14/04/2021#00#N#04#Y#0
111#15/04/2021#00#N#05#Y#0
111#16/04/2021#01#N#05#Y#0
111#17/04/2021#02#N#05#Y#0
111#18/04/2021#00#N#06#Y#0
111#19/04/2021#00#N#07#Y#0
111#20/04/2021#00#N#08#Y#0
111#21/04/2021#00#N#09#Y#0
111#22/04/2021#00#N#10#Y#0
111#23/04/2021#00#N#00# #1
111#24/04/2021#00#N#00# #2
111#25/04/2021#00#N#00# #3
222#04/04/2021#86#N#00# #0
222#05/04/2021#87#N#00# #0
222#06/04/2021#88#N#00# #0
222#07/04/2021#89#N#00# #0
222#08/04/2021#90#Y#00# #0
222#09/04/2021#91#Y#00# #0
222#10/04/2021#92#Y#00# #0
222#11/04/2021#93#Y#00# #0
222#12/04/2021#94#Y#00# #0
222#13/04/2021#95#Y#00# #0
222#14/04/2021#96#Y#00# #0
333#04/04/2021#87#N#00# #0
333#05/04/2021#88#N#00# #0
333#06/04/2021#89#N#00# #0
333#07/04/2021#90#Y#00# #0
333#08/04/2021#91#Y#00# #0
333#09/04/2021#92#Y#00# #0
333#10/04/2021#00#N#01#Y#0
333#11/04/2021#00#N#02#Y#0
333#12/04/2021#00#N#03#Y#0
333#13/04/2021#00#N#04#Y#0
333#14/04/2021#00#N#05#Y#0
333#15/04/2021#00#N#06#Y#0
333#16/04/2021#01#N#05#Y#0
333#17/04/2021#02#N#05#Y#0
333#18/04/2021#03#N#05#Y#0
333#19/04/2021#04#N#00#Y#0
333#20/04/2021#05#N#00#Y#0
333#21/04/2021#00#N#01#Y#0
333#22/04/2021#00#N#02#Y#0
333#23/04/2021#00#N#03#Y#0
333#24/04/2021#00#N#04#Y#0
333#25/04/2021#00#N#05#Y#0
333#26/04/2021#00#N#06#Y#0
333#27/04/2021#00#N#07#Y#0
333#28/04/2021#00#N#08#Y#0
333#29/04/2021#00#N#09#Y#0
333#30/04/2021#00#N#10#Y#0
333#01/05/2021#00#N#00# #1
333#02/05/2021#00#N#00# #2
333#03/05/2021#00#N#00# #3
333#04/05/2021#90#Y#00# #0
333#05/05/2021#91#Y#00# #0
;
run;

非常感谢您的时间和帮助

标签: sas datastep

解决方案


数据和解释不是 100% 清楚的,但是这个示例代码可能会帮助您完全了解您正在尝试的复杂规则。

我需要创建三个变量:Probation_Count、Probation_Flag 和 Cure_Count。

我推测这意味着这些变量及其值只能根据 default_flag 和 dpd 的状态以及状态变化来计算。您没有说明前一行计算出的值是否应该(以及如何)结转到下一行的计算中。

例子:

data have;
  /*
   * Input for the WANT step: the question's sample rows, grouped by CID.
   * Each data line is '#'-delimited and holds, in order:
   *   CID, date (dd/mm/yyyy), DPD, Default_Flag,
   * followed by the asker's hand-computed expected values, read into the
   * *_X variables so they can sit next to the values WANT computes.
   */
  infile datalines dlm='#';
  /*
   * The ':' modifier reads the date as a delimited field and then applies the
   * informat, instead of always consuming exactly 10 columns. Plain formatted
   * input only worked because every date here happens to be 10 characters.
   */
  input CID date :ddmmyy10. DPD Default_Flag $ Probation_Count_X Probation_Flag_X $ Cure_Count_X;
  format date ddmmyy10.;
datalines;
111#04/04/2021#87#N#00# #0
111#05/04/2021#88#N#00# #0
111#06/04/2021#89#N#00# #0
111#07/04/2021#90#Y#00# #0
111#08/04/2021#91#Y#00# #0
111#09/04/2021#92#Y#00# #0
111#10/04/2021#93#Y#00# #0
111#11/04/2021#00#N#01#Y#0
111#12/04/2021#00#N#02#Y#0
111#13/04/2021#00#N#03#Y#0
111#14/04/2021#00#N#04#Y#0
111#15/04/2021#00#N#05#Y#0
111#16/04/2021#01#N#05#Y#0
111#17/04/2021#02#N#05#Y#0
111#18/04/2021#00#N#06#Y#0
111#19/04/2021#00#N#07#Y#0
111#20/04/2021#00#N#08#Y#0
111#21/04/2021#00#N#09#Y#0
111#22/04/2021#00#N#10#Y#0
111#23/04/2021#00#N#00# #1
111#24/04/2021#00#N#00# #2
111#25/04/2021#00#N#00# #3
222#04/04/2021#86#N#00# #0
222#05/04/2021#87#N#00# #0
222#06/04/2021#88#N#00# #0
222#07/04/2021#89#N#00# #0
222#08/04/2021#90#Y#00# #0
222#09/04/2021#91#Y#00# #0
222#10/04/2021#92#Y#00# #0
222#11/04/2021#93#Y#00# #0
222#12/04/2021#94#Y#00# #0
222#13/04/2021#95#Y#00# #0
222#14/04/2021#96#Y#00# #0
333#04/04/2021#87#N#00# #0
333#05/04/2021#88#N#00# #0
333#06/04/2021#89#N#00# #0
333#07/04/2021#90#Y#00# #0
333#08/04/2021#91#Y#00# #0
333#09/04/2021#92#Y#00# #0
333#10/04/2021#00#N#01#Y#0
333#11/04/2021#00#N#02#Y#0
333#12/04/2021#00#N#03#Y#0
333#13/04/2021#00#N#04#Y#0
333#14/04/2021#00#N#05#Y#0
333#15/04/2021#00#N#06#Y#0
333#16/04/2021#01#N#05#Y#0
333#17/04/2021#02#N#05#Y#0
333#18/04/2021#03#N#05#Y#0
333#19/04/2021#04#N#00#Y#0
333#20/04/2021#05#N#00#Y#0
333#21/04/2021#00#N#01#Y#0
333#22/04/2021#00#N#02#Y#0
333#23/04/2021#00#N#03#Y#0
333#24/04/2021#00#N#04#Y#0
333#25/04/2021#00#N#05#Y#0
333#26/04/2021#00#N#06#Y#0
333#27/04/2021#00#N#07#Y#0
333#28/04/2021#00#N#08#Y#0
333#29/04/2021#00#N#09#Y#0
333#30/04/2021#00#N#10#Y#0
333#01/05/2021#00#N#00# #1
333#02/05/2021#00#N#00# #2
333#03/05/2021#00#N#00# #3
333#04/05/2021#90#Y#00# #0
333#05/05/2021#91#Y#00# #0
;
run;

data want;
  /*
   * Computes probation_count and probation_flag per CID from default_flag
   * and dpd, reading HAVE in its existing order with BY cid.
   *
   * The 'rule' column records which branch set the values on each row:
   *   ' '  default_flag = 'N' and no other rule applied (not counting yet,
   *        or dpd missing/negative): count is left as retained, flag blank
   *   '1'  first 'N' row directly after a 'Y' row in the same CID:
   *        counting starts, count = 1, flag = 'Y'
   *   '2'  counting and dpd = 0: count + 1, flag = 'Y'
   *   '5'  as '2', but the increment pushed the count past 10: count = 0
   *   '3'  counting and 0 < dpd <= 3: count held, flag = 'Y'
   *   '4'  counting and dpd > 3: count = 0, flag = 'Y'
   *   '6'  default_flag = 'Y': count = 0, flag = 'N'
   *
   * NOTE(review): cure_count is declared but never assigned, so it is missing
   * on every output row. Its rules are not visible here and are left to do.
   * NOTE(review): after rule '5' pcounting stays on, so the next dpd = 0 row
   * starts counting from 1 again. Confirm that this is the intended rule.
   */
  length rule $1 probation_count 8 probation_flag $1 cure_count 8;
  length trigger_counting pcounting 8;

  /* State carried from row to row; probation_flag is rebuilt on every row. */
  retain pcounting probation_count;

  set have;
  by cid;

  /*
   * LAG only updates its queue when it executes, so call it once per row in
   * its own statement rather than inside a compound condition. The helper is
   * dropped to keep the output columns unchanged.
   */
  prev_default_flag = lag(default_flag);
  drop prev_default_flag;

  rule = ' ';

  /* Start every CID with a clean counter and counting switched off. */
  if first.cid then do;
    probation_count = 0;
    pcounting = 0;
  end;

  /* 1 on an 'N' row that directly follows a 'Y' row of the same CID, else 0. */
  trigger_counting =
    ( default_flag = 'N' and prev_default_flag = 'Y' and not first.cid );

  if default_flag = 'N' then do;

    if trigger_counting then do;
      /* Leaving default: switch counting on and start at 1. */
      pcounting = 1;
      probation_count = 1;
      probation_flag = 'Y';
      rule = '1';
    end;
    else if pcounting and dpd = 0 then do;
      /* Clean day: advance the counter, wrapping to 0 once it passes 10. */
      probation_count + 1;
      probation_flag = 'Y';
      rule = '2';

      if probation_count > 10 then do;
        probation_count = 0;
        rule = '5';
      end;
    end;
    else if pcounting and 0 < dpd <= 3 then do;
      /* Small dpd: hold the counter at its current value. */
      probation_flag = 'Y';
      rule = '3';
    end;
    else if pcounting and dpd > 3 then do;
      /* Larger dpd: restart the counter from 0, counting stays on. */
      probation_count = 0;
      probation_flag = 'Y';
      rule = '4';
    end;

  end;
  else if default_flag = 'Y' then do;
    probation_count = 0;
    probation_flag = 'N';
    rule = '6';
  end;
  else do;
    /* Unexpected flag value: log it and end the step without writing this row. */
    put 'ERROR: ' default_flag= _n_=;
    stop;
  end;

  /* trigger_counting and pcounting stay in the output; the DROP below is disabled. */
*  drop trigger_counting pcounting;
run;

推荐阅读