csv - SAS Proc 导入 CSV 和缺失数据
问题描述
所以,我正在尝试在 SAS 中导入一些数据集并加入它们,唯一的问题是加入它们后出现此错误 -
proc import datafile='filepath/datasetA.csv'
out = dataA
dbms= csv
replace;
run;
proc import datafile='filepath\datasetB.csv'
out = dataB
dbms= csv
replace;
run;
/* combine them all into one dataset*/
data DataC;
set &dataA. &dataB;
run;
ERROR: Variable column_k has been defined as both character and numeric
在我试图加入的两个数据集中,有问题的列看起来像这样 -
+----------+
| column_k |
+----------+
| 0 |
| 1 |
| 5 |
| 4 |
| NA |
| NA |
| 4 |
| 3 |
| NA |
+----------+
基本上,如果可能的话,我想将该列中的 NA 数据导入为“缺失”?我需要整个列保持数字,因为我计划对该列中的数据进行一些数学运算。
谢谢你的帮助!
解决方案
如果您希望继续使用Proc IMPORT
,则需要确保列是类似类型的。在您的情况下,您知道column_k
应该是数字,因此可以使用该函数DATA
将字符值转换为数字。INPUT
proc import … out = dataA;
proc import … out = dataB;
data dataA;
set dataA;
_num = input(column_k, best12.);
drop column_k;
rename _num = column_k;
run;
data dataB;
set dataB;
_num = input(column_k, best12.);
drop column_k;
rename _num = column_k;
run;
data want;
set dataA dataB;
run;
在较大范围内,列名的数据类型不匹配可能发生在处理多年导入等场景中。
假设旧数据无法重新读取,而新数据具有不同的列类型。
对于需要数值的情况,一种方法是使用宏来编写源代码,在必要时将指定的变量从字符转换为数字。
例子:
%enforce_num (perm.loans2015, age amount remaining, out=work.loans2015)
%enforce_num (perm.loans2016, age amount remaining, out=work.loans2016)
%enforce_num (perm.loans2017, age amount remaining, out=work.loans2017)
data loans_3yrs;
set work.loans2015-loans2017;
run;
回到您更简单的案例:
proc import … out = dataA;
proc import … out = dataB;
%enforce_num(dataA, column_k)
%enforce_num(dataB, column_k)
data want;
set dataA dataB;
run;
宏enforce_num
会是什么样子?它必须:
- 扫描输入数据集元数据
- 确定变量是否是指定变量之一并且是字符类型
- 编写源代码将变量转换为数值
- 保持原来的变量顺序
%macro enforce_num(data, vars, out=&data);
/*
* Arguments:
* data - name of input data set
* vars - space separated list of variables that must be numeric, convert type if necessary
* out - name of output data set, default same as input data set
*
* Output:
* - Unchanged data set if data and out are the same and no conversion needed
* - Changed data set if some columns in data need conversion to numeric
* - replaces data if out is same as data
* - replaces out if out is different then data
* - the column order of the changed data set will be the same as the original data set
*/
%local dsid index index2 vars varname vartype varnames debug;
%let index2 = 0; %* number of variables determined to be requiring conversion;
%let debug = 0;
%if &debug %then %put NOTE: &SYSMACRONAME: data=%superq(data);
%let dsid = %sysfunc(open(&data));
%if &dsid %then %do;
%do index = 1 %to %sysfunc(attrn(&dsid, nvars));
%let varname = %sysfunc(varname(&dsid, &index));
%let varnames = &varnames &varname;
%if %sysfunc(indexw(&varname, &vars)) %then %do;
%if C = %sysfunc(vartype(&dsid, &index)) %then %do;
%* Data contains character variable requiring enforcement;
%let index2 = %eval(&index2+1);
%local convert&index2;
%let convert&index2 = &varname;
%let varnames = &varnames ___&index2 ; %* Variables that will be converted will be named __<#> during conversion;
%end;
%end;
%end;
%let dsid = %sysfunc(close(&dsid));
%end;
%else
%put %sysfunc(sysmsg());
%*put NOTE: &=vars;
%*put NOTE: &=varnames;
%if &index2 = 0 %then %do;
%* No columns need to be converted to numeric, copy to out if necessary;
%if &data ne &out %then %do;
data &out;
set &data;
run;
%end;
%return;
%end;
%* Some columns need to be converted to numeric;
%* Ensure the converted column is at the same position (varnum) as in the original data set;
data &out;
retain &varnames;
set &data;
%do index = 1 %to &index2;
___&index = input(&&convert&index,?? best12.);
%end;
drop
%do index = 1 %to &index2;
&&convert&index
%end;
;
rename
%do index = 1 %to &index2;
___&index = &&convert&index
%end;
;
run;
%put NOTE: ------------------------------------------------;
%put NOTE: &data has been subjected to numeric enforcement.;
%put NOTE: ------------------------------------------------;
%mend enforce_num;
推荐阅读
- python - 如何刷新 Maya cmds.gridLayout?
- botframework - 如何使 MS Teams 机器人仅可从某些@xyz.com 域使用(无身份验证)
- javascript - 如何在 React Navigation 5 中动态设置导航栏标题和背景颜色
- kotlin - 不包括变量名中的数据类型
- sql - 来自两个表的动态列
- oracle - 为什么 Hibernate 忽略 setMaxResults?
- python - 关于 python 和 python3 命令的混淆
- c# - 等效于 Blazor 中的 ObserveOnDispatcher
- php - 如何让IP地址在一分钟内被点击超过10次
- php - 确切的 API - SalesOrderLines(必选项目:单位),即使单位存在