This macro splits a dataset vertically into multiple datasets that share a common primary key. For example, suppose a dataset has 400 fields and 20,000 records. Splitting it into two datasets of 200 fields and 20,000 records each, with a common field such as the loan number as the primary key, makes the details easier to load for analysis.
To be called like this:
%splitdsnverticallykey(dsn,varperdsn,keyvars=);
e.g. %splitdsnverticallykey(sashelp.vtable,4,keyvars=memname libname);
dsn - libname.datasetname of the dataset to be split
varperdsn - how many variables to put in each output dataset, excluding the key variables
keyvars - the primary key variables, separated by blanks
%macro splitdsnverticallykey(dsn,varperdsn,keyvars=);
/*
   Split a dataset vertically into several datasets that all carry the same
   key variables.

   Parameters:
     dsn       - libname.datasetname (or a one-level name) of the dataset to split
     varperdsn - number of non-key variables to place in each output dataset
     keyvars   - blank separated list of key variables kept in every output dataset

   Output:
     Datasets named &dsn.1, &dsn.2, ... each holding the key variables plus
     the next &varperdsn non-key variables, in the variable order of &dsn.
     The last dataset holds whatever variables remain.
*/
%local num keyvar keyvarstr numkeyvars totalnumdsns numofvars i j start end;

/* a non-positive chunk size would divide by zero in the data step below */
%if &varperdsn lt 1 %then %do;
   %put ERROR: varperdsn must be a positive integer but is &varperdsn;
   %return;
%end;

/* split the keyvars into individual names and build a quoted, comma
   separated list of them. Names are upcased so the comparison in the data
   step does not depend on the case the caller typed. */
%let num=1;
%let keyvar=%scan(&keyvars,&num,%str( ));
%let keyvarstr=%str(%")%upcase(&keyvar)%str(%",);
%do %while(&keyvar ne );
   %let num=%eval(&num + 1);
   %let keyvar=%scan(&keyvars,&num,%str( ));
   %if &keyvar ne %then %let keyvarstr=&keyvarstr%str(%")%upcase(&keyvar)%str(%",);
%end;
/* num stopped one past the last key variable */
%let numkeyvars=%eval(&num - 1);
/* drop the trailing comma */
%let keyvarstr=%substr(&keyvarstr,1,%length(&keyvarstr)-1);

data _null_;
   length thisvar $32;
   /*Open the dataset and assign to handler*/
   dsid=open("&dsn","i");
   if dsid=0 then do;
      /* nothing can be split; make the macro loop below a no-op */
      put "ERROR: unable to open dataset &dsn";
      call symputx('totalnumdsns',0,'L');
      call symputx('numofvars',0,'L');
      stop;
   end;
   /*attrn with nvars gives the count of variables */
   numofvars=attrn(dsid,"nvars");
   /*identify total number of dsns it would fit into excluding the key vars*/
   totalnumdsns=ceil((numofvars-&numkeyvars)/&varperdsn);
   /* symputx strips blanks, so the values are usable in %eval as they are */
   call symputx('totalnumdsns',totalnumdsns,'L');
   call symputx('numofvars',numofvars-&numkeyvars,'L');
   j=0;
   do i=1 to numofvars;
      thisvar=trim(left(varname(dsid,i)));
      if upcase(thisvar) not in (&keyvarstr) then do;
         j+1;
         /* Get the name of the variables into macro variables var1, var2, ... */
         call symputx(cats('var',j),thisvar,'L');
      end;
   end;
   rc=close(dsid);
run;

%if &totalnumdsns lt 1 %then %do;
   %put NOTE: no non-key variables of &dsn are available to split;
   %return;
%end;

%put The dataset &dsn with &numofvars of variables excluding variables {&keyvars} is split vertically into &totalnumdsns datasets;

/* name the datasets in sequence */
%do i=1 %to &totalnumdsns;
   %let start=%eval((&i-1)*&varperdsn + 1);
   %let end=%eval(&start + &varperdsn - 1);
   /* the last dataset may hold fewer than &varperdsn variables */
   %if &end ge &numofvars %then %let end=&numofvars;
   %put start=&start end=&end;
   data &dsn.&i; /*Note: There should be a blank after &dsn.&totalnumdsns*/
      set &dsn(keep=&keyvars %do j=&start %to &end; &&var&j %end;);
   run;
%end;
%mend splitdsnverticallykey;
/* Example: split dataset flags into chunks of 4 non-key variables, keeping title in every chunk */
%splitdsnverticallykey(flags,4,keyvars=title);
The generated log shows that two datasets, flags1 and flags2, are created with 5 and 3 columns respectively…
flags1 dataset
flags2 dataset