Jan 19, 2011

SAS Macro to split a dataset into multiple datasets vertically with a common primary key

This macro splits a dataset to multiple datasets vertically with a common primary key. For eg, a dataset has 400 fields and 20,000 records. If we can split the dataset into two, with 200 fields and 20,000 records in each dataset with a common field like loan number as primary key would be helpful to load the details for analysis.
Taking an example...
data flags;
set sashelp.flags;
To be called like this...
eg. %splitdsnverticallykey(sashelp.vtable,4,keyvars=memname libname);
Where -----------
dsn - libname.datasetname to be split
varperdsn - How many vars per dsn excluding the key variables
keyvars - specify the primary key variables
%macro splitdsnverticallykey(dsn,varperdsn,keyvars=);
/* split the keyvars into individual macro var names*/
%let num=1;
%let keyvar=%scan(&keyvars,&num,' ');
%let keyvar&num=&keyvar;
%let keyvarstr=%str(%")&keyvar%str(%",);
%do %while(&keyvar ne );
            %let num=%eval(&num + 1);
            %let keyvar=%scan(&keyvars,&num,' ');
            %let keyvar&num=&keyvar;
            %if &keyvar ne  %then %let keyvarstr=&keyvarstr%str(%")&keyvar%str(%",);
%let numkeyvars=%eval(&num - 1);
%let keyvarstr=%substr(&keyvarstr,1,%length(&keyvarstr)-1);
data _null_;
/*Open the dataset and assign to handler*/ 
   /*attrn with nvars gives u the count of variables */
   call symput('numofvars',numofvars-&numkeyvars);
   /*identify total number of dsns would it fit excluding the key vars*/
   call symput('totalnumdsns',totalnumdsns);
   do i=1 to numofvars;
     if varname not in (&keyvarstr) then
           /*Get the name of the variables into macro variables*/
           call symput(compress('varname'||k),varname);
%let totalnumdsns=&totalnumdsns;
%let numofvars=&numofvars;
%put The dataset &dsn with &numofvars of variables excluding variables {&keyvars} is split vertically into &totalnumdsns datasets;
/* name the datasets in sequence */
%let start=0;
%let end=0;
%do i=1 %to &totalnumdsns;
     %let start=%eval((&i-1)*&varperdsn + 1);
     %let end=%eval(&start + &varperdsn - 1);
     %if &end ge &numofvars %then %let end=&numofvars;
     %put start=&start end=&end;
     data &dsn.&i; /*Note: There should be a blank after &dsn.&totalnumdsns*/
     retain &keyvars;
            set &dsn (keep=&keyvars
                      %do m=&start %to &end;
%mend splitdsnverticallykey;
options nosource;
sas macro
Log generated shows that 2 datasets flag1 and flag2 are created with 5 and 3 columns respectively…
sas macro
flag1 dataset
sas macro
flag2 dsn