Folks, I was going thru few visualizations on Tableau's website today and I came across this visualization Exploring the SSA Baby Names Dataset by one of the acclaimed Tableau professional....It made me thinking to explore of How many of those SSA baby names are of Indian American (Desi) descent...
I found a website online that had a list of popular Indian baby names...I read the data into SAS and made a Tableau Story out of it... Please take a few moments to play with this interesting viz...I hope you like it....
Here's the SAS code that went into the prep of the data...
/*Read Indian
Baby Names by parsing the http URL */
libname
sharad "C:\Users\Sharad\Desktop\namesbystate";
%macro
loop(type);
proc sql; drop
table name; quit;
%do
i=1 %to 60;
filename foo url
"http://www.modernindianbabynames.com/modern_baby_name/starting_with/ANY/MF/Sikh/1560/&i.";
data _null;
retain start
recind recst recend hier;
length SN Name
Meaning Gender Origin $ 100;
retain SN Name
Meaning Gender Origin;
infile foo length=len;
input record $varying200.
len;
put record $varying200.
len;
if index(record,')
then start=1;
if index(record,'
if index(record,')
then delete;
if index(record,'
'
) then do; recvalst=1; hier+1;; end;
'
) then do; recvalst=1; hier+1;; end;
if index(record,'
')
then do; recvalend=1; delete; end;
if index(record,'
'
)
then do; recst=1; hier=0;delete;
end;
if index(record,'
')
then do; recst=0; hier=0;;
end;
if hier=1
then do; record=tranwrd(record,'
'
,''); SN=strip(record); end;
'
,''); SN=strip(record); end;
else if hier=2
then do; record=tranwrd(record,'
'
,''); Name=strip(record); end;
'
,''); Name=strip(record); end;
else if hier=3
then do; record=tranwrd(record,'
'
,''); Meaning=strip(record); end;
'
,''); Meaning=strip(record); end;
else if hier=4
then do; record=tranwrd(record,'
'
,''); Gender=strip(record); end;
'
,''); Gender=strip(record); end;
else if hier=5
then do; record=tranwrd(record,'
'
,''); Origin=strip(record); end;
'
,''); Origin=strip(record); end;
record=tranwrd(record,'
'
,'');
'
,'');
record=tranwrd(record,'
','');
record=strip(record);
if index(record,'
')
and start then do; recend=1; hier=0;
output; end;
else delete;
keep SN Name Meaning Gender Origin;
run;
OPTION SPOOL;
proc append
data=_null base=sharad.&type force; run;
%end;
%mend
loop;
%loop(Hindi);
/*
Make a list of
Indian Names that definetly sound Indian or Closely Indian
Y - Yes
P - Indian
Possibility
*/
data
Sharad.Def_IndiaNames;
infile
cards4 dlm='09'x
missover;
length
Name $ 100 IndianorNot $ 1;
input
Name IndianorNot;
Name=strip(propcase(Name));
cards;
Name Indian
Tina P
Tanya P
Maya P
Trisha Y
Nadia P
Amir P
Aisha P
Tanisha P
Chandra P
Chaya P
Rohan Y
----and 1000’s of other
records---
;
run;
/*
Join all
available Indian Names
*/
data
Sharad.ALLNames;
set
Sharad.telugu
sharad.bengali sharad.hindi sharad.sikh;
Name=translate(Name,'',"'");
if
compress(Name)='' then
delete;
drop
SNO SN;
run;
/*
Remove Dups
*/
proc
sort data=Sharad.ALLNames
noduprecs;
by Name; run;
/*
Re-purpose the
data a bit
*/
data
Sharad.IndianNames(rename=(dMeaning=Meaning
dGender=IGender dOrigin=Origin));
length
dMeaning $ 100 dGender $15
dOrigin $ 100;
retain
dMeaning dGender dOrigin;
set
Sharad.ALLNames;
by
Name;
if first.name then
do;
dMeaning='';
dOrigin='';
dGender='';
end;
if
index(strip(dMeaning),strip(Meaning)) eq 0
then
dMeaning=catx(' OR ',strip(dMeaning),strip(Meaning));
if
index(strip(dOrigin),strip(Origin)) eq 0
then
dOrigin=catx(' ,',strip(dOrigin),strip(Origin));;
if
index(strip(dGender),strip(Gender)) eq 0
then
dGender=catx(' OR ',strip(dGender),strip(Gender));;
if
dGender in ("Boy OR Girl","Girl
OR Boy") then
dGender="Boy OR Girl";
dGender=strip(dGender);
if last.name then
output;
keep
Name dMeaning dGender dOrigin;
run;
/*
Read US Gov SSA
Baby Names data fields
*/
filename
allst "C:\Users\Sharad\Desktop\namesbystate\all\allstates.txt";
data
Sharad.USNames;
infile
allst dlm=','
dsd missover
firstobs=2;
length
State $ 2 Gender $1
Year $4 Name $ 50
;
input
State Gender Year Name Occurences;
run;
/*
Merge US Gov SSA
Baby Names data with Indian Names Data
*/
proc
sql;
create
table sharad.IndNames as
select
A.*,IGender,Meaning,Origin
from
Sharad.USNames A
left
join Sharad.IndianNames B
on
A.name=B.name
order
by A.name;
quit;
/*
Merge US Gov SSA
Baby Names data with Hand picked Indian Data
*/
proc
sql;
create
table sharad.DefinitelyIndian as
select
A.*,
case
when
A.name=B.name and IndianorNot='Y'
then 'Indian Name'
when
A.name=B.name and IndianorNot='P'
then 'Likely an Indian Name'
else
'Non-Indian Name'
end
as IndianDescent length=10
from
sharad.IndNames A
left
join Sharad.Def_IndiaNames B
on
A.name=B.name
;
quit;