/* ALL CODE FOR INTRODUCTION TO SAS 9.3 SEMINAR */

*********************************************************
*  Entering Data                                        *
*********************************************************;

*2.1 Import wizard and proc import;
/* Read the Excel workbook into the temporary dataset hs0.
   replace     : overwrite hs0 if it already exists.
   getnames=yes: take variable names from the first row of the sheet. */
proc import datafile="/home/vijayarakhavan930/sasuser.v94/raw_data/hs0.xlsx"
    dbms = XLSX
    replace
    out=hs0;
  getnames = yes;
run;

*2.2 Data Steps;

* Infile a comma-separated-values (.csv) file;
/* dsd makes two consecutive delimiters read as a missing value and strips
   quotation marks from quoted fields.
   The length statement comes before input so that prgtype is stored with
   10 characters instead of the 8-character default of list input. */
data temp;
  infile '/home/vijayarakhavan930/sasuser.v94/raw_data/hs0.csv' delimiter=',' dsd;
  length prgtype $10;
  input gender id race ses schtyp prgtype $ read write math science socst;
run;

* Show the first 10 observations to check the read;
proc print data = temp (obs=10);
run;

* Enter data directly into SAS using input;
/* The data records must start on the line AFTER the datalines statement,
   one observation per line, and end with a line holding only a semicolon.
   No comments or other statements may appear between datalines and that
   semicolon. */
data hsb10;
  input id female race ses schtype $ prog read write math science socst;
  datalines;
147 1 1 3 pub 1 47 62 53 53 61
108 0 1 2 pub 2 34 33 41 36 36
18 0 3 2 pub 3 50 33 49 44 36
153 0 1 2 pub 3 39 31 40 39 51
50 0 2 2 pub 2 50 59 42 53 61
51 1 2 1 pub 2 42 36 42 31 39
102 0 1 1 pub 1 52 41 51 53 56
57 1 1 2 pub 1 71 65 72 66 56
160 1 1 2 pub 1 55 65 55 50 61
136 0 1 2 pub 1 65 59 70 63 51
;
run;

proc print data=hsb10;
run;

*2.3 Saving SAS data files;

* Save temporary dataset "temp" as a permanent file;
/* Method 1: name the permanent dataset by its quoted physical path. */
data '/home/vijayarakhavan930/sasuser.v94/datasets/hs0';
  set temp;
run;

proc print data='/home/vijayarakhavan930/sasuser.v94/datasets/hs0';
run;

/* Method 2: assign a libref to the directory and use a two-level name. */
libname IN '/home/vijayarakhavan930/sasuser.v94/datasets';

data in.hs0;
  set temp;
run;

*********************************************************
*  Exploring Data                                       *
*********************************************************;

* Set output to be left justified rather than centered;
options nocenter;

* Examine data using proc contents and proc print;
/* position additionally lists the variables in their order within the
   dataset. */
proc contents position data=in.hs0;
run;

proc print data=in.hs0 (obs=20);
run;

* If we only want to print some variables, we can use the var statement;
proc print data=in.hs0 (obs=20);
  var gender id race ses schtyp prgtype read;
run;

* Create a temporary dataset called hs0 ;
data hs0;
  set in.hs0;
run;

/* The bare asterisk below opens a statement comment; its text and closing
   semicolon are at the start of the next source line. */
* 
Descriptive statistics with proc means and proc univariate;
/* The line above is the tail of a statement comment whose opening asterisk
   sits at the end of the preceding source line. */

/* Default summary statistics for the variables of hs0. */
proc means data=hs0;
run;

/* Detailed distribution output for read and write. */
proc univariate data=hs0;
  var read write;
run;

* means for a subset of variables using var;
/* In the proc means statement "var" is the variance statistic keyword,
   in the var statement it selects the analysis variables. */
proc means data=hs0 n mean median std var;
  var read math science write;
run;

* same statistics, restricted with where to observations with read of 60 or more;
proc means data=hs0 n mean median std var;
  where read>=60;
  var read math science write;
run;

* means broken down by group (ses) using class;
proc means data=in.hs0 n mean median std var;
  class ses;
  var read math science write;
run;

* histogram with normal curve overlay from proc univariate;
/* noprint suppresses the statistics tables so only the graph is produced. */
proc univariate data=in.hs0 noprint;
  var write;
  histogram / normal;
run;

* Frequency distribution table for ses;
proc freq data=in.hs0;
  tables ses;
run;

* Frequency distribution tables for gender, schtyp and prgtype;
proc freq data=hs0;
  tables gender schtyp prgtype;
run;

* a crosstab using proc freq plus a frequency plot;
proc freq data=hs0;
  tables prgtype*ses / plots=freqplot;
run;

* correlations using proc corr with pairwise deletion of missing observations (default) ;
proc corr data=hs0;
  var write read science;
run;

* correlations using proc corr with listwise deletion of missing observations (nomiss option) ;
proc corr data=hs0 nomiss;
  var write read science;
run;

* Scatter plot matrix;
proc corr data=hs0 nomiss plots=matrix;
  var write read science;
run;

* a scatter plot ;
proc sgplot data = hs0;
  scatter x = read y = write;
run;

* scatter plot with the id number as the marker;
proc sgplot data=hs0;
  scatter x=write y=read / markerchar=id;
run;

* scatter plot with gender of observation indicated;
proc sgplot data=hs0;
  scatter x=write y=read / group=gender;
run;

* vertical bar chart representing mean of variable write by ses with error bars;
proc sgplot data=hs0;
  vbar ses / response = write stat=mean limits=both;
run;

* histogram of variable read with normal curve and kernel density plot overlaid;
proc sgplot data=hs0;
  histogram read;
  density read / type=normal;
  density read / type = kernel;
run;