mp_csv2ds.sas
Go to the documentation of this file.
1 /**
2  @file mp_csv2ds.sas
3  @brief Efficient import of arbitrary CSV using a dataset as template
4  @details Used to import relevant columns from a large CSV using
5  a dataset to provide the types and lengths. Assumes that a header
6  row is provided, and datarows start on line 2. Extra columns in
7  both the CSV and base dataset are ignored.
8 
9  Usage:
10 
11  filename mycsv temp;
12  data _null_;
13  file mycsv;
14  put 'name,age,nickname';
15  put 'John,48,Jonny';
16  put 'Jennifer,23,Jen';
17  run;
18 
19  %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
20 
21 
22  @param [in] inref= (0) Fileref to the CSV
23  @param [out] outds= (0) Output ds (lib.ds format)
24  @param [in] view= (NO) Set to YES or NO to determine whether the output
25  should be a view or not. Default is NO (not a view).
26  @param [in] baseds= (0)
27  Template dataset on which to create the input statement.
28  Is used to determine types, lengths, and any informats.
29 
30  @version 9.2
31  @author Allan Bowe
32 
33  <h4> SAS Macros </h4>
34  @li mp_abort.sas
35  @li mf_existds.sas
36 
37 **/
38 
39 %macro mp_csv2ds(inref=0,outds=0,baseds=0,view=NO);
40 
41 %mp_abort(iftrue=( &inref=0 )
42  ,mac=&sysmacroname
43  ,msg=%str(the INREF variable must be provided)
44 )
45 %mp_abort(iftrue=( %superq(outds)=0 )
46  ,mac=&sysmacroname
47  ,msg=%str(the OUTDS variable must be provided)
48 )
49 %mp_abort(iftrue=( &baseds=0 )
50  ,mac=&sysmacroname
51  ,msg=%str(the BASEDS variable must be provided)
52 )
53 %mp_abort(iftrue=( %mf_existds(&baseds)=0 )
54  ,mac=&sysmacroname
55  ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist)
56 )
57 
58 /* count rows */
59 %local hasheader; %let hasheader=0;
60 data _null_;
61  if _N_ > 1 then do;
62  call symputx('hasheader',1,'l');
63  stop;
64  end;
65  infile &inref;
66  input;
67 run;
68 %mp_abort(iftrue=( &hasheader=0 )
69  ,mac=&sysmacroname
70  ,msg=%str(No header row in &inref)
71 )
72 
73 /* get the variables in the CSV */
74 data _data_;
75  infile &inref;
76  input;
77  length name $32;
78  do i=1 to countc(_infile_,',')+1;
79  name=upcase(scan(_infile_,i,','));
80  output;
81  end;
82  stop;
83 run;
84 %local csv_vars;%let csv_vars=&syslast;
85 
86 /* get the variables in the dataset */
87 proc contents noprint data=&baseds
88  out=_data_ (keep=name type length format: informat);
89 run;
90 %local base_vars; %let base_vars=&syslast;
91 
92 proc sql undo_policy=none;
93 create table &csv_vars as
94  select a.*
95  ,b.type
96  ,b.length
97  ,b.format
98  ,b.formatd
99  ,b.formatl
100  ,b.informat
101  from &csv_vars a
102  left join &base_vars b
103  on a.name=upcase(b.name)
104  order by i;
105 
106 /* prepare the input statement */
107 %local instat dropvars;
108 data _null_;
109  set &syslast end=last;
110  length in dropvars $32767;
111  retain in dropvars;
112  if missing(type) then do;
113  informat='$1.';
114  dropvars=catx(' ',dropvars,name);
115  end;
116  else if missing(informat) then do;
117  if type=1 then informat='best.';
118  else informat=cats('$',length,'.');
119  end;
120  else informat=cats(informat,'.');
121  in=catx(' ',in,name,':',informat);
122  if last then do;
123  call symputx('instat',in,'l');
124  call symputx('dropvars',dropvars,'l');
125  end;
126 run;
127 
128 /* import the CSV */
129 data &outds
130  %if %upcase(&view)=YES %then %do;
131  /view=&outds
132  %end;
133  ;
134  infile &inref dsd firstobs=2;
135  input &instat;
136  %if %length(&dropvars)>0 %then %do;
137  drop &dropvars;
138  %end;
139 run;
140 
141 %mend mp_csv2ds;