mp_ds2csv.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Export a dataset to a CSV file WITH leading blanks
4  @details Export a dataset to a file or fileref, retaining leading blanks.
5 
6  When using SASJS headerformat, the input statement is provided in the first
7  row of the CSV.
8 
9  Usage:
10 
11  %mp_ds2csv(sashelp.class,outref="%sysfunc(pathname(work))/file.csv")
12 
13  filename example temp;
14  %mp_ds2csv(sashelp.air,outref=example,headerformat=SASJS)
15  data; infile example; input;put _infile_; if _n_>5 then stop;run;
16 
17  data _null_;
18  infile example;
19  input;
20  call symputx('stmnt',_infile_);
21  stop;
22  run;
23  data work.want;
24  infile example dsd firstobs=2;
25  input &stmnt;
26  run;
27 
28  Why use mp_ds2csv over, say, proc export?
29 
30  1. Ability to retain leading blanks (this is a major one)
31  2. Control the header format
32  3. Simple one-liner
33 
34  @param [in] ds The dataset to be exported
35  @param [in] dlm= (COMMA) The delimeter to apply. For SASJS, will always be
36  COMMA. Supported values:
37  @li COMMA
38  @li SEMICOLON
39  @param [in] headerformat= (LABEL) The format to use for the header section.
40  Valid values:
41  @li LABEL - Use the variable label (or name, if blank)
42  @li NAME - Use the variable name
43  @li SASJS - Used to create sasjs-formatted input CSVs, eg for use in
44  mp_testservice.sas. This format will supply an input statement in the
45  first row, making ingestion by datastep a breeze. Special misisng values
46  will be prefixed with a period (eg `.A`) to enable ingestion on both SAS 9
47  and Viya. Dates / Datetimes etc are identified by the format type (lookup
48  with mcf_getfmttype.sas) and converted to human readable formats (not
49  numbers).
50  @param [out] outfile= The output filename - should be quoted.
51  @param [out] outref= (0) The output fileref (takes precedence if provided)
52  @param [in] outencoding= (0) The (quoted) output encoding to use, eg `"UTF-8"`
53  @param [in] termstr= (CRLF) The line seperator to use. For SASJS, will
54  always be CRLF. Valid values:
55  @li CRLF
56  @li LF
57 
58  <h4> SAS Macros </h4>
59  @li mcf_getfmttype.sas
60  @li mf_getuniquename.sas
61  @li mf_getvarformat.sas
62  @li mf_getvarlist.sas
63  @li mf_getvartype.sas
64 
65  @version 9.2
66  @author Allan Bowe (credit mjsq)
67 **/
68 
69 %macro mp_ds2csv(ds
70  ,dlm=COMMA
71  ,outref=0
72  ,outfile=
73  ,outencoding=0
74  ,headerformat=LABEL
75  ,termstr=CRLF
76 )/*/STORE SOURCE*/;
77 
78 %local outloc delim i varlist var vcnt vat dsv vcom vmiss fmttype vfmt;
79 
80 %if not %sysfunc(exist(&ds)) %then %do;
81  %put %str(WARN)ING: &ds does not exist;
82  %return;
83 %end;
84 
85 %if %index(&ds,.)=0 %then %let ds=WORK.&ds;
86 
87 %if &outencoding=0 %then %let outencoding=;
88 %else %let outencoding=encoding=&outencoding;
89 
90 %if &outref=0 %then %let outloc=&outfile;
91 %else %let outloc=&outref;
92 
93 %if &headerformat=SASJS %then %do;
94  %let delim=",";
95  %let termstr=CRLF;
96  %mcf_getfmttype(wrap=YES)
97 %end;
98 %else %if &dlm=COMMA %then %let delim=",";
99 %else %let delim=";";
100 
101 /* credit to mjsq - https://stackoverflow.com/a/55642267 */
102 
103 /* first get headers */
104 data _null_;
105  file &outloc &outencoding lrecl=32767 termstr=&termstr;
106  length header $ 2000 varnm vfmt $32 dlm $1 fmttype $8;
107  call missing(of _all_);
108  dsid=open("&ds.","i");
109  num=attrn(dsid,"nvars");
110  dlm=&delim;
111  do i=1 to num;
112  varnm=upcase(varname(dsid,i));
113  if i=num then dlm='';
114  %if &headerformat=NAME %then %do;
115  header=cats(varnm,dlm);
116  %end;
117  %else %if &headerformat=LABEL %then %do;
118  header = cats(coalescec(varlabel(dsid,i),varnm),dlm);
119  %end;
120  %else %if &headerformat=SASJS %then %do;
121  vlen=varlen(dsid,i);
122  if vartype(dsid,i)='C' then header=cats(varnm,':$char',vlen,'.');
123  else do;
124  vfmt=coalescec(varfmt(dsid,i),'0');
125  fmttype=mcf_getfmttype(vfmt);
126  if fmttype='DATE' then header=cats(varnm,':date9.');
127  else if fmttype='DATETIME' then header=cats(varnm,':E8601DT26.6');
128  else if fmttype='TIME' then header=cats(varnm,':TIME12.');
129  /**
130  * there is not much point importing a short length numeric like this,
131  * eg with best4., as the resulting variable will still be stored as
132  * length 8. We need a length or format statement to ensure variable
133  * is creatd with the smaller length...
134  **/
135  else if vlen<8 then header=cats(varnm,':best',vlen,'.');
136  else header=cats(varnm,':best.');
137  end;
138  %end;
139  %else %do;
140  %put &sysmacroname: Invalid headerformat value (&headerformat);
141  %return;
142  %end;
143  put header @;
144  end;
145  rc=close(dsid);
146 run;
147 
148 %let varlist=%mf_getvarlist(&ds);
149 %let vcnt=%sysfunc(countw(&varlist));
150 
151 /**
152  * The $quote modifier (without a width) will take the length from the variable
153  * and increase by two. However this will lead to truncation where the value
154  * contains double quotes (which are doubled up). To get around this, scan the
155  * data to see the max number of double quotes, so that the appropriate width
156  * can be applied in the subsequent step.
157  */
158 data _null_;
159  set &ds end=last;
160 %do i=1 %to &vcnt;
161  %let var=%scan(&varlist,&i);
162  %local vlen&i;
163  %if %mf_getvartype(&ds,&var)=C %then %do;
164  %let dsv1=%mf_getuniquename(prefix=csvcol1_);
165  %let dsv2=%mf_getuniquename(prefix=csvcol2_);
166  retain &dsv1 0;
167  &dsv2=length(&var)+countc(&var,'"');
168  if &dsv2>&dsv1 then &dsv1=&dsv2;
169  if last then call symputx(
170  "vlen&i"
171  /* should be no shorter than varlen, and no longer than 32767 */
172  ,cats('$quote',min(&dsv1+2,32767),'.')
173  ,'l'
174  );
175  %end;
176 %end;
177 
178 %let vat=@;
179 %let vcom=&delim;
180 %let vmiss=%mf_getuniquename(prefix=csvcol3_);
181 /* next, export data */
182 data _null_;
183  set &ds.;
184  file &outloc mod dlm=&delim dsd &outencoding lrecl=32767 termstr=&termstr;
185  if _n_=1 then &vmiss=' ';
186  %do i=1 %to &vcnt;
187  %let var=%scan(&varlist,&i);
188  %if &i=&vcnt %then %do;
189  %let vat=;
190  %let vcom=;
191  %end;
192  %if %mf_getvartype(&ds,&var)=N %then %do;
193  %if &headerformat = SASJS %then %do;
194  %let vcom=&delim;
195  %let fmttype=%sysfunc(mcf_getfmttype(%mf_getvarformat(&ds,&var)0));
196  %if &fmttype=DATE %then %let vfmt=DATE9.;
197  %else %if &fmttype=DATETIME %then %let vfmt=E8601DT26.6;
198  %else %if &fmttype=TIME %then %let vfmt=TIME12.;
199  %else %do;
200  %let vfmt=;
201  %let vcom=;
202  %end;
203  %end;
204  %else %let vcom=;
205 
206  /* must use period - in order to work in both 9.4 and Viya 3.5 */
207  if missing(&var) and &var ne %sysfunc(getoption(MISSING)) then do;
208  &vmiss=cats('.',&var);
209  put &vmiss &vat;
210  end;
211  else put &var &vfmt &vcom &vat;
212 
213  %end;
214  %else %do;
215  %if &i ne &vcnt %then %let vcom=&delim;
216  put &var &&vlen&i &vcom &vat;
217  %end;
218  %end;
219 run;
220 
221 %mend mp_ds2csv;