mp_ds2cards.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Create a CARDS file from a SAS dataset.
4  @details Uses dataset attributes to convert all data into datalines.
5  Running the generated file will rebuild the original dataset. Includes
6  support for large decimals, binary data, PROCESSED_DTTM columns, and
7  alternative encoding. If the input dataset is empty, the cards file will
8  still be created.
9 
10  Additional support to generate a random sample and max rows.
11 
12  Usage:
13 
14  %mp_ds2cards(sashelp.class
15  , tgt_ds=work.class
16  , cards_file= "C:\temp\class.sas"
17  , showlog=NO
18  , maxobs=5
19  )
20 
21  TODO:
22  - labelling the dataset
23  - explicity setting a unix LF
24  - constraints / indexes etc
25 
26  @param [in] base_ds Should be two level - eg work.blah. This is the table
27  that is converted to a cards file.
28  @param [in] tgt_ds= Table that the generated cards file would create.
29  Optional - if omitted, will be same as BASE_DS.
30  @param [out] cards_file= ("%sysfunc(pathname(work))/cardgen.sas") Location in
31  which to write the (.sas) cards file
32  @param [in] maxobs= (max) To limit output to the first <code>maxobs</code>
33  observations, enter an integer here.
34  @param [in] random_sample= (NO) Set to YES to generate a random sample of
35  data. Can be quite slow.
36  @param [in] showlog= (YES) Whether to show generated cards file in the SAS
37  log. Valid values:
38  @li YES
39  @li NO
40  @param [in] outencoding= Provide encoding value for file statement (eg utf-8)
41  @param [in] append= (NO) If NO then will rebuild the cards file if it already
42  exists, otherwise will append to it. Used by the mp_lib2cards.sas macro.
43 
44  <h4> Related Macros </h4>
45  @li mp_lib2cards.sas
46  @li mp_ds2inserts.sas
47  @li mp_mdtablewrite.sas
48 
49  @version 9.2
50  @author Allan Bowe
51  @cond
52 **/
53 
54 %macro mp_ds2cards(base_ds, tgt_ds=
55  ,cards_file="%sysfunc(pathname(work))/cardgen.sas"
56  ,maxobs=max
57  ,random_sample=NO
58  ,showlog=YES
59  ,outencoding=
60  ,append=NO
61 )/*/STORE SOURCE*/;
62 %local i setds nvars;
63 
64 %if not %sysfunc(exist(&base_ds)) %then %do;
65  %put %str(WARN)ING: &base_ds does not exist;
66  %return;
67 %end;
68 
69 %if %index(&base_ds,.)=0 %then %let base_ds=WORK.&base_ds;
70 %if (&tgt_ds = ) %then %let tgt_ds=&base_ds;
71 %if %index(&tgt_ds,.)=0 %then %let tgt_ds=WORK.%scan(&base_ds,2,.);
72 %if ("&outencoding" ne "") %then %let outencoding=encoding="&outencoding";
73 %if ("&append" = "" or "&append" = "NO") %then %let append=;
74 %else %let append=mod;
75 
76 /* get varcount */
77 %let nvars=0;
78 proc sql noprint;
79 select count(*) into: nvars from dictionary.columns
80  where upcase(libname)="%scan(%upcase(&base_ds),1)"
81  and upcase(memname)="%scan(%upcase(&base_ds),2)";
82 %if &nvars=0 %then %do;
83  %put %str(WARN)ING: Dataset &base_ds has no variables, will not be converted.;
84  %return;
85 %end;
86 
87 /* get indexes */
88 proc sort
89  data=sashelp.vindex(
90  where=(upcase(libname)="%scan(%upcase(&base_ds),1)"
91  and upcase(memname)="%scan(%upcase(&base_ds),2)")
92  )
93  out=_data_;
94  by indxname indxpos;
95 run;
96 
97 %local indexes;
98 data _null_;
99  set &syslast end=last;
100  if _n_=1 then call symputx('indexes','(index=(','l');
101  by indxname indxpos;
102  length vars $32767 nom uni $8;
103  retain vars;
104  if first.indxname then do;
105  idxcnt+1;
106  nom='';
107  uni='';
108  vars=name;
109  end;
110  else vars=catx(' ',vars,name);
111  if last.indxname then do;
112  if nomiss='yes' then nom='/nomiss';
113  if unique='yes' then uni='/unique';
114  call symputx('indexes'
115  ,catx(' ',symget('indexes'),indxname,'=(',vars,')',nom,uni)
116  ,'l');
117  end;
118  if last then call symputx('indexes',cats(symget('indexes'),'))'),'l');
119 run;
120 
121 
122 data;run;
123 %let setds=&syslast;
124 proc sql
125 %if %datatyp(&maxobs)=NUMERIC %then %do;
126  outobs=&maxobs;
127 %end;
128  ;
129  create table &setds as select * from &base_ds
130 %if &random_sample=YES %then %do;
131  order by ranuni(42)
132 %end;
133  ;
134 reset outobs=max;
135 create table datalines1 as
136  select name,type,length,varnum,format,label from dictionary.columns
137  where upcase(libname)="%upcase(%scan(&base_ds,1))"
138  and upcase(memname)="%upcase(%scan(&base_ds,2))";
139 
140 /**
141  Due to long decimals cannot use best. format
142  So - use bestd. format and then use character functions to strip trailing
143  zeros, if NOT an integer or missing!! Cannot use int() as it upsets
144  note2err when there are missings.
145  resolved code = ifc( mod(coalesce(VARIABLE,0),1)=0
146  ,put(VARIABLE,best32.)
147  ,substrn(put(VARIABLE,bestd32.),1
148  ,findc(put(VARIABLE,bestd32.),'0','TBK')));
149 **/
150 
151 data datalines_2;
152  format dataline $32000.;
153  set datalines1 (where=(upcase(name) not in
154  ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM')));
155  if type='num' then dataline=
156  cats('ifc(mod(coalesce(',name,',0),1)=0
157  ,put(',name,',best32.-l)
158  ,substrn(put(',name,',bestd32.-l),1
159  ,findc(put(',name,',bestd32.-l),"0","TBK")))');
160  /**
161  * binary data must be converted, to store in text format. It is identified
162  * by the presence of the $HEX keyword in the format.
163  */
164  else if upcase(format)=:'$HEX' then
165  dataline=cats('put(trim(',name,'),',format,')');
166  /**
167  * There is no easy way to store line breaks in a cards file.
168  * To discuss this, use: https://github.com/sasjs/core/issues/80
169  * Removing all nonprintables with kw (keep writeable)
170  */
171  else dataline=cats('compress(',name,', ,"kw")');
172 run;
173 
174 proc sql noprint;
175 select dataline into: datalines separated by ',' from datalines_2;
176 
177 %local
178  process_dttm_flg
179  valid_from_dttm_flg
180  valid_to_dttm_flg
181 ;
182 %let process_dttm_flg = N;
183 %let valid_from_dttm_flg = N;
184 %let valid_to_dttm_flg = N;
185 data _null_;
186  set datalines1 ;
187 /* build attrib statement */
188  if type='char' then type2='$';
189  if strip(format) ne '' then format2=cats('format=',format);
190  if strip(label) ne '' then label2=cats('label=',quote(trim(label)));
191  str1=catx(' ',(put(name,$33.)||'length=')
192  ,put(cats(type2,length),$7.)||format2,label2);
193 
194 
195 /* Build input statement */
196  if upcase(format)=:'$HEX' then type3=':'!!format;
197  else if type='char' then type3=':$char.';
198  str2=put(name,$33.)||type3;
199 
200 
201  if(upcase(name) = "PROCESSED_DTTM") then
202  call symputx("process_dttm_flg", "Y", "L");
203  if(upcase(name) = "VALID_FROM_DTTM") then
204  call symputx("valid_from_dttm_flg", "Y", "L");
205  if(upcase(name) = "VALID_TO_DTTM") then
206  call symputx("valid_to_dttm_flg", "Y", "L");
207 
208 
209  call symputx(cats("attrib_stmt_", put(_N_, 8.)), str1, "L");
210  call symputx(cats("input_stmt_", put(_N_, 8.))
211  , ifc(upcase(name) not in
212  ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM'), str2, ""), "L");
213 run;
214 
215 data _null_;
216  file &cards_file. &outencoding lrecl=32767 termstr=nl &append;
217  length __attrib $32767;
218  if _n_=1 then do;
219  put '/**';
220  put ' @file';
221  put " @brief Datalines for %upcase(%scan(&base_ds,2)) dataset";
222  put " @details Generated by %nrstr(%%)mp_ds2cards()";
223  put " Source: https://github.com/sasjs/core";
224  put ' @cond ';
225  put '**/';
226  put "data &tgt_ds &indexes;";
227  put "attrib ";
228  %do i = 1 %to &nvars;
229  __attrib=symget("attrib_stmt_&i");
230  put __attrib;
231  %end;
232  put ";";
233 
234  %if &process_dttm_flg. eq Y %then %do;
235  put 'retain PROCESSED_DTTM %sysfunc(datetime());';
236  %end;
237  %if &valid_from_dttm_flg. eq Y %then %do;
238  put 'retain VALID_FROM_DTTM &low_date;';
239  %end;
240  %if &valid_to_dttm_flg. eq Y %then %do;
241  put 'retain VALID_TO_DTTM &high_date;';
242  %end;
243  if __nobs=0 then do;
244  put 'call missing(of _all_);/* avoid uninitialised notes */';
245  put 'stop;';
246  put 'run;';
247  end;
248  else do;
249  put "infile cards dsd;";
250  put "input ";
251  %do i = 1 %to &nvars.;
252  %if(%length(&&input_stmt_&i..)) %then
253  put " &&input_stmt_&i..";
254  ;
255  %end;
256  put ";";
257  put 'missing a b c d e f g h i j k l m n o p q r s t u v w x y z _;';
258  put "datalines4;";
259  end;
260  end;
261  set &setds end=__lastobs nobs=__nobs;
262 /* remove all formats for write purposes - some have long underlying decimals */
263  format _numeric_ best30.29;
264  length __dataline $32767;
265  __dataline=catq('cqtmb',&datalines);
266  put __dataline;
267  if __lastobs then do;
268  put ';;;;';
269  put 'run;';
270  put '/** @endcond **/';
271  stop;
272  end;
273 run;
274 proc sql;
275  drop table &setds;
276 quit;
277 
278 %if &showlog=YES %then %do;
279  data _null_;
280  infile &cards_file lrecl=32767;
281  input;
282  put _infile_;
283  run;
284 %end;
285 
286 %put NOTE: CARDS FILE SAVED IN:;
287 %put NOTE-;%put NOTE-;
288 %put NOTE- %sysfunc(dequote(&cards_file.));
289 %put NOTE-;%put NOTE-;
290 %mend mp_ds2cards;
291 /** @endcond **/