mp_ds2squeeze.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Create a smaller version of a dataset, without data loss
4  @details This macro will scan the input dataset and create a new one, that
5  has the minimum variable lengths needed to store the data without data loss.
6 
7  Inspiration was taken from [How to Reduce the Disk Space Required by a
8  SASĀ® Data Set](https://www.lexjansen.com/nesug/nesug06/io/io18.pdf) by
9  Selvaratnam Sridharma. The end of the referenced paper presents a macro named
10  "squeeze", hence the nomenclature.
11 
12  Usage:
13 
14  data big;
15  length my big $32000;
16  do i=1 to 1e4;
17  my=repeat('oh my',100);
18  big='dawg';
19  special=._;
20  output;
21  end;
22  run;
23 
24  %mp_ds2squeeze(work.big,outds=work.smaller)
25 
26  The following will also be printed to the log (exact values may differ
27  depending on your OS and COMPRESS settings):
28 
29  > MP_DS2SQUEEZE: work.big was 625MB
30 
31  > MP_DS2SQUEEZE: work.smaller is 5MB
32 
33  @param [in] libds The library.dataset to be squeezed
34  @param [out] outds= (work.mp_ds2squeeze) The squeezed dataset to create
35  @param [in] mdebug= (0) Set to 1 to enable DEBUG messages
36 
37  <h4> SAS Macros </h4>
38  @li mf_getfilesize.sas
39  @li mf_getuniquefileref.sas
40  @li mf_getuniquename.sas
41  @li mp_getmaxvarlengths.sas
42 
43  <h4> Related Programs </h4>
44  @li mp_ds2squeeze.test.sas
45 
46  @version 9.3
47  @author Allan Bowe
48 **/
49 
50 %macro mp_ds2squeeze(
51  libds,
52  outds=work.mp_ds2squeeze,
53  mdebug=0
54 )/*/STORE SOURCE*/;
55 %local dbg source;
56 %if &mdebug=1 %then %do;
57  %put &sysmacroname entry vars:;
58  %put _local_;
59 %end;
60 %else %do;
61  %let dbg=*;
62  %let source=/source2;
63 %end;
64 
65 %local optval ds fref startsize;
66 %let ds=%mf_getuniquename();
67 %let fref=%mf_getuniquefileref();
68 %let startsize=%mf_getfilesize(libds=&libds,format=yes);
69 
70 %mp_getmaxvarlengths(&libds,outds=&ds)
71 
72 data _null_;
73  set &ds end=last;
74  file &fref;
75  /* grab the types */
76  retain dsid;
77  if _n_=1 then dsid=open("&libds",'is');
78  if dsid le 0 then do;
79  msg=sysmsg();
80  put msg=;
81  stop;
82  end;
83  type=vartype(dsid,varnum(dsid, name));
84  if last then rc=close(dsid);
85  /* write out the length statement */
86  if _n_=1 then put 'length ';
87  length len $6;
88  if type='C' then do;
89  if maxlen=0 then len='$1';
90  else len=cats('$',maxlen);
91  end;
92  else do;
93  if maxlen=0 then len='3';
94  else len=cats(maxlen);
95  end;
96  put ' ' name ' ' len;
97  if last then put ';';
98 run;
99 
100 /* configure varlenchk - as we are explicitly shortening the variables */
101 %let optval=%sysfunc(getoption(varlenchk));
102 options varlenchk=NOWARN;
103 
104 data &outds;
105  %inc &fref &source;
106  set &libds;
107 run;
108 
109 options varlenchk=&optval;
110 
111 %if &mdebug=0 %then %do;
112  proc sql;
113  drop table &ds;
114  filename &fref clear;
115 %end;
116 
117 %put &sysmacroname: &libds was &startsize;
118 %put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes);
119 
120 %mend mp_ds2squeeze;