mp_makedata.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Create sample data based on the structure of an empty table
4  @details Many SAS projects involve sensitive datasets. One way to _ensure_
5  the data is anonymised, is never to receive it in the first place! Often
6  consultants are provided with empty tables, and expected to create complex
7  ETL flows.
8 
9  This macro can help by taking an empty table, and populating it with data
10  according to the variable types and formats.
11 
12  TODO:
13  @li Consider dates, datetimes, times, integers etc
14 
15  Usage:
16 
17  proc sql;
18  create table work.example(
19  TX_FROM float format=datetime19.,
20  DD_TYPE char(16),
21  DD_SOURCE char(2048),
22  DD_SHORTDESC char(256),
23  constraint pk primary key(tx_from, dd_type,dd_source),
24  constraint nnn not null(DD_SHORTDESC)
25  );
26  %mp_makedata(work.example)
27 
28  @param [in] libds The empty table (libref.dataset) in which to create data
29  @param [out] obs= (500) The maximum number of records to create. The table
30  is sorted with nodup on the primary key, so the actual number of records may
31  be lower than this.
32 
33  <h4> SAS Macros </h4>
34  @li mf_getuniquename.sas
35  @li mf_getvarlen.sas
36  @li mf_getvarlist.sas
37  @li mf_islibds.sas
38  @li mf_nobs.sas
39  @li mp_getcols.sas
40  @li mp_getpk.sas
41 
42  <h4> Related Macros </h4>
43  @li mp_makedata.test.sas
44 
45  @version 9.2
46  @author Allan Bowe
47 
48 **/
49 
50 %macro mp_makedata(libds
51  ,obs=500
52  ,seed=1
53 )/*/STORE SOURCE*/;
54 
55 %local ds1 ds2 lib ds pk_fields i col charvars numvars ispk;
56 
57 %if %mf_islibds(&libds)=0 %then %do;
58  %put &sysmacroname: Invalid libds (&libds) - should be library.dataset format;
59  %return;
60 %end;
61 %else %if %mf_nobs(&libds)>0 %then %do;
62  %put &sysmacroname: &libds has data, it will not be recreated;
63  %return;
64 %end;
65 
66 /* set up temporary vars */
67 %let ds1=%mf_getuniquename(prefix=mp_makedatads1);
68 %let ds2=%mf_getuniquename(prefix=mp_makedatads2);
69 %let lib=%scan(&libds,1,.);
70 %let ds=%scan(&libds,2,.);
71 
72 /* grab the primary key vars */
73 %mp_getpk(&lib,ds=&ds,outds=&ds1)
74 
75 proc sql noprint;
76 select coalescec(pk_fields,'_all_') into: pk_fields from &ds1;
77 
78 data &ds2;
79  if 0 then set &libds;
80  do _n_=1 to &obs;
81  %let charvars=%mf_getvarlist(&libds,typefilter=C);
82  %if &charvars ^= %then %do i=1 %to %sysfunc(countw(&charvars));
83  %let col=%scan(&charvars,&i);
84  /* create random value based on observation number and colum length */
85  &col=repeat(put(md5(cats(_n_)),$hex32.),%mf_getvarlen(&libds,&col)/32);
86  %end;
87 
88  %let numvars=%mf_getvarlist(&libds,typefilter=N);
89  %if &numvars ^= %then %do i=1 %to %sysfunc(countw(&numvars));
90  %let col=%scan(&numvars,&i);
91  &col=_n_;
92  %end;
93  output;
94  end;
95  stop;
96 run;
97 proc sort data=&ds2 nodupkey;
98  by &pk_fields;
99 run;
100 
101 proc append base=&libds data=&ds2;
102 run;
103 
104 proc sql;
105 drop table &ds1, &ds2;
106 
107 %mend mp_makedata;