mp_retainedkey.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Generate and apply retained key values to a staging table
4  @details This macro will populate a staging table with a Retained Key based on
5  a business key and a base (target) table.
6 
7  Definition of retained key ([source](
8  http://bukhantsov.org/2012/04/what-is-data-vault/)):
9 
10  > The retained key is a key which is mapped to business key one-to-one. In
11  > comparison, the surrogate key includes time and there can be many surrogate
12  > keys corresponding to one business key. This explains the name of the key,
13  > it is retained with insertion of a new version of a row while surrogate key
14  > is increasing.
15 
16  This macro is designed to be used as part of a wider load / ETL process (such
17  as the one in [Data Controller for SAS](https://datacontroller.io)).
18 
19  Specifically, the macro assumes that the base table has already been 'locked'
20  (eg with the mp_lockanytable.sas macro) prior to invocation. Also, several
21  tables are assumed to exist (names are configurable):
22 
23  @li work.staging_table - the staged data, minus the retained key element
24  @li permlib.base_table - the target table to be loaded (**not** loaded by this
25  macro)
26  @li permlib.maxkeytable - optional, used to store load metaadata.
27  The definition is available by running mp_coretable.sas as follows:
28  `mp_coretable(MAXKEYTABLE)`.
29  @li permlib.locktable - Necessary if maxkeytable is being populated. The
30  definition is available by running mp_coretable.sas as follows:
31  `mp_coretable(LOCKTABLE)`.
32 
33 
34  @param [in] base_lib= (WORK) Libref of the base (target) table.
35  @param [in] base_dsn= (BASETABLE) Name of the base (target) table.
36  @param [in] append_lib= (WORK) Libref of the staging table
37  @param [in] append_dsn= (APPENDTABLE) Name of the staging table
38  @param [in] retained_key= (DEFAULT_RK) Name of RK to generate (should exist on
39  base table)
40  @param [in] business_key= (PK1 PK2) Business key against which to generate
41  RK values. Should be unique and not null on the staging table.
42  @param [in] check_uniqueness=(NO) Set to yes to perform a uniqueness check.
43  Recommended if there is a chance that the staging data is not unique on the
44  business key.
45  @param [in] maxkeytable= (0) Provide a maxkeytable libds reference here, to
46  store load metadata (maxkey val, load time). Set to zero if metadata is not
47  required, eg, when preparing a 'dummy' load. Structure is described above.
48  See below for sample data.
49  |KEYTABLE:$32.|KEYCOLUMN:$32.|MAX_KEY:best.|PROCESSED_DTTM:E8601DT26.6|
50  |---|---|---|---|
51  |`DC487173.MPE_SELECTBOX `|`SELECTBOX_RK `|`55 `|`1950427787.8 `|
52  |`DC487173.MPE_FILTERANYTABLE `|`filter_rk `|`14 `|`1951053886.8 `|
53  @param [in] locktable= (0) If updating the maxkeytable, provide the libds
54  reference to the lock table (per mp_lockanytable.sas macro)
55  @param [in] filter_str= Apply a filter - useful for SCD2 or BITEMPORAL loads.
56  Example: `filter_str=%str( (where=( &now < &tech_to)) )`
57  @param [out] outds= (WORK.APPEND) Output table (staging table + retained key)
58 
59  <h4> SAS Macros </h4>
60  @li mf_existvar.sas
61  @li mf_fmtdttm.sas
62  @li mf_getquotedstr.sas
63  @li mf_getuniquename.sas
64  @li mf_nobs.sas
65  @li mp_abort.sas
66  @li mp_lockanytable.sas
67 
68  <h4> Related Macros </h4>
69  @li mp_filterstore.sas
70  @li mp_retainedkey.test.sas
71 
72  @version 9.2
73 
74 **/
75 
76 %macro mp_retainedkey(
77  base_lib=WORK
78  ,base_dsn=BASETABLE
79  ,append_lib=WORK
80  ,append_dsn=APPENDTABLE
81  ,retained_key=DEFAULT_RK
82  ,business_key= PK1 PK2
83  ,check_uniqueness=NO
84  ,maxkeytable=0
85  ,locktable=0
86  ,outds=WORK.APPEND
87  ,filter_str=
88 );
89 %put &sysmacroname entry vars:;
90 %put _local_;
91 
92 %local base_libds app_libds key_field check maxkey idx_pk newkey_cnt iserr
93  msg x tempds1 tempds2 comma_pk appnobs checknobs dropvar tempvar idx_val;
94 %let base_libds=%upcase(&base_lib..&base_dsn);
95 %let app_libds=%upcase(&append_lib..&append_dsn);
96 %let tempds1=%mf_getuniquename();
97 %let tempds2=%mf_getuniquename();
98 %let comma_pk=%mf_getquotedstr(in_str=%str(&business_key),dlm=%str(,),quote=);
99 %let outds=%sysfunc(ifc(%index(&outds,.)=0,work.&outds,&outds));
100 /* validation checks */
101 %let iserr=0;
102 %if &syscc>0 %then %do;
103  %let iserr=1;
104  %let msg=%str(SYSCC=&syscc on macro entry);
105 %end;
106 %else %if %sysfunc(exist(&base_libds))=0 %then %do;
107  %let iserr=1;
108  %let msg=%str(Base LIBDS (&base_libds) expected but NOT FOUND);
109 %end;
110 %else %if %sysfunc(exist(&app_libds))=0 %then %do;
111  %let iserr=1;
112  %let msg=%str(Append LIBDS (&app_libds) expected but NOT FOUND);
113 %end;
114 %else %if &maxkeytable ne 0 and %sysfunc(exist(&maxkeytable))=0 %then %do;
115  %let iserr=1;
116  %let msg=%str(Maxkeytable (&maxkeytable) expected but NOT FOUND);
117 %end;
118 %else %if &maxkeytable ne 0 and %sysfunc(exist(&locktable))=0 %then %do;
119  %let iserr=1;
120  %let msg=%str(Locktable (&locktable) expected but NOT FOUND);
121 %end;
122 %else %if %length(&business_key)=0 %then %do;
123  %let iserr=1;
124  %let msg=%str(Business key (&business_key) expected but NOT FOUND);
125 %end;
126 
127 %do x=1 %to %sysfunc(countw(&business_key));
128  /* check business key values exist */
129  %let key_field=%scan(&business_key,&x,%str( ));
130  %if not %mf_existvar(&app_libds,&key_field) %then %do;
131  %let iserr=1;
132  %let msg=Business key (&key_field) not found on &app_libds!;
133  %goto err;
134  %end;
135  %else %if not %mf_existvar(&base_libds,&key_field) %then %do;
136  %let iserr=1;
137  %let msg=Business key (&key_field) not found on &base_libds!;
138  %goto err;
139  %end;
140 %end;
141 %err:
142 %if &iserr=1 %then %do;
143  /* err case so first perform an unlock of the base table before exiting */
144  %mp_lockanytable(
145  UNLOCK,lib=&base_lib,ds=&base_dsn,ref=%superq(msg),ctl_ds=&locktable
146  )
147 %end;
148 %mp_abort(iftrue=(&iserr=1),mac=mp_retainedkey,msg=%superq(msg))
149 
150 proc sql noprint;
151 select sum(max(&retained_key),0) into: maxkey from &base_libds;
152 
153 /**
154  * get base table RK and bus field values for lookup
155  */
156 proc sql noprint;
157 create table &tempds1 as
158  select distinct &comma_pk,&retained_key
159  from &base_libds &filter_str
160  order by &comma_pk,&retained_key;
161 
162 %if &check_uniqueness=YES %then %do;
163  select count(*) into:checknobs
164  from (select distinct &comma_pk from &app_libds);
165  select count(*) into: appnobs from &app_libds; /* might be view */
166  %if &checknobs ne &appnobs %then %do;
167  %let msg=Source table &app_libds is not unique on (&business_key);
168  %let iserr=1;
169  %end;
170 %end;
171 %if &iserr=1 %then %do;
172  /* err case so first perform an unlock of the base table before exiting */
173  %mp_lockanytable(
174  UNLOCK,lib=&base_lib,ds=&base_dsn,ref=%superq(msg),ctl_ds=&locktable
175  )
176 %end;
177 %mp_abort(iftrue= (&iserr=1),mac=mp_retainedkey,msg=%superq(msg))
178 
179 %if %mf_existvar(&app_libds,&retained_key)
180 %then %let dropvar=(drop=&retained_key);
181 
182 /* prepare interim table with retained key populated for matching keys */
183 proc sql noprint;
184 create table &tempds2 as
185  select b.&retained_key, a.*
186  from &app_libds &dropvar a
187  left join &tempds1 b
188  on 1
189  %do idx_pk=1 %to %sysfunc(countw(&business_key));
190  %let idx_val=%scan(&business_key,&idx_pk);
191  and a.&idx_val=b.&idx_val
192  %end;
193  order by &retained_key;
194 
195 /* identify the number of entries without retained keys (new records) */
196 select count(*) into: newkey_cnt
197  from &tempds2
198  where missing(&retained_key);
199 quit;
200 
201 /**
202  * Update maxkey table if link provided
203  */
204 %if &maxkeytable ne 0 %then %do;
205  proc sql noprint;
206  select count(*) into: check from &maxkeytable
207  where upcase(keytable)="&base_libds";
208 
209  %mp_lockanytable(LOCK
210  ,lib=%scan(&maxkeytable,1,.)
211  ,ds=%scan(&maxkeytable,2,.)
212  ,ref=Updating maxkeyvalues with mp_retainedkey
213  ,ctl_ds=&locktable
214  )
215  proc sql;
216  %if &check=0 %then %do;
217  insert into &maxkeytable
218  set keytable="&base_libds"
219  ,keycolumn="&retained_key"
220  ,max_key=%eval(&maxkey+&newkey_cnt)
221  ,processed_dttm="%sysfunc(datetime(),%mf_fmtdttm())"dt;
222  %end;
223  %else %do;
224  update &maxkeytable
225  set max_key=%eval(&maxkey+&newkey_cnt)
226  ,processed_dttm="%sysfunc(datetime(),%mf_fmtdttm())"dt
227  where keytable="&base_libds";
228  %end;
229  %mp_lockanytable(UNLOCK
230  ,lib=%scan(&maxkeytable,1,.)
231  ,ds=%scan(&maxkeytable,2,.)
232  ,ref=Updating maxkeyvalues with maxkey=%eval(&maxkey+&newkey_cnt)
233  ,ctl_ds=&locktable
234  )
235 %end;
236 
237 /* fill in the missing retained key values */
238 %let tempvar=%mf_getuniquename();
239 data &outds(drop=&tempvar);
240  retain &tempvar %eval(&maxkey+1);
241  set &tempds2;
242  if &retained_key =. then &retained_key=&tempvar;
243  &tempvar=&tempvar+1;
244 run;
245 
246 %mend mp_retainedkey;
247