mp_filtercheck.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Checks an input filter table for validity
4  @details Performs checks on the input table to ensure it arrives in the
5  correct format. This is necessary to prevent code injection. Will update
6  SYSCC to 1008 if bad records are found, and call mp_abort.sas for a
7  graceful service exit (configurable).
8 
9  Used for dynamic filtering in [Data Controller for SAS®](
10  https://datacontroller.io).
11 
12  Usage:
13 
14  %mp_filtercheck(work.filter,targetds=sashelp.class,outds=work.badrecords)
15 
16  The input table should have the following format:
17 
18  |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$4000|
19  |---|---|---|---|---|---|
20  |AND|AND|1|AGE|=|12|
21  |AND|AND|1|SEX|<=|'M'|
22  |AND|OR|2|Name|NOT IN|('Jane','Alfred')|
23  |AND|OR|2|Weight|>=|7|
24 
25  Rules applied:
26 
27  @li GROUP_LOGIC - only AND/OR
28  @li SUBGROUP_LOGIC - only AND/OR
29  @li SUBGROUP_ID - only integers
30  @li VARIABLE_NM - must be in the target table
31  @li OPERATOR_NM - only =/>/</<=/>=/BETWEEN/IN/NOT IN/NE/CONTAINS
32  @li RAW_VALUE - no unquoted values except integers, commas and spaces.
33 
34  @returns The &outds table containing any bad rows, plus a REASON_CD column.
35 
36  @param [in] inds The table to be checked, with the format above
37  @param [in] targetds= The target dataset against which to verify VARIABLE_NM.
38  This must be available (ie, the library must be assigned).
39  @param [out] abort= (YES) If YES will call mp_abort.sas on any exceptions
40  @param [out] outds= (work.badrecords) The output table, which is a copy of the
41  &inds. table plus a REASON_CD column, containing only bad records.
42  If bad records are found, the SYSCC value will be set to 1008
43  (a general data problem).
44  Downstream processes should check this table (and return code) before
45  continuing.
46 
47  <h4> SAS Macros </h4>
48  @li mp_abort.sas
49  @li mf_getuniquefileref.sas
50  @li mf_getvarlist.sas
51  @li mf_getvartype.sas
52  @li mp_filtergenerate.sas
53  @li mp_filtervalidate.sas
54 
55  <h4> Related Macros </h4>
56  @li mp_filtergenerate.sas
57  @li mp_filtervalidate.sas
58 
59  @version 9.3
60  @author Allan Bowe
61 
62  @todo Support date / hex / name literals and exponents in RAW_VALUE field
63 **/
64 
65 %macro mp_filtercheck(inds,targetds=,outds=work.badrecords,abort=YES);
66 
67 %mp_abort(iftrue= (&syscc ne 0)
68  ,mac=&sysmacroname
69  ,msg=%str(syscc=&syscc - on macro entry)
70 )
71 
72 /* Validate input column */
73 %local vtype;
74 %let vtype=%mf_getvartype(&inds,RAW_VALUE);
75 %mp_abort(iftrue=(&abort=YES and &vtype ne C),
76  mac=&sysmacroname,
77  msg=%str(%str(ERR)OR: RAW_VALUE must be character)
78 )
79 %if &vtype ne C %then %do;
80  %put &sysmacroname: RAW_VALUE must be character;
81  %let syscc=42;
82  %return;
83 %end;
84 
85 
86 /**
87  * Sanitise the values based on valid value lists, then strip out
88  * quotes, commas, periods and spaces.
89  */
90 %local reason_cd nobs;
91 %let nobs=0;
92 data &outds;
93  /*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
94  OPERATOR_NM $10 RAW_VALUE $4000;*/
95  set &inds end=last;
96  length reason_cd $4032 vtype vtype2 $1 vnum dsid 8 tmp $4000;
97  drop tmp;
98 
99  /* quick check to ensure column exists */
100  if upcase(VARIABLE_NM) not in
101  (%upcase(%mf_getvarlist(&targetds,dlm=%str(,),quote=SINGLE)))
102  then do;
103  REASON_CD="Variable "!!cats(variable_nm)!!" not in &targetds";
104  putlog REASON_CD= VARIABLE_NM=;
105  call symputx('reason_cd',reason_cd,'l');
106  call symputx('nobs',_n_,'l');
107  output;
108  return;
109  end;
110 
111  /* need to open the dataset to get the column type */
112  retain dsid;
113  if _n_=1 then dsid=open("&targetds","i");
114  if dsid>0 then do;
115  vnum=varnum(dsid,VARIABLE_NM);
116  if vnum<1 then do;
117  /* should not happen as was also tested for above */
118  REASON_CD=cats("Variable (",VARIABLE_NM,") not found in &targetds");
119  putlog REASON_CD= dsid=;
120  call symputx('reason_cd',reason_cd,'l');
121  call symputx('nobs',_n_,'l');
122  output;
123  goto endstep;
124  end;
125  /* now we can get the type */
126  else vtype=vartype(dsid,vnum);
127  end;
128  else do;
129  REASON_CD=cats("Could not open &targetds");
130  putlog REASON_CD= dsid=;
131  call symputx('reason_cd',reason_cd,'l');
132  call symputx('nobs',_n_,'l');
133  output;
134  stop;
135  end;
136 
137  /* closed list checks */
138  if GROUP_LOGIC not in ('AND','OR') then do;
139  REASON_CD='GROUP_LOGIC should be AND/OR, not:'!!cats(GROUP_LOGIC);
140  putlog REASON_CD= GROUP_LOGIC=;
141  call symputx('reason_cd',reason_cd,'l');
142  call symputx('nobs',_n_,'l');
143  output;
144  end;
145  if SUBGROUP_LOGIC not in ('AND','OR') then do;
146  REASON_CD='SUBGROUP_LOGIC should be AND/OR, not:'!!cats(SUBGROUP_LOGIC);
147  putlog REASON_CD= SUBGROUP_LOGIC=;
148  call symputx('reason_cd',reason_cd,'l');
149  call symputx('nobs',_n_,'l');
150  output;
151  end;
152  if mod(SUBGROUP_ID,1) ne 0 then do;
153  REASON_CD='SUBGROUP_ID should be integer, not '!!cats(subgroup_id);
154  putlog REASON_CD= SUBGROUP_ID=;
155  call symputx('reason_cd',reason_cd,'l');
156  call symputx('nobs',_n_,'l');
157  output;
158  end;
159  if OPERATOR_NM not in
160  ('=','>','<','<=','>=','NE','GE','LE','BETWEEN','IN','NOT IN','CONTAINS')
161  then do;
162  REASON_CD='Invalid OPERATOR_NM: '!!cats(OPERATOR_NM);
163  putlog REASON_CD= OPERATOR_NM=;
164  call symputx('reason_cd',reason_cd,'l');
165  call symputx('nobs',_n_,'l');
166  output;
167  end;
168 
169  /* special missing logic */
170  if vtype='N' & OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE') then do;
171  if cats(upcase(raw_value)) in (
172  '.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
173  '.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
174  )
175  then do;
176  /* valid numeric - exit data step loop */
177  return;
178  end;
179  else if subpad(upcase(raw_value),1,1) in (
180  'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
181  'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
182  )
183  then do;
184  /* check if the raw_value contains a valid variable NAME */
185  vnum=varnum(dsid,subpad(raw_value,1,32));
186  if vnum>0 then do;
187  /* now we can get the type */
188  vtype2=vartype(dsid,vnum);
189  /* check type matches */
190  if vtype2=vtype then do;
191  /* valid target var - exit loop */
192  return;
193  end;
194  else do;
195  REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
196  putlog REASON_CD= dsid=;
197  call symputx('reason_cd',reason_cd,'l');
198  call symputx('nobs',_n_,'l');
199  output;
200  goto endstep;
201  end;
202  end;
203  end;
204  end;
205 
206  /* special logic */
207  if OPERATOR_NM in ('IN','NOT IN','BETWEEN') then do;
208  if OPERATOR_NM='BETWEEN' then raw_value1=tranwrd(raw_value,' AND ',',');
209  else do;
210  if substr(raw_value,1,1) ne '('
211  or substr(cats(reverse(raw_value)),1,1) ne ')'
212  then do;
213  REASON_CD='Missing start/end bracket in RAW_VALUE';
214  putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
215  call symputx('reason_cd',reason_cd,'l');
216  call symputx('nobs',_n_,'l');
217  output;
218  end;
219  else raw_value1=substr(raw_value,2,max(length(raw_value)-2,0));
220  end;
221  /* we now have a comma seperated list of values */
222  if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
223  tmp=scan(raw_value1,i,',');
224  if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
225  if OPERATOR_NM ='BETWEEN' and subpad(upcase(tmp),1,1) in (
226  'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
227  'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
228  )
229  then do;
230  /* check if the raw_value contains a valid variable NAME */
231  /* is not valid syntax for IN or NOT IN */
232  vnum=varnum(dsid,subpad(tmp,1,32));
233  if vnum>0 then do;
234  /* now we can get the type */
235  vtype2=vartype(dsid,vnum);
236  /* check type matches */
237  if vtype2=vtype then do;
238  /* valid target var - exit loop */
239  return;
240  end;
241  else do;
242  REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
243  putlog REASON_CD= dsid=;
244  call symputx('reason_cd',reason_cd,'l');
245  call symputx('nobs',_n_,'l');
246  output;
247  goto endstep;
248  end;
249  end;
250  end;
251  REASON_CD='Non Numeric value provided';
252  putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
253  call symputx('reason_cd',reason_cd,'l');
254  call symputx('nobs',_n_,'l');
255  output;
256  end;
257  return;
258  end;
259  end;
260  else raw_value1=raw_value;
261 
262  /* remove nested literals eg '' */
263  raw_value1=tranwrd(raw_value1,"''",'');
264 
265  /* now match string literals (always single quotes) */
266  raw_value2=raw_value1;
267  regex = prxparse("s/(\').*?(\')//");
268  call prxchange(regex,-1,raw_value2);
269 
270  /* remove commas and periods*/
271  raw_value3=compress(raw_value2,',.');
272 
273  /* output records that contain values other than digits and spaces */
274  if notdigit(compress(raw_value3,' '))>0 then do;
275  if vtype='C' and subpad(upcase(raw_value),1,1) in (
276  'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
277  'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
278  )
279  then do;
280  /* check if the raw_value contains a valid variable NAME */
281  vnum=varnum(dsid,subpad(raw_value,1,32));
282  if vnum>0 then do;
283  /* now we can get the type */
284  vtype2=vartype(dsid,vnum);
285  /* check type matches */
286  if vtype2=vtype then do;
287  /* valid target var - exit loop */
288  return;
289  end;
290  else do;
291  REASON_CD=cats("Compared Char Type (",vtype2,") is not (",vtype,")");
292  putlog REASON_CD= dsid=;
293  call symputx('reason_cd',reason_cd,'l');
294  call symputx('nobs',_n_,'l');
295  output;
296  goto endstep;
297  end;
298  end;
299  end;
300 
301  putlog raw_value3= $hex32.;
302  REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
303  putlog (_all_)(=);
304  call symputx('reason_cd',reason_cd,'l');
305  call symputx('nobs',_n_,'l');
306  output;
307  end;
308 
309  endstep:
310  if last then rc=close(dsid);
311 run;
312 
313 
314 data _null_;
315  set &outds end=last;
316  putlog (_all_)(=);
317 run;
318 
319 %mp_abort(iftrue=(&abort=YES and &nobs>0),
320  mac=&sysmacroname,
321  msg=%str(Data issue: %superq(reason_cd))
322 )
323 
324 %if &nobs>0 %then %do;
325  %let syscc=1008;
326  %return;
327 %end;
328 
329 /**
330  * syntax checking passed but it does not mean the filter is valid
331  * for that we can run a proc sql validate query
332  */
333 %local fref1;
334 %let fref1=%mf_getuniquefileref();
335 %mp_filtergenerate(&inds,outref=&fref1)
336 
337 /* this macro will also set syscc to 1008 if any issues found */
338 %mp_filtervalidate(&fref1,&targetds,outds=&outds,abort=&abort)
339 
340 %mend mp_filtercheck;