mp_chop.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Splits a file of ANY SIZE by reference to a search string.
4  @details Provide a fileref and a search string to chop off part of a file.
5 
6  Works by reading in the file byte by byte, then marking the beginning and end
7  of each matched string, before finally doing the chop.
8 
9  Choose whether to keep the FIRST or the LAST section of the file. Optionally,
10  use an OFFSET to fix the precise chop point.
11 
12  Usage:
13 
14  %let src="%sysfunc(pathname(work))/file.txt";
15  %let str=Chop here!;
16  %let out1="%sysfunc(pathname(work))/file1.txt";
17  %let out2="%sysfunc(pathname(work))/file2.txt";
18  %let out3="%sysfunc(pathname(work))/file3.txt";
19  %let out4="%sysfunc(pathname(work))/file4.txt";
20 
21  data _null_;
22  file &src;
23  put "startsection&str.endsection";
24  run;
25 
26  %mp_chop(&src, matchvar=str, keep=FIRST, outfile=&out1)
27  %mp_chop(&src, matchvar=str, keep=LAST, outfile=&out2)
28  %mp_chop(&src, matchvar=str, keep=FIRST, matchpoint=END, outfile=&out3)
29  %mp_chop(&src, matchvar=str, keep=LAST, matchpoint=END, outfile=&out4)
30 
31  filename results (&out1 &out2 &out3 &out4);
32  data _null_;
33  infile results;
34  input;
35  list;
36  run;
37 
38  Results:
39  @li `startsection`
40  @li `Chop here!endsection`
41  @li `startsectionChop here!`
42  @li `endsection`
43 
44  For more examples, see mp_chop.test.sas
45 
46  @param [in] infile The QUOTED path to the file on which to perform the chop
47  @param [in] matchvar= () Macro variable NAME containing the string to split by
48  @param [in] matchpoint= (START) Valid values:
49  @li START - chop at the beginning of the string in `matchvar`.
50  @li END - chop at the end of the string in `matchvar`.
51  @param [in] offset= (0) An adjustment to the precise chop location, by
52  reference to the `matchpoint`. Should be a positive or negative integer.
53  @param [in] keep= (FIRST) Valid values:
54  @li FIRST - keep the section of the file before the chop
55  @li LAST - keep the section of the file after the chop
56  @param [in] mdebug= (0) Set to 1 to provide macro debugging
57  @param [out] outfile= (0)
58  Optional QUOTED path to the adjusted output file (avoids
59  overwriting the first file).
60 
61  <h4> SAS Macros </h4>
62  @li mf_getuniquefileref.sas
63  @li mf_getuniquename.sas
64 
65  <h4> Related Macros </h4>
66  @li mp_abort.sas
67  @li mp_gsubfile.sas
68  @li mp_replace.sas
69  @li mp_chop.test.sas
70 
71  @version 9.4
72  @author Allan Bowe
73 
74 **/
75 
%macro mp_chop(infile,
  matchvar=,
  matchpoint=START,
  keep=FIRST,
  offset=0,
  mdebug=0,
  outfile=0
)/*/STORE SOURCE*/;

/*
  Chops &infile at the first occurrence of the string held in the macro
  variable named by &matchvar, and writes the retained section to &outfile
  (or back over &infile when outfile is not supplied).

  Steps:
    1. Read the file one byte per observation into &ds1.
    2. Locate the first full match, storing START / STOP byte positions
       in &ds2 (zero rows when there is no match).
    3. Convert START / STOP into a split position using &matchpoint,
       &keep and &offset.
    4. Write out the bytes before (keep=FIRST) or after (keep=LAST) the split.
*/

/* every working macro variable is local so nothing leaks to the caller */
%local fref0 dttm ds1 ds2 split matched;
%let fref0=%mf_getuniquefileref();
%let ds1=%mf_getuniquename(prefix=allchars);
%let ds2=%mf_getuniquename(prefix=startmark);

%if &outfile=0 %then %let outfile=&infile;

/* the comparisons below are made against uppercase literals */
%let keep=%upcase(&keep);
%let matchpoint=%upcase(&matchpoint);

%mp_abort(iftrue= (%length(%superq(&matchvar))=0)
  ,mac=mp_chop.sas
  ,msg=%str(&matchvar is an empty variable)
)
/* an unrecognised value would otherwise give a missing split point and a
   silently incorrect output file */
%mp_abort(iftrue= (&keep ne FIRST and &keep ne LAST)
  ,mac=mp_chop.sas
  ,msg=%str(Invalid value for keep: &keep - use FIRST or LAST)
)
%mp_abort(iftrue= (&matchpoint ne START and &matchpoint ne END)
  ,mac=mp_chop.sas
  ,msg=%str(Invalid value for matchpoint: &matchpoint - use START or END)
)

/* START */
%let dttm=%sysfunc(datetime());

/* recfm=n reads the file as a byte stream, with no record boundaries */
filename &fref0 &infile lrecl=1 recfm=n;

/* create dataset with one char per row */
data &ds1;
  infile &fref0;
  input sourcechar $char1. @@;
  format sourcechar $hex2.;
run;

/* get start & stop position of first matchvar string (one row, two vars) */
data &ds2;
  /* set find string to length in bytes to cover trailing spaces */
  length string $ %length(%superq(&matchvar));
  string =symget("&matchvar");
  drop string;

  firstchar=char(string,1);
  findlen=lengthm(string); /* <- for trailing bytes */

  /* _N_ is used as the outer read pointer into &ds1 */
  do _N_=1 to nobs;
    set &ds1 nobs=nobs point=_N_;
    if sourcechar=firstchar then do;
      /* candidate found - compare the following bytes with the string */
      pos=1;
      s=0;
      do point=_N_ to min(_N_ + findlen -1,nobs);
        set &ds1 point=point;
        if sourcechar=char(string, pos) then s + 1;
        else goto _leave_;
        pos+1;
      end;
      _leave_:
      if s=findlen then do;
        START =_N_;
        _N_ =_N_+ s - 1;
        STOP =_N_;
        output;
        /* matched! */
        stop;
      end;
    end;
  end;
  /* explicit stop is required as all reads use point= */
  stop;
  keep START STOP;
run;

/*
  Derive the split position.  A dedicated flag (matched) records whether a
  match row existed, because a split of 0 is a legitimate result (for
  instance keep=LAST, matchpoint=START with the string at the first byte).
*/
%let matched=0;
%let split=0;
data _null_;
  set &ds2;
  if "&matchpoint"='START' then do;
    if "&keep"='FIRST' then mp=start;
    else if "&keep"='LAST' then mp=start-1;
  end;
  else if "&matchpoint"='END' then do;
    if "&keep"='FIRST' then mp=stop+1;
    else if "&keep"='LAST' then mp=stop;
  end;
  split=mp+&offset;
  call symputx('split',split,'l');
  call symputx('matched',1,'l');
%if &mdebug=1 %then %do;
  put (_all_)(=);
  %put &=offset;
%end;
run;

%if &matched=0 %then %do;
  %put &sysmacroname: No match found in &infile for string %superq(&matchvar);
  /* release resources before the early exit (retained when debugging) */
  %if &mdebug=0 %then %do;
    filename &fref0 clear;
    proc delete data=&ds1 &ds2;
    run;
  %end;
  %return;
%end;

/* write the retained section, byte by byte */
data _null_;
  file &outfile recfm=n;
  set &ds1;
%if &keep=FIRST %then %do;
  /* bytes 1 to split-1 are kept */
  if _n_ ge &split then stop;
%end;
%else %do;
  /* bytes split+1 onwards are kept */
  if _n_ gt &split;
%end;
  put sourcechar $char1.;
run;

%if &mdebug=0 %then %do;
  filename &fref0 clear;
  /* the per-byte dataset can be very large so it is not left behind */
  proc delete data=&ds1 &ds2;
  run;
%end;
%else %do;
  /* debug mode: show (up to 200 records of) the result, and retain the
     fileref and working datasets for inspection */
  data _null_;
    infile &outfile lrecl=32767;
    input;
    list;
    if _n_>200 then stop;
  run;
%end;
/* END */
%put &sysmacroname took %sysevalf(%sysfunc(datetime())-&dttm) seconds to run;

%mend mp_chop;