mp_cleancsv.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Fixes embedded cr / lf / crlf in CSV
4  @details CSVs will sometimes contain lf or crlf within quotes (eg when
5  saved by excel). When the termstr is ALSO lf or crlf that can be tricky
6  to process using SAS defaults.
7  This macro converts any csv to follow the convention of a windows excel file,
8  applying CRLF line endings and converting embedded cr and crlf to lf.
9 
10  Usage:
11 
12  fileref mycsv "/path/your/csv";
13  %mp_cleancsv(in=mycsv,out=/path/new.csv)
14 
15  @param [in] in= (NOTPROVIDED)
16  Provide path or fileref to input csv. If a period is
17  found, it is assumed to be a file.
18  @param [in] out= (NOTPROVIDED) Output path or fileref to output csv.
19  If a period is found, it is assumed to be a file.
20  @param [in] qchar= ('22'x) Quote char - hex code 22 is the double quote.
21 
22  @version 9.2
23  @author Allan Bowe
24  @cond
25 **/
26 
27 %macro mp_cleancsv(in=NOTPROVIDED,out=NOTPROVIDED,qchar='22'x);
28 %if "&in"="NOTPROVIDED" or "&out"="NOTPROVIDED" %then %do;
29  %put %str(ERR)OR: Please provide valid input (&in) & output (&out) locations;
30  %return;
31 %end;
32 
33 /* presence of a period(.) indicates a physical location */
34 %if %index(&in,.) %then %let in="&in";
35 %if %index(&out,.) %then %let out="&out";
36 
37 /**
38  * convert all cr and crlf within quotes to lf
39  * convert all other cr or lf to crlf
40  */
41  data _null_;
42  infile &in recfm=n ;
43  file &out recfm=n;
44  retain isq iscrlf 0 qchar &qchar;
45  input inchar $char1. ;
46  if inchar=qchar then isq = mod(isq+1,2);
47  if isq then do;
48  /* inside a quote change cr and crlf to lf */
49  if inchar='0D'x then do;
50  put '0A'x;
51  input inchar $char1.;
52  if inchar ne '0A'x then do;
53  put inchar $char1.;
54  if inchar=qchar then isq = mod(isq+1,2);
55  end;
56  end;
57  else put inchar $char1.;
58  end;
59  else do;
60  /* outside a quote, change cr and lf to crlf */
61  if inchar='0D'x then do;
62  crblank:
63  put '0D0A'x;
64  input inchar $char1.;
65  if inchar='0D'x then do;
66  /* multiple CR indicates CR formatted file with blank lines */
67  goto crblank;
68  end;
69  else if inchar ne '0A'x then do;
70  put inchar $char1.;
71  if inchar=qchar then isq = mod(isq+1,2);
72  end;
73  end;
74  else if inchar='0A'x then put '0D0A'x;
75  else put inchar $char1.;
76  end;
77  run;
78 %mend mp_cleancsv;
79 /** @endcond */