|
楼主

楼主 |
发表于 2012-12-22 11:52:31
|
只看该作者
SAS读取不固定长度与大小的全文本文件
程序的前半部分把所有内容读取为字符串，后半部分开始处理数字型变量。
[code:1nehunex]/* Provide the fully qualified path and name of the file to be read.
   The value is substituted into the double-quoted INFILE paths of the
   later steps, so write it here without surrounding quotation marks. */
%let filename=c:\_today\print_file.txt;
/* Provide the name of the SAS data set to be created.
   The same name is used as the DATA and SET target of every later step
   that builds or rewrites the result. */
%let dsname=work.printout;
/* Pass 1: discover the column layout of the text file.
   Every record is scanned character by character; a temporary array
   remembers, per column position, whether any record ever held a
   non-space character there. After the last record the array is
   collapsed into runs of used columns, and one observation per run is
   written to TEMP:
     var - sequence number of the field (1, 2, ...)
     sta - first column of the field
     fin - last column of the field
   The number of fields is stored in the macro variable VCNT.
*/
data temp;
  infile "&filename" lrecl=32767 truncover end=done;
  /* _TEMPORARY_ arrays are retained automatically and are not written out */
  array used_(32767) _temporary_ (32767*0);
  /* Widest record seen so far; no column beyond it can be in use */
  retain maxlen 0;

  /* This null input reads a record into the input buffer. */
  input;

  reclen = lengthn(_infile_);
  if reclen > maxlen then maxlen = reclen;

  do i = 1 to reclen;
    /* Only columns still believed to be blank need to be tested */
    if used_(i) = 0 then do;
      if substr(_infile_, i, 1) ne ' ' then used_(i) = 1;
    end;
  end;

  /* When all of the records have been processed, turn the per-column
     flags into start/end positions for each field. */
  if done then do;
    var = 0;       /* field counter */
    infield = 0;   /* 1 while the scan is inside a run of used columns */
    do i = 1 to maxlen;
      if used_(i) = 1 and infield = 0 then do;
        /* first used column after a gap (or at column 1) opens a field */
        sta = i;
        infield = 1;
      end;
      else if used_(i) = 0 and infield = 1 then do;
        /* first blank column after a field closes it */
        fin = i - 1;
        var = var + 1;
        output;
        infield = 0;
      end;
    end;
    /* A field that reaches the widest record length has no trailing
       blank column to close it, so close it here. */
    if infield = 1 then do;
      fin = maxlen;
      var = var + 1;
      output;
    end;
    /* Publish the field count once, without a fixed-width format, so
       counts above 999 are not mangled. */
    call symputx('vcnt', var);
  end;

  /* Keep only variables of interest */
  keep sta fin var;
run;
/* Build the INPUT statement needed to read the text file from the
   column map in TEMP, and wrap it in a macro named TEST. The pieces
   are queued with CALL EXECUTE and take effect once this step ends.
*/
data _null_;
  set temp end=lastcol;
  /* Open the macro definition and start the INPUT statement */
  if _n_ = 1 then call execute('%nrstr(%macro test;) input');
  /* One character column specification per field: varN $ start-end */
  call execute(catx(' ', cats('var', var), '$', cats(sta, '-', fin)));
  /* Terminate the INPUT statement and close the macro after the last field */
  if lastcol then call execute(';%nrstr(%mend test;)');
run;
/* Read the text file into &dsname with the INPUT statement that the
   TEST macro supplies. At this stage every variable is character.
*/
data &dsname;
  infile "&filename" lrecl=32767 truncover end=done;
  %test
run;
/* Pass 3: decide which character variables can become numeric.
   A variable qualifies when every non-blank value consists only of
   digits with at most one decimal point, and at least one non-blank
   value was seen. Blank cells are ignored so that a single missing
   value does not force a whole column to stay character.
   Results are published as macro variables:
     chcnt           - number of variables to convert
     chvar1..chvarN  - their names
*/
data _null_;
  set &dsname end=done;
  array sdf_(*) _character_;
  /* Per-variable flags, kept across observations and never written out */
  array nonnum_(&vcnt) _temporary_ (&vcnt*0);  /* 1 = a non-numeric value was found */
  array seen_(&vcnt)   _temporary_ (&vcnt*0);  /* 1 = a non-blank value was found   */

  do i = 1 to dim(sdf_);
    /* Once a variable is known to be character there is no need to
       test it again; blank cells carry no information either way. */
    if nonnum_(i) = 0 and not missing(sdf_(i)) then do;
      seen_(i) = 1;
      /* Reject anything other than digits and period, and reject
         values with several periods such as 1.2.3, which the INPUT
         function could not convert to a number. */
      if verify(strip(sdf_(i)), '1234567890.') > 0
         or countc(sdf_(i), '.') > 1 then nonnum_(i) = 1;
    end;
  end;

  /* When all of the observations have been tested, write the names
     to be converted into sequential macro variables, followed by
     their count. SYMPUTX strips blanks and needs no fixed-width
     format, so more than 999 variables are handled correctly. */
  if done then do;
    cnt = 0;
    do i = 1 to dim(sdf_);
      if nonnum_(i) = 0 and seen_(i) = 1 then do;
        cnt = cnt + 1;
        call symputx(cats('chvar', cnt), vname(sdf_(i)));
      end;
    end;
    call symputx('chcnt', cnt);
  end;
run;
/* Macro CTEST: generate the DATA step statements that convert the
   variables listed in chvar1..chvar&chcnt from character to numeric.
   For each name X it writes
     nX = input(X, best32.);   create the numeric copy
     rename nX = X;            give the copy the original name
     drop X;                   discard the character original
     format nX best16.;        display format of the numeric copy
   It must be called inside a DATA step that reads the data set.
   When chcnt is 0 nothing is generated, so no RENAME, DROP or FORMAT
   statement with an empty variable list is produced.
*/
%macro ctest;
  /* Keep the loop index from overwriting a variable I of the caller */
  %local i;
  %if &chcnt > 0 %then %do;
    /* A 32 wide informat reads the full digit string; a 16 wide one
       would silently cut longer values */
    %do i = 1 %to &chcnt;
      n&&chvar&i = input(&&chvar&i, best32.);
    %end;
    rename
    %do i = 1 %to &chcnt;
      n&&chvar&i = &&chvar&i
    %end;
    ;
    drop
    %do i = 1 %to &chcnt;
      &&chvar&i
    %end;
    ;
    format
    %do i = 1 %to &chcnt;
      n&&chvar&i
    %end;
    best16. ;
  %end;
%mend ctest;
/* Rebuild &dsname in place; the statements generated by the CTEST
   macro perform the character to numeric conversion on the way through.
*/
data &dsname;
  set &dsname;
  %ctest;
run;
/* Finally spin through the data one last time to put variables back
in the order in which they were read.
The previous step replaced each converted column with a newly created
variable, so those columns no longer sit in their original position.
The RETAIN statement has to stay ahead of the SET statement: it names
var1 through var&vcnt in sequence before SET brings the variables in.
*/
data &dsname;
retain var1-var&vcnt. ;
set &dsname;
run;
/* Check the results of the conversion. No DATA= option is given, so
   the report is expected to describe the data set created by the step
   just before this one; POSITION requests the variable listing in
   data set order as well. */
proc contents position; run;[/code:1nehunex] |
|