|
|
楼主

楼主 |
发表于 2008-1-16 15:48:00
|
只看该作者
两个数据集结构的比较
比较B数据集的所有字段名以及字段类型是否都能够在A数据集中找到,反之亦然。
我在一本叫《data preperation in data mining using sas》的书中看到这样一个marco:
%macro SchCompare(A,B,Result, I_St);
/* This macro compares the schemas of two datasets A and B.
It tests the hypothesis that ALL variables in B
are present in A with the same data type. The result of the
comparison is stored in the dataset Result, which contains
one row per variable of the dataset B.
The I_ST contains the status of the comparison, such that
1=all variables in B are in A,
0=some variables in B are not in A.
In the result dataset, we call B the "Sample", and A the "Base".
*/
/* Step 1: Create a table with the field names
and types for datasets A and B. */
proc sql noprint;
create table TA as
select name,type from dictionary.columns
where memname = "%upcase(&A)"
order by name;
create table TB as
select name,type from dictionary.columns
where memname = "%upcase(&B)"
order by name;
select count(*) into:N from TB;
run;
quit;
/* Step 2: Loop on each element in TB and attempt
to find it in TA. We do that by converting the
elements of TB to macro variables. */
data _Null_;
set TB;
call symput('V_'||left(_n_),name);
call symput('T_'||left(_n_),Type);
run;
/* Step 2: Loop on the N variables in TB and check
if they exist in TA and create the entries in Result */
proc sql noprint;
create table &Result (VarName char(32),
VarType char(8),
ExitsInBase num,
Comment char(80));
%do i=1 %to &N;
select count(*) into: Ni from TA
where name="&&V_&i" ;
%if &Ni eq 0 %then %do ; /* variable does not exit */
%let Value=0;
%let Comment=Variable does not exits
in Base Dataset.;
%goto NextVar;
%end;
select count(*) into: Ti from TA
where name="&&V_&i" and Type="&&T_&i";
%if &Ti gt 0 %then %do; /* perfect match */
%let Value=1;
%let Comment=Variable exits in Base Dataset
with the same data type. ;
%end;
%else %do; /* same name - different type */
%let Value=0;
%let Comment=Variable exits in Base Dataset
but with the different data type.;
%end;
%NextVar:;
insert into &Result values ("&&V_&i",
"&&T_&i",
&Value,
"&Comment");
%end;
select min(ExitsInBase) into: I from &Result;
run;
quit;
%let &I_ST=&I;
%mend;
不知各位有没有更好,更简明的方法? |
|