SAS中文论坛

标题: IML数据模拟出现问题，请帮忙！ [打印本页]

作者: shiyiming 时间: 2012-9-25 17:37
标题: IML数据模拟出现问题，请帮忙！
随机产生500行，一列的数据；（1st）
从中随机产生100行，重复抽样变为500行；（2nd）；
从1st中抽出100行，从2nd中抽出400行，组成500行，依然按照原来的方法进行有重复的随机抽样100行，然后变500行为第3个500行（3rd）（抽样方法下同）；
从1st中抽出100行，从2nd中抽出100行，从3rd中抽出300行，组成500行（4th）；
从1st中抽出100行，从2nd中抽出100行，从3rd中抽出100行，从4th中抽出200行，组成500行，（5th）；
分别从1st，2nd，3rd，4th，5th中各抽100行，组成500行，（6th）；
从此后，按6th规则，依次从前面几个500中，各抽取100行，组成500行，形成（7th~50th），并能指定输出任一th的500行。

作者: shiyiming 时间: 2012-10-3 01:35
标题: Re: IML数据模拟出现问题，请帮忙！
我觉得你的问题是怎样产生足够的随机数。是否可以一次产生所有随机数，然后分段分隔成数个组？n 指向500个随机数的序号。
我试验了一下，最后sample6只剩下265 不同X值, 最多的出现了9次：

Dup_Times  Count
    1       141
    2        67
    3        29
    4        16
    5         6
    6         2
    7         1
    8         2
    9         1

/* Base sample (1st): 500 rows, keyed by row number n, with a uniform(0,1) value x. */
data sample1;
   do n = 1 to 500;
      /* RANUNI honours its seed argument only on the first call in a step, */
      /* so the original ranuni(n) was effectively ranuni(1); the fixed     */
      /* seed is now spelled out and the generated stream is unchanged.     */
      x = ranuni(1);
      output;
   end;
run;

/* Master list of random row numbers (1..500) used by every later resampling step. */
/* 25100 rows = 3000 consumed by stages 2-6 + 22000 for stages 7-50 + 100 spare.   */
data s;
   do i = 1 to 25100;
      /* CEIL(500*u) is uniform on 1..500. The previous ROUND(499*u)+1 gave  */
      /* row numbers 1 and 500 only half the probability of the other rows.  */
      /* A non-positive seed makes RANUNI initialise from the system clock.  */
      n = ceil(500 * ranuni(-1));
      output;
   end;
   keep n;
run;

/* Cut the master index list s into consecutive slices, one per sampling task.  */
/* Slice names follow s<stage><part>_<rows>; everything past row 3000 goes to  */
/* ss, the pool of row numbers reserved for stages 7-50.                        */
data s2_500
     s31_100 s32_400 s3_500
     s41_100 s42_100 s43_300
     s51_100 s52_100 s53_100 s54_200
     s61_100 s62_100 s63_100 s64_100 s65_100
     ss;
   set s;
   select;
      when (_n_ <=  500) output s2_500;
      when (_n_ <=  600) output s31_100;
      when (_n_ <= 1000) output s32_400;
      when (_n_ <= 1500) output s3_500;
      when (_n_ <= 1600) output s41_100;
      when (_n_ <= 1700) output s42_100;
      when (_n_ <= 2000) output s43_300;
      when (_n_ <= 2100) output s51_100;
      when (_n_ <= 2200) output s52_100;
      when (_n_ <= 2300) output s53_100;
      when (_n_ <= 2500) output s54_200;
      when (_n_ <= 2600) output s61_100;
      when (_n_ <= 2700) output s62_100;
      when (_n_ <= 2800) output s63_100;
      when (_n_ <= 2900) output s64_100;
      when (_n_ <= 3000) output s65_100;
      otherwise          output ss;
   end;
run;

/* 2nd sample: for each random row number in s2_500, fetch x from sample1. */
/* The drawn row number is dropped and the output is re-keyed 1..N as n.   */
data sample2(rename=(order=n));
   length x 8;
   if _n_ = 1 then do;
      /* in-memory lookup table: row number n -> value x */
      declare hash lookup(dataset: 'sample1');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s2_500;
   /* discard index rows that have no match in the lookup table */
   if lookup.find() ne 0 then delete;
   /* _n_ counts step iterations; it equals the output row number as long */
   /* as every lookup succeeds                                            */
   order = _n_;
   drop n;
run;

/* Pool for the 3rd sample: 100 rows drawn from sample1 plus 400 rows drawn */
/* from sample2, stacked and re-keyed as n = 1..N in sample33.              */

/* 100 draws from sample1, addressed by the row numbers in s31_100 */
data sample31;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample1");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s31_100;
   if sam.find() eq 0;
   drop n;
run;

/* 400 draws from sample2, addressed by the row numbers in s32_400 */
data sample32;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample2");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s32_400;
   if sam.find() eq 0;
   /* was "drop i2;": no such variable exists, so the drawn row number n */
   /* was never removed and the log carried a DROP-list warning          */
   drop n;
run;

/* stack both parts and assign a fresh sequential key */
data sample33;
   set sample31 sample32;
   n = _n_;
run;

/* 3rd sample: resample the pool sample33 using the row numbers in s3_500, */
/* then re-key the result as n = 1..N.                                      */
data sample3(rename=(order=n));
   length x 8;
   if _n_ = 1 then do;
      /* in-memory lookup table: pool row number n -> value x */
      declare hash lookup(dataset: 'sample33');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s3_500;
   /* discard index rows that have no match in the pool */
   if lookup.find() ne 0 then delete;
   order = _n_;
   drop n;
run;

/* 4th sample = 100 draws from sample1 + 100 from sample2 + 300 from sample3. */
/* Each part looks up x by the random row numbers held in its own index slice. */

/* part 1: draws from sample1 */
data sample41;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample1');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s41_100;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* part 2: draws from sample2 */
data sample42;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample2');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s42_100;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* part 3: draws from sample3 */
data sample43;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample3');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s43_300;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* stack the three parts and give every row a sequential key n */
data sample4;
   set sample41 sample42 sample43;
   n = _n_;
run;

/* 5th sample = 100 draws each from sample1, sample2 and sample3, plus 200 */
/* draws from sample4. Each part is addressed by its own index slice.      */

/* part 1: draws from sample1 */
data sample51;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample1');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s51_100;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* part 2: draws from sample2 */
data sample52;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample2');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s52_100;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* part 3: draws from sample3 */
data sample53;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample3');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s53_100;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* part 4: draws from sample4 */
data sample54;
   length x 8;
   if _n_ = 1 then do;
      declare hash lookup(dataset: 'sample4');
      lookup.defineKey('n');
      lookup.defineData('x');
      lookup.defineDone();
      call missing(x);
   end;
   set s54_200;
   if lookup.find() ne 0 then delete;
   drop n;
run;

/* stack the four parts (numbered data set list) and key the rows sequentially */
data sample5;
   set sample51-sample54;
   n = _n_;
run;

/* 6th sample = 100 draws from each of sample1..sample5.                   */
/* Each part looks up x by the random row numbers held in its index slice. */

/* part 1: draws from sample1 */
data sample61;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample1");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s61_100;
   if sam.find() eq 0;
   drop n;
run;

/* part 2: draws from sample2 */
data sample62;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample2");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s62_100;
   if sam.find() eq 0;
   drop n;
run;

/* part 3: draws from sample3 */
data sample63;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample3");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s63_100;
   if sam.find() eq 0;
   drop n;
run;

/* part 4: draws from sample4 */
data sample64;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample4");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s64_100;
   if sam.find() eq 0;
   drop n;
run;

/* part 5: draws from sample5 */
data sample65;
   length x 8.;
   if _n_ eq 1 then do;
      dcl hash sam(dataset:"sample5");
      sam.definekey('n');
      sam.definedata('x');
      sam.definedone();
      call missing(x);
   end;
   set s65_100;
   if sam.find() eq 0;
   drop n;
run;

/* Stack the five parts. The sequential key n was missing here although */
/* sample4 and sample5 carry one; without it sample6 cannot be used as  */
/* a keyed lookup source the way the earlier samples are.               */
data sample6;
   set sample61-sample65;
   n = _n_;
run;

/* Build sample7 .. sample50.                                                  */
/* Sample i is made of 100 draws from each of the five samples immediately     */
/* preceding it (sample(i-5) .. sample(i-1)). The previous version always drew */
/* from sample1..sample5, so later samples never fed back into the process.    */
/* Row numbers come from ss, 100 per draw, read in place with FIRSTOBS=/OBS=;  */
/* the datasets s and ss are no longer overwritten, so the macro can be rerun. */
%macro sample7_50;
   %local i j src first last;
   %do i = 7 %to 50;
      %do j = 1 %to 5;
         /* j-th source of the sliding window: i-5, i-4, ..., i-1 */
         %let src   = %eval(&i - 6 + &j);
         /* slice of ss reserved for this (i, j) pair */
         %let first = %eval(((&i - 7) * 5 + (&j - 1)) * 100 + 1);
         %let last  = %eval(&first + 99);
         data sample&i._&j.;
            length x 8;
            if _n_ eq 1 then do;
               /* Key the source rows by position rather than by a stored n, */
               /* so the lookup works whether or not the source carries n.   */
               dcl hash sam();
               sam.definekey('n');
               sam.definedata('x');
               sam.definedone();
               do n = 1 by 1 until (eof);
                  set sample&src(keep=x) end=eof;
                  sam.add();
               end;
            end;
            set ss(firstobs=&first obs=&last keep=n);
            if sam.find() eq 0;
            drop n;
         run;
      %end;
      /* stack the five parts; n gives the sample a sequential row key */
      data sample&i;
         set sample&i._1-sample&i._5;
         n = _n_;
      run;
      /* remove the per-part work datasets of this stage */
      proc datasets lib=work nolist;
         delete sample&i._:;
      quit;
   %end;
%mend sample7_50;

%sample7_50;

作者: shiyiming 时间: 2012-10-5 08:15
标题: Re: IML数据模拟出现问题，请帮忙！
son59388的观察得很好啊。随着更多的sampling，最后数据里可能只含有唯一的记录。有点degeneration的意思。不过我们的结果差异很大的：经过50次sampling，数据含有独特记录的个数大概20个左右。京剧

作者: shiyiming 时间: 2012-10-7 20:08
标题: Re: IML数据模拟出现问题，请帮忙！
好久没有来过了，觉得这个问题比较有意思，也来凑凑热闹。
[code:17j30u07]
%let n=500;

/* Build IDX: one output row per sample (1st .. 50th). Row k holds, in        */
/* B_1-B_&n, the row numbers of the base sample that make up the k-th sample. */
/* A{1..5, *} keeps the five most recent index vectors as a ring buffer.      */
/* The band width (100) and the size of T are fixed, so they assume &n = 500. */
data IDX;
   array A{5,&n} v1_1-v1_&n v2_1-v2_&n v3_1-v3_&n v4_1-v4_&n v5_1-v5_&n;
   array B{&n} B_1-B_&n;
   array T{100} T_1-T_100;

   /* 1st: the base sample itself, rows 1..&n */
   do i = 1 to &n;
      A{1,i} = i;
      B{i} = A{1,i};
   end;
   output;

   /* 2nd: draw 100 rows from the 1st, then resample those 100 up to &n rows */
   do i = 1 to 100;
      T{i} = A{1, ceil(ranuni(0)*&n)};
   end;
   do i = 1 to &n;
      A{2,i} = T{ceil(ranuni(0)*100)};
      B{i} = A{2,i};
   end;
   output;

   /* 3rd-5th: rows 1-100 come from the 1st sample, rows 101-200 from the 2nd */
   /* and so on; every remaining row comes from sample k-1.                   */
   /* The previous chain of independent IFs lacked ELSE, so the final ELSE    */
   /* overwrote the earlier bands (e.g. rows 1-100 of the 4th sample were     */
   /* taken from the 3rd sample instead of the 1st).                          */
   do k = 3 to 5;
      do i = 1 to &n;
         src = min(ceil(i/100), k - 1);
         A{k,i} = A{src, ceil(ranuni(0)*&n)};
         B{i} = A{k,i};
      end;
      output;
   end;

   /* 6th-50th: 100 rows from each of the five buffered samples */
   do j = 6 to 50;
      do i = 1 to &n;
         src = min(ceil(i/100), 5);
         B{i} = A{src, ceil(ranuni(0)*&n)};
      end;
      output;
      /* buffer slot 1..5 that the new sample replaces (cycles with j) */
      _t = mod(j - 1, 5) + 1;
      do i = 1 to &n;
         A{_t,i} = B{i};
      end;
   end;
   keep B_:;
run;

/* Demo base sample of 500 rows: Income is obtained by pushing a uniform  */
/* draw y through the Pareto quantile function (parameters 1.2 and 1.5);  */
/* sex is a rounded uniform draw, i.e. 0 or 1.                            */
data sample;
   drop i;
   do i = 1 to 500;
      y      = ranuni(0);
      Income = quantile('pareto', y, 1.2, 1.5);
      sex    = round(ranuni(0));
      output;
   end;
run;

/* Materialise the j-th sample.                                              */
/*   in     = dataset of index vectors, one row per sample, columns B_*      */
/*   sample = base dataset whose rows are addressed by those index values    */
/*   j      = row of &in (i.e. which sample) to extract                      */
/*   out    = output dataset: one row of &sample per index value             */
%macro sample_j(in=in, sample=sample, j=1, out=out);
   data &out;
      /* read only row &j of the index dataset */
      set &in(firstobs=&j obs=&j);
      /* Declared after SET so B_: resolves to the index columns actually   */
      /* present; the width is taken from DIM() instead of a hard-coded 500. */
      array T{*} B_:;
      do __i = 1 to dim(T);
         p = T{__i};
         /* direct access: fetch row p of the base sample */
         set &sample point=p;
         output;
      end;
      /* POINT= access never reaches end-of-file, so the step is stopped explicitly */
      stop;
      drop b_: __i p;
   run;
%mend sample_j;

%sample_j(in=IDX, j=2);
[/code:17j30u07]

欢迎光临 SAS中文论坛 (http://www.mysas.net/forum/)

Powered by Discuz! X3.2