/* Base table: 500 rows keyed by n = 1..500, each carrying one draw x from RANUNI. */
data sample1;
    do n = 1 to 500;
        x = ranuni(n);
        output;
    end;
run;
/* Pool of 25,100 random row indices n, each an integer in 1..500.
   ceil(500*u) with u strictly between 0 and 1 gives every index the same
   probability; round(499*u)+1 would give indices 1 and 500 only half the
   weight of the others. Only n is kept; the loop counter i is discarded. */
data s;
    keep n;
    do i = 1 to 25100;
        n = ceil(500 * ranuni(-1));
        output;
    end;
run;
/* Cut the index pool s into consecutive slices by observation number.
   The first 3000 rows feed generations 2-6 (the suffix of each data set is
   its row count); everything after row 3000 goes to ss. */
data s2_500
     s31_100 s32_400 s3_500
     s41_100 s42_100 s43_300
     s51_100 s52_100 s53_100 s54_200
     s61_100 s62_100 s63_100 s64_100 s65_100
     ss;
    set s;
    select;
        when (_n_ le 500)  output s2_500;
        when (_n_ le 600)  output s31_100;
        when (_n_ le 1000) output s32_400;
        when (_n_ le 1500) output s3_500;
        when (_n_ le 1600) output s41_100;
        when (_n_ le 1700) output s42_100;
        when (_n_ le 2000) output s43_300;
        when (_n_ le 2100) output s51_100;
        when (_n_ le 2200) output s52_100;
        when (_n_ le 2300) output s53_100;
        when (_n_ le 2500) output s54_200;
        when (_n_ le 2600) output s61_100;
        when (_n_ le 2700) output s62_100;
        when (_n_ le 2800) output s63_100;
        when (_n_ le 2900) output s64_100;
        when (_n_ le 3000) output s65_100;
        otherwise          output ss;
    end;
run;
/* Generation 2: for each index n in s2_500 fetch x from sample1 via a hash
   lookup. Rows whose key is not found are discarded. The looked-up key n is
   dropped and the DATA step iteration number (order) is written out as the
   new n. */
data sample2(rename=(order=n));
    length x 8;
    drop n;
    if _n_ = 1 then do;
        /* load sample1 once, keyed by n, returning x */
        declare hash src(dataset:"sample1");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set s2_500;
    if src.find() ne 0 then delete;
    order = _n_;
run;
/* First part of generation 3: x values looked up in sample1 for the
   indices in s31_100. Unmatched rows are discarded; only x is kept. */
data sample31;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample1");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set s31_100;
    if src.find() ne 0 then delete;
run;
/* Second part of generation 3: x values looked up in sample2 for the
   indices in S32_400. Unmatched rows are discarded.
   n is only the lookup key, so it is dropped like in the sibling lookup
   steps (dropping a non-existent variable instead would only raise a log
   warning and leave n in the output). */
data sample32;
    length x 8;
    if _n_ eq 1 then do;
        /* load sample2 once, keyed by n, returning x */
        dcl hash sam(dataset:"sample2");
        sam.definekey('n');
        sam.definedata('x');
        sam.definedone();
        call missing(x);
    end;
    set S32_400;
    if sam.find() eq 0;
    drop n;
run;
/* Stack the two generation-3 parts and number the rows so the result can
   serve as a hash source keyed by n. */
data sample33;
    set sample31-sample32;
    n = _n_;
run;
/* Generation 3: look up x in sample33 for each index in S3_500.
   Unmatched rows are discarded. The lookup key n is dropped and the DATA
   step iteration number (order) becomes the new n. */
data sample3(rename=(order=n));
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample33");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S3_500;
    if src.find() ne 0 then delete;
    order = _n_;
run;
/* Generation 4, part 1: x values from sample1 for the indices in S41_100.
   Unmatched rows are discarded; only x is kept. */
data sample41;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample1");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S41_100;
    if src.find() ne 0 then delete;
run;
/* Generation 4, part 2: x values from sample2 for the indices in S42_100.
   Unmatched rows are discarded; only x is kept. */
data sample42;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample2");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S42_100;
    if src.find() ne 0 then delete;
run;
/* Generation 4, part 3: x values from sample3 for the indices in S43_300.
   Unmatched rows are discarded; only x is kept. */
data sample43;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample3");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S43_300;
    if src.find() ne 0 then delete;
run;
/* Generation 4: stack its three parts and number the rows (n) so the
   result can be used as a hash source keyed by n. */
data sample4;
    set sample41-sample43;
    n = _n_;
run;
/* Generation 5, part 1: x values from sample1 for the indices in S51_100.
   Unmatched rows are discarded; only x is kept. */
data sample51;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample1");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S51_100;
    if src.find() ne 0 then delete;
run;
/* Generation 5, part 2: x values from sample2 for the indices in S52_100.
   Unmatched rows are discarded; only x is kept. */
data sample52;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample2");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S52_100;
    if src.find() ne 0 then delete;
run;
/* Generation 5, part 3: x values from sample3 for the indices in S53_100.
   Unmatched rows are discarded; only x is kept. */
data sample53;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample3");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S53_100;
    if src.find() ne 0 then delete;
run;
/* Generation 5, part 4: x values from sample4 for the indices in S54_200.
   Unmatched rows are discarded; only x is kept. */
data sample54;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample4");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S54_200;
    if src.find() ne 0 then delete;
run;
/* Generation 5: stack its four parts and number the rows (n) so the
   result can be used as a hash source keyed by n. */
data sample5;
    set sample51 sample52 sample53 sample54;
    n = _n_;
run;
/* Generation 6, part 1: x values from sample1 for the indices in S61_100.
   Unmatched rows are discarded; only x is kept. */
data sample61;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample1");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S61_100;
    if src.find() ne 0 then delete;
run;
/* Generation 6, part 2: x values from sample2 for the indices in S62_100.
   Unmatched rows are discarded; only x is kept. */
data sample62;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample2");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S62_100;
    if src.find() ne 0 then delete;
run;
/* Generation 6, part 3: x values from sample3 for the indices in S63_100.
   Unmatched rows are discarded; only x is kept. */
data sample63;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample3");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S63_100;
    if src.find() ne 0 then delete;
run;
/* Generation 6, part 4: x values from sample4 for the indices in S64_100.
   Unmatched rows are discarded; only x is kept. */
data sample64;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample4");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S64_100;
    if src.find() ne 0 then delete;
run;
/* Generation 6, part 5: x values from sample5 for the indices in S65_100.
   Unmatched rows are discarded; only x is kept. */
data sample65;
    length x 8;
    drop n;
    if _n_ = 1 then do;
        declare hash src(dataset:"sample5");
        src.defineKey('n');
        src.defineData('x');
        src.defineDone();
        call missing(x);
    end;
    set S65_100;
    if src.find() ne 0 then delete;
run;
/* Generation 6: stack its five parts. No row number n is added here. */
data sample6;
    set sample61 sample62 sample63 sample64 sample65;
run;
/* Build generations 7 through 50 (data sets sample7 ... sample50).
   For each generation i and each part j = 1..5:
     1. peel the next 100 index rows off ss into s (ss keeps the remainder);
     2. look up x in sample&j for those indices into sample&i._&j.
   The five parts are then stacked into sample&i and the part data sets are
   deleted from WORK.
   Side effects: overwrites WORK data sets s and ss.
   NOTE(review): the lookup source is always sample1..sample5, not the five
   most recent generations -- confirm this is the intended scheme. */
%macro sample7_50;
    /* keep the loop counters out of the caller's / global symbol table */
    %local i j;
    %do i = 7 %to 50;
        %do j = 1 %to 5;
            /* take the next 100 unused indices */
            data s ss;
                set ss;
                if _n_ le 100 then output s;
                else output ss;
            run;
            /* fetch x from sample&j for each drawn index; drop unmatched rows */
            data sample&i._&j.;
                length x 8.;
                if _n_ eq 1 then do;
                    dcl hash sam(dataset:"sample&j");
                    sam.definekey('n');
                    sam.definedata('x');
                    sam.definedone();
                    call missing(x);
                end;
                set S;
                if sam.find() eq 0;
                drop n;
            run;
        %end;
        data sample&i;
            set sample&i._1-sample&i._5;
        run;
        /* remove the per-part data sets; NOLIST keeps the directory listing out of the log */
        proc datasets lib=work nolist;
            delete sample&i._:;
        quit;
    %end;
%mend sample7_50;
/* Build the index table IDX: one row per generation (50 rows), each row
   holding the source-row indices B_1..B_&n for that generation.
   A{g,*} holds the index rows of up to five earlier generations, B is the
   row being written, T is a 100-element intermediate pool for generation 2.
   The loops below are hard-coded to 500 elements, so &n is assumed to be
   500 -- TODO confirm where &n is set.
   The slice selection in generations 4, 5 and 6+ is an IF / ELSE IF chain:
   with independent IF statements the trailing ELSE would overwrite every
   slot already filled by an earlier branch other than the one just before it. */
data IDX;
    array A{5,&n} v1_1-v1_&n v2_1-v2_&n v3_1-v3_&n v4_1-v4_&n v5_1-v5_&n ;
    array B{&n} B_1-B_&n ;
    array T{100} T_1-T_100 ;

    /* 1st generation: the identity mapping 1..500 */
    do i = 1 to 500;
        A{1,i} = i;
        B{i} = A{1,i};
    end;
    output;

    /* 2nd generation: 100 draws from generation 1, then 500 draws from those 100 */
    do i = 1 to 100;
        T{i} = A{1, ceil(ranuni(0)*500)};
    end;
    do i = 1 to 500;
        A{2,i} = T{ceil(ranuni(0)*100)};
        B{i} = A{2,i};
    end;
    output;

    /* 3rd generation: slots 1-100 from generation 1, the rest from generation 2 */
    do i = 1 to 500;
        if i <= 100 then A{3,i} = A{1, ceil(ranuni(0)*500)};
        else A{3,i} = A{2, ceil(ranuni(0)*500)};
        B{i} = A{3,i};
    end;
    output;

    /* 4th generation: 100 from gen 1, 100 from gen 2, the rest from gen 3 */
    do i = 1 to 500;
        if i <= 100 then A{4,i} = A{1, ceil(ranuni(0)*500)};
        else if i <= 200 then A{4,i} = A{2, ceil(ranuni(0)*500)};
        else A{4,i} = A{3, ceil(ranuni(0)*500)};
        B{i} = A{4,i};
    end;
    output;

    /* 5th generation: 100 each from gens 1-3, the rest from gen 4 */
    do i = 1 to 500;
        if i <= 100 then A{5,i} = A{1, ceil(ranuni(0)*500)};
        else if i <= 200 then A{5,i} = A{2, ceil(ranuni(0)*500)};
        else if i <= 300 then A{5,i} = A{3, ceil(ranuni(0)*500)};
        else A{5,i} = A{4, ceil(ranuni(0)*500)};
        B{i} = A{5,i};
    end;
    output;

    /* 6th to 50th generation: 100 slots from each of the five stored rows */
    do j = 6 to 50;
        _t = IFN(mod(j,5)=0, 5, mod(j,5)); /* A only has rows 1:5, so map j onto 1..5 */
        do i = 1 to 500;
            if i <= 100 then B{i} = A{1, ceil(ranuni(0)*500)};
            else if i <= 200 then B{i} = A{2, ceil(ranuni(0)*500)};
            else if i <= 300 then B{i} = A{3, ceil(ranuni(0)*500)};
            else if i <= 400 then B{i} = A{4, ceil(ranuni(0)*500)};
            else B{i} = A{5, ceil(ranuni(0)*500)};
        end;
        output;
        /* store this generation in row _t of A, replacing what was there */
        do i = 1 to 500;
            A{_t,i} = B{i};
        end;
    end;

    keep B_:;
run;
/* Generate the base sample of 500 rows:
     y      - uniform draw from RANUNI
     Income - Pareto quantile of y (parameters 1.2 and 1.5)
     sex    - 0/1 indicator obtained by rounding a second uniform draw
   The loop counter i is not kept. */
data sample;
    drop i;
    do i = 1 to 500;
        y = ranuni(0);
        Income = quantile('pareto', y, 1.2, 1.5);
        sex = round(ranuni(0));
        output;
    end;
run;
/* Materialise one resample.
   Parameters:
     in=     data set holding index rows with variables B_1..B_500
     sample= data set the rows are fetched from
     j=      observation number of the index row in &in to use
     out=    output data set
   Reads observation &j of &in, then for each of its 500 index values fetches
   that row of &sample by direct access (POINT=) and writes it to &out.
   The index variables and the work variables __i and p are not kept. */
%macro sample_j(in=in, sample=sample, j=1, out=out);
    data &out;
        array T{500} B_1-B_500;
        set &in(firstobs=&j obs=&j);
        do __i = 1 to dim(T);
            p = T{__i};
            set &sample point=p;
            output;
        end;
        drop b_: __i p;
        /* POINT= access never hits end-of-file, so end the step explicitly */
        stop;
    run;
%mend sample_j;