SAS中文论坛

 找回密码
 立即注册

扫一扫,访问微社区

查看: 1573|回复: 0
打印 上一主题 下一主题

Stochastic Gradient Decending Logistic Regression in SAS

[复制链接]

49

主题

76

帖子

1462

积分

管理员

Rank: 9Rank: 9Rank: 9

积分
1462
楼主
 楼主| 发表于 2012-5-24 21:50:53 | 只看该作者

Stochastic Gradient Decending Logistic Regression in SAS

From oloolo's blog on SasProgramming


<p><a href="http://feedads.g.doubleclick.net/~a/PjvzRIUQaqOILxxFQcDQHXpxHU8/0/da"><img src="http://feedads.g.doubleclick.net/~a/PjvzRIUQaqOILxxFQcDQHXpxHU8/0/di" border="0" ismap="true"></img></a><br/>
<a href="http://feedads.g.doubleclick.net/~a/PjvzRIUQaqOILxxFQcDQHXpxHU8/1/da"><img src="http://feedads.g.doubleclick.net/~a/PjvzRIUQaqOILxxFQcDQHXpxHU8/1/di" border="0" ismap="true"></img></a></p>Test the Stochastic Gradient Decending Logistic Regression in SAS. The logic and code follows the code piece of Ravi Varadhan, Ph.D from this <a href="http://r.789695.n4.nabble.com/Stochastic-Gradient-Ascent-for-logistic-regression-td884272.html" target="_blank"><strong><em>discussion</em></strong> </a>of R Help. The blog <a href="http://sasdiehard.blogspot.com/" target="_blank"><strong><em>SAS Die Hard</em></strong></a> also has a post about SGD Logistic Regression in SAS.<br />
<br />
<br />
<br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>

filename foo url "http://www.biostat.jhsph.edu/~ririzarr/Teaching/754/lbw.dat" ;

data temp;
   infile foo length=len;
   input low age lwt race smoke ptl ht ui ftv bwt;
   put low age lwt race smoke ptl ht ui ftv bwt;
   if _n_&gt;1;
run;
proc standard data=temp out=temp mean=0  std=1;
      var  age lwt smoke ht ui;
run;

proc contents data=temp  out=vars(keep=varnum  name type) noprint; run;

proc sql noprint;
      select name into :covars separated by " "
   from   vars
   where  substr(name, 1, 1)="x"
   ;
   select cats("b_", name) into :covars2 separated by " "
   from   vars
   where  substr(name, 1, 1)="x"
   ;
     select count(*)+1 into :nparms
     from   vars
  where  substr(name, 1, 1)="x"
  ;
quit;
%put &amp;covars2;

sasfile _xbeta close;
%lr_sgd(temp, beta, z, &amp;covars, alpha=0.008, decay=0.8, criterion=0.00001, maxiter=1000);


options fullstimer;
proc logistic data=temp  outest=_beta  desc noprint;
      model low = age lwt smoke ht ui;
run;

</code></pre>
The macro %LR_SGD. <br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
/*
  SAS macro:
     Logistic Regression using Stochastic Gradient Descent.   
  Name:
     %ls_sgd();
  Copyright (c) 2009, Liang Xie (Contact me @ xie1978 at gmail dot com)
  
  
  The SAS macro is a demonstration of an implementation of logistic
  regression modelstrained by Stochastic Gradient Decent (SGD).This
  program reads a training set specified as &amp;dsn_in, trains a logistic
  regression model, and outputs the estimated coefficients to &amp;outest.
  Example usage:

  %let inputdata=train_data;
  %let beta=coefficient;
  %let response=Event;
  %lr_sgd(&amp;inputdata, &amp;beta, &amp;response, &amp;covars,
          alpha=0.008, decay=0.8,
          criterion=0.00001, maxiter=1000);


  The following topics are not covered for simplicity:   
      - bias term   
      - regularization   
      - multiclass logistic regression (maximum entropy model)         
      - calibration of learning rate

  <i>Distributed under GNU Affero General Public License version 3. This
  program is free software: you can redistribute it and/or modify
  it under the terms of the GNU Affero General Public License as
  published by the Free Software Foundation, only version 3 of the
  License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU Affero General Public License for more details.

</i>*/

%macro logistic(dsn_in, outest, response, alpha=0.0005);
proc score data=&amp;dsn_in  score=&amp;outest  type=parms  out=score(keep=score);
     var intercept &amp;covars;
run;

data _xtemp/view=_xtemp;
     merge &amp;dsn score ;
     _w=&amp;response - 1/(1+exp(-score));
  /*
  array x{*} intercept &amp;covars;
  _w=&amp;response - 1/(1+exp(-score));
  do i=1 to dim(x); x[i]=x[i]*_w; end;   
  */
run;

data _x&amp;outest;
  array x{*} intercept &amp;covars;
  array _x{*} b_intercept &amp;covars2;
  retain b_intercept &amp;covars2;
  retain logneg logpos 0;
  modify _x&amp;outest;
  do i=1 to dim(x); x[i]=_x[i]; end;
  
  do until (eof);
        set _xtemp  end=eof;
     do  i=1 to dim(x);
         _x[i]=_x[i]+&amp;alpha*x[i]*_w;
     end;
  end;
     replace;
run;

%mend;

%macro compare(dsn1, dsn2);
data _null_;
     merge &amp;dsn1  &amp;dsn2;
  array _x1{*} intercept &amp;covars;
  array _x2{*} b_intercept &amp;covars2;
  retain maxdiff 0;
  do i=1 to dim(_x1);      
     maxdiff=max(maxdiff, abs(_x1[i]-_x2[i]));  
  *put _x1[*]=;
  *put _x2[*]=;
  end;
  call symput('maxdiff', maxdiff);
run;
%mend;




%macro lr_sgd(dsn, outest, response, covars,
              alpha=0.0005, decay=0.9,
              criterion=0.00001, maxiter=1000);
options nosource nonotes;
options nomlogic nomprint;
%local i t0 t1 dt maxdiff status  stopiter;

%let t00=%sysfunc(datetime());

data &amp;dsn;
     set &amp;dsn;
  intercept=1; _w=1;
run;

data &amp;outest;
     retain _TYPE_ "PARMS"  _NAME_ "SCORE";
     array x{*} intercept &amp;covars;
  do i=1 to dim(x);
     x[i]=0;
  end;
  drop i;
  output;
run;

data _x&amp;outest;
     retain _TYPE_ "PARMS"  _NAME_ "SCORE";
     array bx{*} b_intercept &amp;covars2;
  array x{*}  intercept &amp;covars;
  set &amp;outest;
  do j=1 to dim(x); bx[j]=x[j]; end;
  keep b_intercept &amp;covars2 _TYPE_  _NAME_;
  drop j;
run;

sasfile _x&amp;outest load;
%let stopiter=&amp;maxiter;
%let status=Not Converged.;
%do i=1 %to &amp;maxiter;
    %let t0=%sysfunc(datetime());

    %logistic(&amp;dsn, &amp;outest, &amp;response, alpha=&amp;alpha);
    %compare(&amp;outest, _x&amp;outest);   
    data &amp;outest;
         retain _TYPE_ "PARMS"  _NAME_ "SCORE";
         array bx{*} b_intercept &amp;covars2;
      array x{*}  intercept &amp;covars;
      set _x&amp;outest;
      do j=1 to dim(x); x[j]=bx[j]; end;
      keep intercept &amp;covars _TYPE_  _NAME_;
      drop j;
    run;
%let alpha=%sysevalf(&amp;alpha * &amp;decay);
%let alpha=%sysfunc(max(0.00005, &amp;alpha));
    %let t1=%sysfunc(datetime());
    %let dt=%sysfunc(round(&amp;t1-&amp;t0, 0.001));
    %put Iteration &amp;i, time used &amp;dt, converge criteria is &amp;maxdiff;
%if %sysevalf(&amp;maxdiff&lt;&amp;criterion) %then %do;
     %let stopiter=&amp;i;
        %let i=%eval(&amp;maxiter+1);
  %let status=Converged.;
%end;
%end;
sasfile _x&amp;outest close;
%let t11=%sysfunc(datetime());
%let dt=%sysfunc(round(&amp;t11-&amp;t00, 0.01));
%put Total Time is &amp;dt sec.;
%put Total Iteration is &amp;stopiter, convergence status is &amp;status;
%put At Final Iteration, max difference is &amp;maxdiff;
options mlogic mprint notes source;
%mend;

</code></pre><div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/29815492-2505374346425236390?l=www.sas-programming.com' alt='' /></div><img src="http://feeds.feedburner.com/~r/SasProgramming/~4/d_1rR80APks" height="1" width="1"/>
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

QQ|小黑屋|手机版|Archiver|SAS中文论坛  

GMT+8, 2025-5-6 16:10 , Processed in 0.100771 second(s), 21 queries .

Powered by Discuz! X3.2

© 2001-2013 Comsenz Inc.

快速回复 返回顶部 返回列表