Skip Navigation
  Print Page

Biostatistics and Bioinformatics Branch (BBB)

Skip sharing on social media links
Share this:
Skip Internal Navigation

Hotelling T^2 Tests Based on Population Case Control Data

SAS Macro

/*

d:\paper\ruzong\soft\hotel.sas

Version 0.1, 15.09.03

Aenderungen:

*/

%macro va_lst(anz,namen);
          %global va_lst;
          %let va_lst="";
          %do i=1 %to &anz;
                 %let help=%scan(&namen,&i,' ');
                 %if &i.>1 %then
                        %let va_lst=%unquote(&va_lst &help._1 &help._2);
                 %else
                        %let va_lst=%unquote(&help._1 &help._2);
          %end;
          %put &va_lst.;
%mend;

%macro hotel_cc(infile,aff,ma_number,ma_names,outfile);

%let max_allele=100;
%va_lst(&ma_number.,&ma_names.);

data dfile; set &infile.;
array ma[&ma_number.,2] &va_lst.;
aff=&aff.;
complete=1;
i=0;
do while ((i<&ma_number.) and (complete=1));
          i=i+1;
          if (ma[i,1]*ma[i,2]=0) then complete=0;
end;
if (complete=1) then output;
keep aff &va_lst.;
run;

proc iml;

          use dfile var { aff &va_lst.};
          read all into daten;
          anz_obs=NROW(daten);
          anz_marker=(NCOL(daten)-1)/2;

          rdaten=J(anz_obs,1+2*anz_marker);
          recode_table=J(anz_marker,&max_allele.,0);
          anz_allele=J(anz_marker,1,0);

          START recode(daten,rdaten,recode_table,anz_allele);
               fehler=0;
               anz_obs=NROW(daten);
               rdaten[1:anz_obs,1:1]=daten[1:anz_obs,1:1];
               /* Rekodierung der Markerdaten */
               do j=1 to &ma_number.;
                      do i=1 to anz_obs;
                             do k=1 to 2;
                                    allel=daten[i,1+2*(j-1)+k];
                                    if (allel) then do;
                                          found=0; t=0;
                                          do while ((t<anz_allele[j])*(found=0));
                                               t=t+1;
                                               if (allel=recode_table[j,t]) then found=1;
                                          end;
                                          if (found=0) then do;
                                               anz_allele[j]=anz_allele[j]+1;
                                               t=anz_allele[j];
                                               if (t>&max_allele.) then fehler=1;
                                               recode_table[j,t]=allel;
                                            end;
                                          rdaten[i,1+2*(j-1)+k]=t;
                                       end; /* if (allel) */
                                          else do; rdaten[i,1+2*(j-1)+k]=0; fehler=2; end;
                             end;
                      end;
               end;
               return (fehler);
FINISH;

START calc_laenge(anz_allele);
               hc_laenge=0; gc_laenge=0;
               do i=1 to &ma_number.;
                      anz=anz_allele[i];
                      hc_laenge=hc_laenge+anz-1;
                      gc_laenge=gc_laenge+(anz*(anz+1)/2)-1;
               end;
               erg=hc_laenge||gc_laenge;
               return (erg);
FINISH;

START hc(indiv,laenge,anz_allele);
               erg=J(laenge,1,0);
               start=0;
               do i=1 to &ma_number.;
                      do k=1 to 2;
                             allel=indiv[1+2*(i-1)+k];
                             if (allel<anz_allele[i]) then
                                    erg[start+allel]=erg[start+allel]+1;
                      end;
                      start=start+anz_allele[i]-1;
               end;
               return (erg);
FINISH;

START gc(indiv,laenge,anz_allele);
               erg=J(laenge,1,0);
               start=0;
               do i=1 to &ma_number.;
                      allel1=indiv[1+2*(i-1)+1];
                      allel2=indiv[1+2*(i-1)+2];
                      if (allel1>allel2) then do;
                             rette=allel1; allel1=allel2; allel2=rette;
                      end;
                      anz=anz_allele[i];
                      max_ind=(((anz+1)*anz)/2)-1;
                      index=((anz+1)*(allel1-1))-(allel1*(allel1-1)/2)+(allel2+1-allel1);
                      if (index<=max_ind) then erg[start+index]=erg[start+index]+1;
                             start=start+max_ind;
               end;
               return (erg);
FINISH;

error=recode(daten,rdaten,recode_table,anz_allele);
if (error) then print error;

laenge=calc_laenge(anz_allele);

anz_aff=0; anz_unaff=0;
ahc_mean=J(laenge[1],1,0);
agc_mean=J(laenge[2],1,0);
uhc_mean=J(laenge[1],1,0);
ugc_mean=J(laenge[2],1,0);
ahc_cov=J(laenge[1],laenge[1],0);
agc_cov=J(laenge[2],laenge[2],0);
uhc_cov=J(laenge[1],laenge[1],0);
ugc_cov=J(laenge[2],laenge[2],0);
do ind=1 to anz_obs;
               indiv=rdaten[ind:ind,];
               hcv=hc(indiv,laenge[1],anz_allele);
               gcv=gc(indiv,laenge[2],anz_allele);
               if (indiv[1]=2) then do;
                                    anz_aff=anz_aff+1;
                                    ahc_mean=ahc_mean+hcv;
                                    agc_mean=agc_mean+gcv;
                                    ahc_cov=ahc_cov+hcv*hcv`;
                                    agc_cov=agc_cov+gcv*gcv`;
                               end;
                          else do;
                                   anz_unaff=anz_unaff+1;
                                   uhc_mean=uhc_mean+hcv;
                                   
                                   uhc_cov=uhc_cov+hcv*hcv`;
                                   ugc_cov=ugc_cov+gcv*gcv`;
                               end;
end;

n=anz_aff; m=anz_unaff;
ahc_mean=ahc_mean/n;
agc_mean=agc_mean/n;
uhc_mean=uhc_mean/m;
ugc_mean=ugc_mean/m;
ahc_cov=ahc_cov-n*ahc_mean*ahc_mean`;
agc_cov=agc_cov-n*agc_mean*agc_mean`;
uhc_cov=uhc_cov-m*uhc_mean*uhc_mean`;
ugc_cov=ugc_cov-m*ugc_mean*ugc_mean`;
s_hc=(ahc_cov+uhc_cov)/(n+m-2);
s_gc=(agc_cov+ugc_cov)/(n+m-2);
T_sq_h=(n*m/(n+m))
                    *((ahc_mean-uhc_mean)`)*GINV(s_hc)*(ahc_mean-uhc_mean);
T_sq_g=(n*m/(n+m))
                    *((agc_mean-ugc_mean)`)*GINV(s_gc)*(agc_mean-ugc_mean);
P_h=1-probchi(T_sq_h,laenge[1]);
P_g=1-probchi(T_sq_g,laenge[2]);

result=(T_sq_h||laenge[1]||P_h)//(T_sq_g||laenge[2]||P_g);
CREATE res_file FROM result;
APPEND FROM result;
run;

quit;
run;

data a; set res_file;
file "&outfile." mod;
if (_N_=1) then do;
               put "Marker: &ma_names.";
               put;
               put "coding statistic df p-value";
               put "Haplotype " col1 9.2 col2 5.0 col3 E9.2;
           end;
       else
           put "Genotype " col1 9.2 col2 5.0 col3 E9.2;
run;
%mend;

/*%hotel(tdat,affect,2,ma na,C:\TAMU_stat\research_paper\case_control_sib_parent\knapp\testdat.aus);*/
→

Last Updated Date: 04/12/2013
Last Reviewed Date: 04/12/2013

Contact Information

Name: Dr Paul Albert
Chief and Senior Investigator
Biostatistics and Bioinformatics Branch
Phone: 301-496-5582
E-mail: albertp@mail.nih.gov

Staff Directory
Vision National Institutes of Health Home BOND National Institues of Health Home Home Storz Lab: Section on Environmental Gene Regulation Home Machner Lab: Unit on Microbial Pathogenesis Home Division of Intramural Population Health Research Home Bonifacino Lab: Section on Intracellular Protein Trafficking Home Lilly Lab: Section on Gamete Development Home Lippincott-Schwartz Lab: Section on Organelle Biology