Skip Navigation
  Print Page

Biostatistics and Bioinformatics Branch (BBB)

Skip sharing on social media links
Share this:
Skip Internal Navigation

Hotelling T^2 Tests Based on Sib-ship Data

SAS Macro

/*

d:\paper\ruzong4\soft\hotel_sibs.sas

Version 0.1, 8.03.05

Aenderungen:

*/

%macro va_lst(anz,namen);
          %global va_lst;
          %let va_lst="";
          %do i=1 %to &anz;
                 %let help=%scan(&namen,&i,' ');
                 %if &i.>1 %then
                        %let va_lst=%unquote(&va_lst &help._1 &help._2);
                 %else
                        %let va_lst=%unquote(&help._1 &help._2);
          %end;
          %put &va_lst.;
%mend;

%macro hotel_sibs(infile,ma_number,ma_names,outfile,simumf);

%let max_allele=100;
%let max_fam=1000;
%va_lst(&ma_number.,&ma_names.);

data dfile; set &infile.;
array ma[&ma_number.,2] &va_lst.;
complete=1;
i=0;
do while ((i<&ma_number.) and (complete=1));
          i=i+1;
          if (ma[i,1]*ma[i,2]=0) then complete=0;
end;
if (complete=1) then output;
keep fam ind father mother sex aff &va_lst.;
run;
proc sort; by fam father sex;
run;

proc iml;

          use dfile var { fam ind father mother sex aff &va_lst.};
          read all into daten;
          anz_obs=NROW(daten);
          anz_marker=(NCOL(daten)-6)/2;

          rdaten=J(anz_obs,6+2*anz_marker);
          recode_table=J(anz_marker,&max_allele.,0);
          anz_allele=J(anz_marker,1,0);

          START recode(daten,rdaten,recode_table,anz_allele);
                 fehler=0;
                 anz_obs=NROW(daten);
                 rdaten[1:anz_obs,1:6]=daten[1:anz_obs,1:6];
                 /* Rekodierung der Markerdaten */
                 do j=1 to &ma_number.;
                        do i=1 to anz_obs;
                               do k=1 to 2;
                                      allel=daten[i,6+2*(j-1)+k];
                                      if (allel) then do;
                                             found=0; t=0;
                                             do while ((t<anz_allele[j])*(found=0));
                                                    t=t+1;
                                                    if (allel=recode_table[j,t]) then found=1;
                                             end;
                                             if (found=0) then do;
                                                    anz_allele[j]=anz_allele[j]+1;
                                                    t=anz_allele[j];
                                                    if (t>&max_allele.) then fehler=1;
                                                    recode_table[j,t]=allel;
                                                 end;
                                             rdaten[i,6+2*(j-1)+k]=t;
                                          end; /* if (allel) */
                                        else do; rdaten[i,6+2*(j-1)+k]=0; fehler=2; end;
                               end;
                        end;
                 end;
                 return (fehler);
FINISH;

START calc_laenge(anz_allele);
          hc_laenge=0; gc_laenge=0;
          do i=1 to &ma_number.;
                 anz=anz_allele[i];
                 hc_laenge=hc_laenge+anz-1;
                 gc_laenge=gc_laenge+(anz*(anz+1)/2)-1;
          end;
          erg=hc_laenge||gc_laenge;
          return (erg);
FINISH;

START hc(indiv,laenge,anz_allele);
          erg=J(laenge,1,0);
          start=0;
          do i=1 to &ma_number.;
                 do k=1 to 2;
                        allel=indiv[6+2*(i-1)+k];
                        if (allel<anz_allele[i]) then
                             erg[start+allel]=erg[start+allel]+1;
                 end;
                 start=start+anz_allele[i]-1;
          end;
          return (erg);
FINISH;

START gc(indiv,laenge,anz_allele);
          erg=J(laenge,1,0);
          start=0;
          do i=1 to &ma_number.;
                 allel1=indiv[6+2*(i-1)+1];
                 allel2=indiv[6+2*(i-1)+2];
                 if (allel1>allel2) then do;
                        rette=allel1; allel1=allel2; allel2=rette;
                 end;
                 anz=anz_allele[i];
                 max_ind=(((anz+1)*anz)/2)-1;
                 index=((anz+1)*(allel1-1))-(allel1*(allel1-1)/2)+(allel2+1-allel1);
                 if (index<=max_ind) then erg[start+index]=erg[start+index]+1;
                        start=start+max_ind;
          end;
          return (erg);
FINISH;

START create_fam_ptr(daten,fam_ptr);
          anz_fam=0;
          anz_obs=NROW(daten);
          alt_fam=0;
          do i=1 to anz_obs;
                 if (daten[i,1]-alt_fam) then do;
                        anz_fam=anz_fam+1;
                        fam_ptr[anz_fam]=i;
                        alt_fam=daten[i,1];
                 end;
          end;
          return (anz_fam);
FINISH;

fam_ptr=J(&max_fam.,1,0);
anz_fam=create_fam_ptr(daten,fam_ptr);

error=recode(daten,rdaten,recode_table,anz_allele);

if (error) then print error;

laenge=calc_laenge(anz_allele);

perm=0;
if (&simumf.>0) then do;
          perm=1;
          alle_hc=J(anz_fam,laenge[1],0);
          alle_gc=J(anz_fam,laenge[2],0);
       end;

anz_sibs=0;
hc_sibs_mean=J(laenge[1],1,0);
gc_sibs_mean=J(laenge[2],1,0);
hc_sibs_cov=J(laenge[1],laenge[1],0);
gc_sibs_cov=J(laenge[2],laenge[2],0);

fatal_error=0;
do fam=1 to anz_fam;
          start_fam=fam_ptr[fam];
          if (fam=anz_fam) then end_fam=anz_obs;
                                        else end_fam=fam_ptr[fam+1]-1;
          anz_aff=0; anz_unaff=0;
          aff_hcv=J(laenge[1],1,0);
          aff_gcv=J(laenge[2],1,0);
          unaff_hcv=J(laenge[1],1,0);
          unaff_gcv=J(laenge[2],1,0);
          do ind=start_fam to end_fam;
                 help_fam=rdaten[ind,1];
                    help_ind=rdaten[ind,2];
              if (rdaten[ind,3]*rdaten[ind,4]) then do;
                    indiv=rdaten[ind:ind,];
                    hcv=hc(indiv,laenge[1],anz_allele);
                    gcv=gc(indiv,laenge[2],anz_allele);

                    if (indiv[6]-2) then do;
                          anz_unaff=anz_unaff+1;
                          unaff_hcv=unaff_hcv+hcv;
                          unaff_gcv=unaff_gcv+gcv;
                       end;
                    else do;
                          anz_aff=anz_aff+1;
                          aff_hcv=aff_hcv+hcv;
                          aff_gcv=aff_gcv+gcv;
                      end;
                    end;
          end;
          if (anz_aff*anz_unaff) then do;
              anz_sibs=anz_sibs+1;
              aff_hcv=aff_hcv/anz_aff;
              aff_gcv=aff_gcv/anz_aff;
              unaff_hcv=unaff_hcv/anz_unaff;
              unaff_gcv=unaff_gcv/anz_unaff;
              hcv=aff_hcv-unaff_hcv;
              gcv=aff_gcv-unaff_gcv;

              hc_sibs_mean=hc_sibs_mean+hcv;
              gc_sibs_mean=gc_sibs_mean+gcv;
              hc_sibs_cov=hc_sibs_cov+hcv*hcv`;
              gc_sibs_cov=gc_sibs_cov+gcv*gcv`;
              if (perm=1) then do;
                    alle_hc[anz_sibs,]=hcv`;
                    alle_gc[anz_sibs,]=gcv`;
                 end;
           end;

end; /* do fam=1 */

T_sq_hc_sibs=0; T_sq_gc_sibs=0;

if (anz_sibs>1) then do;
          hc_sibs_mean=hc_sibs_mean/anz_sibs;
          gc_sibs_mean=gc_sibs_mean/anz_sibs;

          hc_sibs_cov=hc_sibs_cov-anz_sibs*hc_sibs_mean*hc_sibs_mean`;

          gc_sibs_cov=gc_sibs_cov-anz_sibs*gc_sibs_mean*gc_sibs_mean`;

          T_sq_hc_sibs=anz_sibs*((hc_sibs_mean)`)
                                        *GINV(hc_sibs_cov/(anz_sibs-1))*hc_sibs_mean;

          T_sq_gc_sibs=anz_sibs*((gc_sibs_mean)`)
                                        *GINV(gc_sibs_cov/(anz_sibs-1))*gc_sibs_mean;
       end;

    P_hc_sibs=1-probchi(T_sq_hc_sibs,laenge[1]);
    P_gc_sibs=1-probchi(T_sq_gc_sibs,laenge[2]);

    result=(anz_sibs||T_sq_hc_sibs||laenge[1]||P_hc_sibs)
            //(anz_sibs||T_sq_gc_sibs||laenge[2]||P_gc_sibs);

    if (anz_sibs<2) then perm=0;
    if (perm=1) then do;
          anz_sig_h=0; anz_sig_g=0;
          quelle=1; x=0;
          do i=1 to &simumf;
                    hc_sibs_mean=J(laenge[1],1,0);
                    gc_sibs_mean=J(laenge[2],1,0);
                    hc_sibs_cov=J(laenge[1],laenge[1],0);
                    gc_sibs_cov=J(laenge[2],laenge[2],0);
                    do fam=1 to anz_sibs;
                              call ranuni(quelle,x);
                              if (x<.5) then hcv=alle_hc[fam,]; else hcv=-alle_hc[fam,];
                              hc_sibs_mean=hc_sibs_mean+hcv`;
                              hc_sibs_cov=hc_sibs_cov+hcv`*hcv;
                              call ranuni(quelle,x);
                              if (x<.5) then gcv=alle_gc[fam,]; else gcv=-alle_gc[fam,];
                              gc_sibs_mean=gc_sibs_mean+gcv`;
                              gc_sibs_cov=gc_sibs_cov+gcv`*gcv;
                    end;
                    hc_sibs_mean=hc_sibs_mean/anz_sibs;
                    hc_sibs_cov=hc_sibs_cov-anz_sibs*hc_sibs_mean*hc_sibs_mean`;
                    PT_sq_hc_sibs=0;
                    PT_sq_hc_sibs=anz_sibs*((hc_sibs_mean)`)
                                        *GINV(hc_sibs_cov/(anz_sibs-1))*hc_sibs_mean;
                    if (PT_sq_hc_sibs>=T_sq_hc_sibs) then anz_sig_h=anz_sig_h+1;

                    gc_sibs_mean=gc_sibs_mean/anz_sibs;
                    gc_sibs_cov=gc_sibs_cov-anz_sibs*gc_sibs_mean*gc_sibs_mean`;
                    PT_sq_gc_sibs=0;
                    PT_sq_gc_sibs=anz_sibs*((gc_sibs_mean)`)
                                        *GINV(gc_sibs_cov/(anz_sibs-1))*gc_sibs_mean;
                    if (PT_sq_gc_sibs>=T_sq_gc_sibs) then anz_sig_g=anz_sig_g+1;
                 end; /* simumf */

                 anz_sig_h=anz_sig_h/&simumf.;
                 anz_sig_g=anz_sig_g/&simumf.;
                 simumf=&simumf.;
                 all_snp=1;
                 result=result//(simumf||all_snp||anz_sig_h||anz_sig_g);
              end;

          CREATE res_file FROM result;
          APPEND FROM result;
          run;

          quit;
          run;

data a; set res_file;
file "&outfile." mod;
if (_N_=1) then do;
          put;
          put "Marker: &ma_names.";
          put;
          put " no of ";
          put " families coding statistic df p-value";
          put col1 9.0 " haplotype " col2 9.2 col3 5.0
                    col4 E9.2;
       end;
if (_N_=2) then
          put " genotype " col2 9.2 col3 5.0
                    col4 E9.2;
if (_N_=3) then do;
          put;
          put "Permutation based (" col1 "replicates) p-values:";
          put "haplotype coding: " col3 E9.2 @;
          if (col2=1) then put " genotype coding: " col4 E9.2 @; else put;
       end;
run;

data a; set res_file end=schluss;
array pwert[4];
retain pwert;
retain familien;
retain replikationen;
length marker $200;
if (_N_=1) then do;
          familien=col1; pwert[1]=col4;
       end;
if (_N_=2) then pwert[3]=col4;
if (_N_=3) then do;
          replikationen=col1;
          pwert[2]=col3;
          pwert[4]=col4;
       end;
if (schluss=1) then do;
          marker="&ma_names.";
          keep pwert1--pwert4 familien replikationen marker;
          output;
       end;
run;

proc append base=result_hotel data=a;
run;
%mend;

Last Updated Date: 04/12/2013
Last Reviewed Date: 04/12/2013

Contact Information

Name: Dr Paul Albert
Chief and Senior Investigator
Biostatistics and Bioinformatics Branch
Phone: 301-496-5582
E-mail: albertp@mail.nih.gov

Staff Directory
Vision National Institutes of Health Home BOND National Institues of Health Home Home Storz Lab: Section on Environmental Gene Regulation Home Machner Lab: Unit on Microbial Pathogenesis Home Division of Intramural Population Health Research Home Bonifacino Lab: Section on Intracellular Protein Trafficking Home Lilly Lab: Section on Gamete Development Home Lippincott-Schwartz Lab: Section on Organelle Biology