Skip Navigation
  Print Page

Biostatistics and Bioinformatics Branch (BBB)

Skip sharing on social media links
Share this:

Association analysis of complex diseases using triads, parent-child dyads and singleton monads

count_triad_dyad_monad_R

##################################################################################################################
# #
# PGM: cnt_triad_dyad_monad #
# Path: C:/NICHD/Research/software/Fan/Joint_association_analysis/Count_Triad_Dyad_Monad/cnt_triad_dyad_monad.R #
# #
# R Level : R 3.0.1 #
# Operating System : Win 7 64 Enterprise #
# SAS : SAS 9.3M1 64bit #
# #
##################################################################################################################
# #
# Purpose : SAS and R program to count MONAD, DYAD and TRIAD biomarkers; #
# Both SAS and R code are provided #
# Although R code is run within SAS you can copy and paste the R code #
# into the R Gui or run R #
# #
# Run Dependencies : Need Paper:Association Analysis of Complex Diseases Using Triads, for mapping #
# The R code is completely independent of the SAS code and vice versa #
# #
# Functions/Macros/Packages #
# #
# Internal : #
# #
# External : R packages sqldf ans reshape #
# #
# Notes : This codes handles an arbitrary number of SNP pairs without any changes. #
# : All that is required is snp pairs be next to each other and the name begin with snp #
# : ie snp11 snp12 snp21 snp22 snp31 snp32 .. snp9991 snp9992 or snpxxx snpyyy #
# : DYAD,MONAD or TRIAD is determined by examining the number of family rows #
# : Ranges fgor faily type is not used. #
# : The R code at the end of this code is completely independent of SAS and can be #
# : cut and pasted into the R console. #
# : The only inputs are the simulation data and mappings from Fans paper. #
# #
# Program Flow R and SAS #
# #
# 1. Import simped.csv into a SAS dataset or R dataframe -edit missing col1 name to x #
# 2. Create format lookups or 'fomat' dataframes in R #
# 3. Determine the number of pairs (example has 2 but SAS program will hadle up to 999) #
# 4. Drop unneeded columns and transpose #
# 5. Determine if family group is TRIAD MONAD or DYAD (no need for ranges) #
# 6. Create PARENT1, PARENT2 and CHILD from ID. ID=3 is CHILD #
# 7. Stack SNP21 and SNP22 below SNP11 and SNP12 #
# 8. Let SNP = 10#SNP#1 + SNP#2 where # is 1,2, create a grouping factor SNP001 .. SNP999. #
# 9. Transpose so PARENT1, PARENT2 and CHILD are on the same row #
# 10. Concatenate PARENT1, PARENT2 and CHILD into up to six chars ie AeAaAA #
# 11. Add lvl is s01, s02 and s03 by looking up concatenation #
# 12. Tabulate frequency couts for each level ie SNP001 DYAD n01 11 #
# 13. Create all possibilites for SNP### Family type n## and poulate with 0s #
# We need to do this to fill in combinations missing in the data with 0s #
# 14. Fill in missing combinations with 0s ie SNP001 DYAD m01 0 #
# 15. Transpose by family tpe and ANP group using lvl to get names #
# 16. Output TRIAD MONAD or DYAD csv files #
# #
# Files #
# #
input1 <- "C:/NICHD/Research/software/Fan/Joint_association_analysis/Count_Triad_Dyad_Monad/simped.csv" #
# Column one in the csv did not have a heading so I added the label x #
# input2 <- Mapping from Fans Paper: Association Analysis of Complex Diseastes Using Triads..; #
# This mapping is hardcoded in both SAS and R #
# 'AAAAAA' = 'n1' #
# 'AAAaAA' = 'n2' #
# 'AAAaAa' = 'n3' #
# #
output1 <- "C:/NICHD/Research/software/Fan/Joint_association_analysis/Count_Triad_Dyad_Monad/R_out/dyad.csv" #
output2 <- "C:/NICHD/Research/software/Fan/Joint_association_analysis/Count_Triad_Dyad_Monad/R_out/monad.csv" #
output3 <- "C:/NICHD/Research/software/Fan/Joint_association_analysis/Count_Triad_Dyad_Monad/R_out/triad.csv" #
output4 <- "C:/NICHD/Research/software/Fan/Joint_association_analysis/cnt_triad_dyad_monad.R" #
# #
# #
# #
# Example Output MONAD case: #
# #
# s0 s1 s2 S #
# #
# SNP1 2 0 0 2 #
# SNP2 0 1 1 2 #
# #
# #
# #
# Functions/Macros Assumptions : #
# #
# Functions/Macros Parameters : #
# #
##################################################################################################################
# #
# Version History : #
# #
# Version Date Programmer Description #
# ------- --------- ---------- ----------- #
# 1.0 19SEP2013 deangelisrj creation #
# 2.0 26SEP2013 deangelisrj R is no longer dependent on SAS (programs are independent) #
# 3.0 08NOV2013 deangelisrj Changed newsnp1 and newsnp2 to snp1 and snp2 respectively #
# #
##################################################################################################################

library(sqldf)
library(reshape)

##### # # # # ### ##### ##### ### # # ###
# # # ## # # # # # # # ## # # #
# # # # # # # # # # # # # # #
#### # # # ## # # # # # # ## #
# # # # # # # # # # # # #
# # # # # # # # # # # # # # #
# ### # # ### # ##### ### # # ###

# all combinations for filling in final matrix with 0s

clpotr <- function(dat=clpmapsnp$lvl,lvl='n') {

# forms all combinations levels (s01,s02,s03) and SNP groups snp1--999
# for instance if we have two groups newsnp1 newsnp2
# and s01 s02 s03 this function will return
#
# snp lvl
# snp1 s01
# snp1 s02
# snp1 s03
# snp2 s01
# snp2 s02
# snp2 s02

clpdfmnxx <- as.data.frame(unique(dat[ which(substr(dat,1,1)==lvl) ]))
colnames(clpdfmnxx) <- 'lvl'

clpotrnxx <- sqldf('
select
l.snp
,r.lvl
from
clpdfmsnp as l, clpdfmnxx as r
')

clpotrnxx

}

# code/decode lookups
# give a code like AaAAaa this code returns then level ie n04

clprecode <- function(data, oldvalue, newvalue) {

# this is more general code and can be used for other recoding

# recode 12 to Aa
# recode 11 to AA
# recode 21 to Aa
# recode 22 to aa

# convert any factors to characters

if (is.factor(data)) data <- as.character(data)
if (is.factor(oldvalue)) oldvalue <- as.character(oldvalue)
if (is.factor(newvalue)) newvalue <- as.character(newvalue)

# create the return vector

newvec <- data

# put recoded values into the correct position in the return vector

for (i in unique(oldvalue)) newvec[data == i] <- newvalue[oldvalue == i]

newvec

}

################################################################
# #
# Triple letter acronyms are used to name all objects #
# Below is a dictionary of my terms (loosely followed) #
# #
# Nomenclature #
# clp - Cleft Palate #
# dfm - dataframe #
# mat - matrix #
# snp - SNP #
# nxx - level n01-n10 #
# nnn - same as nxxx #
# mao - mapping #
# SNP - genetic codes #
# sim - simulation #
# typ - type #
# fam - family #
# #
################################################################

# ### ### # # # # #### ###
# # # # # # # # # # # # #
# # # # # # # # # # # #
# # # # # ## # # #### #
# # # # # # # # # # #
# # # # # # # # # # # #
##### ### ### # # ### # ###

simped <- read.csv(file=input1)

# map SNP strings;
clpmapsnp <- read.table(text="
snpchr, lvl
AAAAAA, n01
AAAaAA, n02
AAAaAa, n03
AAaaAa, n04
AaAaAA, n05
AaAaAa, n06
AaAaaa, n07
AaaaAa, n08
Aaaaaa, n09
aaaaaa, n10
AaAAAA, n02
AaAAAa, n03
aaAAAa, n04
aaAaAa, n08
aaAaaa, n09
AAAA, m01
AAAa, m02
AaAA, m03
AaAa, m04
Aaaa, m05
aaAa, m06
aaaa, m07
AA, s01
Aa, s02
aa, s03
",sep=",",header=TRUE,
strip.white = TRUE,
stringsAsFactors=FALSE)

# map parent child;
clpmapduo <- read.table(text="
numduo, duo
11, AA
12, Aa
21, Aa
22, aa
",sep=",",header=TRUE,
strip.white = TRUE,
stringsAsFactors=FALSE)

# map n-TRIAD m-DYAD and s-MONAD;
clpmapfam <- read.table( text="
ltr, famsiz
s, MONAD
m, DYAD
n, TRIAD
",sep=",",header=TRUE,
strip.white = TRUE,
stringsAsFactors=FALSE)

#######################################################################
# #
# Determine MONAD, DYAD and TRIAD by examining number of members #
# in a family. 1-MONDAD 2-DYAD and 3-TRIAD #
# #
# Also provides names for family members PARENT1, PARENT2 and CHILD #
# Ranges are not used #
# #
##################################################################### #

clpfamtyp <- sqldf('
select
case r.cntrec
when 1 then "MONAD"
when 2 then "DYAD"
when 3 then "TRIAD"
else "error"
end as famtyp
,case l.id
when 1 then "PARENT1"
when 2 then "PARENT2"
when 3 then "CHILD"
else "error"
end as fammem
,l.*
from
simped as l left join
(select famid, count(famid) as cntrec from simped group by famid) as r
on
l.famid = r.famid
order
by famid, id
')

clpfamtyp <- clpfamtyp[, !(colnames(clpfamtyp) %in% c("Dadid","Momid","Gender","x"))]
head(clpfamtyp)

# clpfamtyp
# famtyp fammem Famid ID SNP11 SNP12 SNP21 SNP22
# 1 TRIAD PARENT1 1.1 1 2 2 1 2
# 2 TRIAD PARENT2 1.1 2 2 2 1 1
# 3 TRIAD CHILD 1.1 3 2 2 1 2
# 4 TRIAD PARENT1 2.1 1 2 2 2 1
# 5 TRIAD PARENT2 2.1 2 2 2 1 1
# 6 TRIAD CHILD 2.1 3 2 2 1 2
#

#######################################################################
# #
# Get meta data (data about data) #
# #
# Number SNP pairs in input data (documentation is based on 2 pairs) #
# #
# Colnames for pairs in teh data SNP11-SNP22 #
# Position of the first pair in the data (5 here) #
# #
##################################################################### #

clpfamtypold <- clpfamtyp
clpcolnam <- colnames(clpfamtyp)

clpcolnam # famtyp fammem Famid ID SNP11 SNP12 SNP21 SNP22

clpnumsnp <- length(grep ("SNP",clpcolnam))/2
c("Number of pairs of SNPs = ",clpnumsnp)
# "Number of pairs of SNPs = " "2"

clpallnam <- colnames((clpfamtyp)[grep ("SNP",clpcolnam)])
clpallnam # SNP11 SNP12 SNP21 SNP22

# position of first SNP
clpsnppos <- grep ("SNP",clpcolnam)[1]
c("Position of first SNP = ",clpsnppos)
# "Position of first SNP = " "5"

#######################################################################
# #
# The for loop below adds two extra columns to clpfamtyp #
# Note it also changes the orde to always have the lower number first#
# Note ob 4 SNP21=2 and SNP22=1 yet outpu 4 has the reverse #
# This is because both 21 and 12 map to Aa (see paper) #
# #
# #
# INPUT #
# #
# Create a column vector call snp with 0s #
# famtyp fammem Famid ID SNP11 SNP12 SNP21 SNP22 #
# 1 TRIAD PARENT1 1.1 1 2 2 1 2 #
# 2 TRIAD PARENT2 1.1 2 2 2 1 1 #
# 3 TRIAD CHILD 1.1 3 2 2 1 2 #
# 4 TRIAD PARENT1 2.1 1 2 2 2 1 #
# 5 TRIAD PARENT2 2.1 2 2 2 1 1 #
# 6 TRIAD CHILD 2.1 3 2 2 1 2 #
# #
# OUTPUT #
# #
# famtyp fammem Famid ID SNP11 SNP12 SNP21 SNP22 snp1 snp2 #
# 1 TRIAD PARENT1 1.1 1 2 2 1 2 22 12 #
# 2 TRIAD PARENT2 1.1 2 2 2 1 1 22 11 #
# 3 TRIAD CHILD 1.1 3 2 2 1 2 22 12 #
# 4 TRIAD PARENT1 2.1 1 2 2 1 2 22 12 #
# 5 TRIAD PARENT2 2.1 2 2 2 1 1 22 11 #
# 6 TRIAD CHILD 2.1 3 2 2 1 2 22 12 #
# #
# Process #
# #
# 1. Switch the first and second sigle diget SNP so the smaller #
# number is first #
# 2. mutiple the first diget by 10 and add to second 1,2 = 12 #
# 3. add a column to clpfamtyp with the double diget number #
# #
# #
##################################################################### #

#######################################################################
# #
# Create a column vector of 0s to store backup needed for the #
# switch ie 21 -> 22 or aA -> Aa - see algorithm above #
# #
##################################################################### #

clpfamtypsav <- as.data.frame(clpfamtyp[,5])
colnames(clpfamtypsav) <- c("snp")
clpfamtypsav$snp <- 0
head(clpfamtypsav$snp)

# filled with 0s
# [1] 0 0 0 0 0 0

#######################################################################
# #
# Parent 1 needs to always have the lower biomarker first ie 1 when #
# possible. #
# switch ie 21 -> 22 or aA -> Aa - see algorithm above #
# #
##################################################################### #

for (j in seq(0,clpnumsnp,2))
{
for (i in 1:nrow(clpfamtyp))
{
k=clpsnppos + j/2;
if ( clpfamtyp[i,clpsnppos+j] == 2 ) {
sav = clpfamtyp[i,clpsnppos+j]
clpfamtyp[i,clpsnppos+j] = clpfamtyp[i,1+j+clpsnppos]
clpfamtyp[i,1+j+clpsnppos] = sav
}
clpfamtypxxx <- clpfamtyp[i,clpsnppos+j]*10 + clpfamtyp[i,1+j+clpsnppos]
clpfamtypsav[i,] <- clpfamtyp[i,clpsnppos+j]*10 + clpfamtyp[i,1+j+clpsnppos]
}
colnames(clpfamtypsav) <- paste("snp",1+j/2,sep='')
clpfamtyp <- cbind(clpfamtyp,clpfamtypsav)
}
head(clpfamtyp)

#
# The last two columns were added - some switching took place
# but is not shown because I have also switched the single digit biomarkers
#
# famtyp fammem Famid ID SNP11 SNP12 SNP21 SNP22 snp1 snp2
# 1 TRIAD PARENT1 1.1 1 2 2 1 2 22 12
# 2 TRIAD PARENT2 1.1 2 2 2 1 1 22 11
# 3 TRIAD CHILD 1.1 3 2 2 1 2 22 12
# 4 TRIAD PARENT1 2.1 1 2 2 1 2 22 12
# 5 TRIAD PARENT2 2.1 2 2 2 1 1 22 11
# 6 TRIAD CHILD 2.1 3 2 2 1 2 22 12
#
#

#######################################################################
# #
# Create a column vector of 0s to store backup needed for the #
# switch ie 21 -> 22 or aA -> Aa - see algorithm above #
# #
# #
#######################################################################

clpfamtyp <- clpfamtyp[, !(colnames(clpfamtyp) %in% c("ID",clpallnam))]
head(clpfamtyp)

#
# famtyp fammem Famid snp1 snp2
#
# 1 TRIAD PARENT1 1.1 22 12
# 2 TRIAD PARENT2 1.1 22 11
# 3 TRIAD CHILD 1.1 22 12
# 4 TRIAD PARENT1 2.1 22 12
# 5 TRIAD PARENT2 2.1 22 11
# 6 TRIAD CHILD 2.1 22 12
#

#######################################################################
# #
# fat to skinny data structure. note snp1-2 added and only one #
# one column for biomarkers - value #
# #
##################################################################### #

clpfamxpo <- melt(clpfamtyp, id=c("famtyp","fammem","Famid"))

#
# famtyp fammem Famid variable value
#
# 1 TRIAD PARENT1 1.1 snp1 22
# 2 TRIAD PARENT2 1.1 snp1 22
# 3 TRIAD CHILD 1.1 snp1 22
# 4 TRIAD PARENT1 2.1 snp1 22
# 5 TRIAD PARENT2 2.1 snp1 22
# 6 TRIAD CHILD 2.1 snp1 22
# ....
# 1348 MONAD CHILD 248.1 snp2 11
# 1349 MONAD CHILD 249.1 snp2 12
# 1350 MONAD CHILD 250.1 snp2 11
#

#######################################################################
# #
# recode 11,12,22 to AAAa and aa repectively #
# #
##################################################################### #

clpfamxpo$value <- clprecode(clpfamxpo$value, clpmapduo$numduo, clpmapduo$duo)

#
# famtyp fammem Famid variable value
#
# 1 TRIAD PARENT1 1.1 snp1 aa
# 2 TRIAD PARENT2 1.1 snp1 aa
# 3 TRIAD CHILD 1.1 snp1 aa
# 4 TRIAD PARENT1 2.1 snp1 aa
# 5 TRIAD PARENT2 2.1 snp1 aa
# 6 TRIAD CHILD 2.1 snp1 aa
#
#
# 1345 MONAD CHILD 245.1 snp2 AA
# 1346 MONAD CHILD 246.1 snp2 AA
# 1347 MONAD CHILD 247.1 snp2 Aa
#

#######################################################################
# #
# back to fat but with snp1-999 and family across #
# #
##################################################################### #

clpfamfat=cast(clpfamxpo,famtyp+Famid+variable~fammem,value="value");head(clpfamfat)

#
# famtyp Famid variable CHILD PARENT1 PARENT2
#
# 1 DYAD 201.1 snp1 Aa <NA> Aa
# 2 DYAD 201.1 snp2 Aa <NA> Aa
# 3 DYAD 202.1 snp1 AA <NA> Aa
# 4 DYAD 202.1 snp2 AA <NA> AA
# 5 DYAD 203.1 snp1 Aa <NA> Aa
# 6 DYAD 203.1 snp2 aa <NA> Aa

#######################################################################
# #
# change the <NA> to empty #
# #
##################################################################### #

clpfampop <- sqldf('select famtyp, Famid, variable
,case when PARENT1 is NULL then "" else PARENT1 end as PARENT1
,case when PARENT2 is NULL then "" else PARENT2 end as PARENT2
,case when CHILD is NULL then "" else CHILD end as CHILD
from clpfamfat');

#
# famtyp Famid variable PARENT1 PARENT2 CHILD
#
# 1 DYAD 201.1 snp1 Aa Aa
# 2 DYAD 201.1 snp2 Aa Aa
# 3 DYAD 202.1 snp1 Aa AA
# 4 DYAD 202.1 snp2 AA AA
# 5 DYAD 203.1 snp1 Aa Aa
# 6 DYAD 203.1 snp2 Aa aa
# ...
# 495 TRIAD 198.1 snp1 aa aa aa
# 496 TRIAD 198.1 snp2 AA AA AA
# 497 TRIAD 199.1 snp1 Aa Aa aa
#
#

#######################################################################
# #
# concatenate family members biomarkers #
# #
##################################################################### #

clpfampop$snp <- gsub(" ","",paste(clpfampop$PARENT1[],clpfampop$PARENT2[],clpfampop$CHILD[]))
clpfampop <- clpfampop[, !(colnames(clpfampop) %in% c("PARENT1", "PARENT2", "CHILD"))]

#
# famtyp Famid variable snp
#
# 1 DYAD 201.1 snp1 AaAa
# 2 DYAD 201.1 snp2 AaAa
# 3 DYAD 202.1 snp1 AaAA
# 4 DYAD 202.1 snp2 AAAA
# 5 DYAD 203.1 snp1 AaAa
# 6 DYAD 203.1 snp2 Aaaa
#

#######################################################################
# #
# lookup the levels associated with the family biomarkers #
# #
##################################################################### #

clpfampop$lvl <- clprecode(clpfampop$snp, clpmapsnp$snpchr, clpmapsnp$lvl)

#
# famtyp Famid variable snp lvl
#
# 1 DYAD 201.1 snp1 AaAa m04
# 2 DYAD 201.1 snp2 AaAa m04
# 3 DYAD 202.1 snp1 AaAA m03
# 4 DYAD 202.1 snp2 AAAA m01
# 5 DYAD 203.1 snp1 AaAa m04
# 6 DYAD 203.1 snp2 Aaaa m05

#####################################################################
# #
# create frequency of levels by family type and SNP group #
# #
##################################################################### #

clpfamcnt <- sqldf(
'select
famtyp
,variable
,lvl
,count(lvl) as cnt
from
clpfampop
group
by famtyp, variable, lvl
order
by famtyp, variable, lvl')

# fill in with zeros when we have missing counts

# famtyp variable lvl cnt
#
# 1 DYAD snp1 m03 1
# 2 DYAD snp1 m04 8
# 3 DYAD snp1 m05 3
# 4 DYAD snp1 m06 5
# 5 DYAD snp1 m07 8
#
# 6 DYAD snp2 m01 16

#######################################################################
# #
# There are missing biomarkers. For instance M01 and M02 for #
# snp1 are missing. We need to add 0s for M01 and M02 #
# #
# The code below creates all possible combinations and gives them #
# 0 count. When a level is missing a 0 row for that level will #
# added. #
# #
##################################################################### #

# get unique levels SNP progra handles up to 999 SNPs
clpdfmsnp=as.data.frame(unique(clpfampop$variable))
colnames(clpdfmsnp) <- 'snp'

clpdfmsnp
#
# snp
#1 snp1
#2 snp2
#

#######################################################################
# #
# Form all combinations of snp1-99 and level (only 2 SNPs in doc) #
# #
##################################################################### #

clpotrnnn <- clpotr(lvl='n')
clpotrmmm <- clpotr(lvl='m')
clpotrsss <- clpotr(lvl='s')
clpotrsss

#######################################################################
# #
# Simples case here snp1-2 and level s01-s03 #
# #
##################################################################### #

#
# snp lvl
#
# 1 snp1 s01
# 2 snp1 s02
# 3 snp1 s03
# 4 snp2 s01
# 5 snp2 s02
# 6 snp2 s03

#######################################################################
# #
# Add Family type letter #
# We can use ltr to get family type ie m->DYAD s->MONAD n->TRIAD #
# #
##################################################################### #

clpotr <- rbind(clpotrnnn, clpotrmmm, clpotrsss)
clpotr$zro <- 0
clpotr$ltr <- substr(clpotr$lvl,1,1)

#
# snp lvl zro ltr
#
# 1 snp1 n01 0 n
# 2 snp1 n02 0 n
# 3 snp1 n03 0 n
# 4 snp1 n04 0 n
# 5 snp1 n05 0 n
#

# convert letter to family type using lookup

clpotr$famgrp <- clprecode(clpotr$ltr, clpmapfam$ltr, clpmapfam$famsiz)

# clpotr (has all possibilities)
# snp lvl zro ltr famgrp
#
# 1 snp1 n01 0 n TRIAD
# 2 snp1 n02 0 n TRIAD
# 3 snp1 n03 0 n TRIAD
# 4 snp1 n04 0 n TRIAD
# 5 snp1 n05 0 n TRIAD
#

# clpfamcnt
# famtyp variable lvl cnt
#
# 1 DYAD snp1 m03 1
# 2 DYAD snp1 m04 8
# 3 DYAD snp1 m05 3
# 4 DYAD snp1 m06 5
# 5 DYAD snp1 m07 8
# 6 DYAD snp2 m01 16

#######################################################################
# #
# Join the all combinations table to observed counts #
# when count is missing substitute 0 from the all combinations table #
# #
##################################################################### #

clpaddzro <- sqldf('
select
l.snp
,l.lvl
,l.famgrp
,coalesce(r.cnt,l.zro) as cnt
from
clpotr as l left join clpfamcnt as r
on
l.famgrp = r.famtyp and
l.lvl = r.lvl and
l.snp = r.variable
')

#
# snp lvl famgrp cnt
#
# 35 snp1 s01 MONAD 3
# 36 snp1 s02 MONAD 6
# 37 snp1 s03 MONAD 16
#
# 38 snp2 s01 MONAD 19
# 39 snp2 s02 MONAD 5
# 40 snp2 s03 MONAD 1
#

#######################################################################
# #
# Split out the three family types #
# #
##################################################################### #

clpnnn <- clpaddzro[clpaddzro$famgrp == 'TRIAD',]
clpsss <- clpaddzro[clpaddzro$famgrp == 'MONAD',]
clpmmm <- clpaddzro[clpaddzro$famgrp == 'DYAD',]

#
# snp lvl famgrp cnt
#
# 21 snp1 m01 DYAD 0 * added zeros
# 22 snp1 m02 DYAD 0
# 23 snp1 m03 DYAD 1
# 24 snp1 m04 DYAD 8
# 25 snp1 m05 DYAD 3
# 26 snp1 m06 DYAD 5
# 27 snp1 m07 DYAD 8
# 28 snp2 m01 DYAD 16

#######################################################################
# #
# Skinny to fat table - input has all the counts in one column #
# level is now across with possible 0s #
# #
##################################################################### #
#

clpfatnnn=cast(clpnnn,snp+famgrp~lvl,value="cnt");head(clpfatnnn)
clpfatmmm=cast(clpmmm,snp+famgrp~lvl,value="cnt");head(clpfatmmm)
clpfatsss=cast(clpsss,snp+famgrp~lvl,value="cnt");head(clpfatsss)

#
# snp famgrp s01 s02 s03
# 1 snp1 MONAD 3 6 16
# 2 snp2 MONAD 19 5 1
#

clpsummmm <- cbind(clpfatmmm,sum = apply(clpfatmmm,1,sum))
clpsumsss <- cbind(clpfatsss,sum = apply(clpfatsss,1,sum))
clpsumnnn <- cbind(clpfatnnn,sum = apply(clpfatnnn,1,sum))

clpsumsss

# snp famgrp s01 s02 s03 sum
#
# 1 snp1 MONAD 3 6 16 25
# 2 snp2 MONAD 19 5 1 25
#
# output DYAD MONAD and TRIAD

write.csv(clpsummmm, file=output1)
write.csv(clpsumsss, file=output2)
write.csv(clpsumnnn, file=output3)

Last Updated Date: 11/21/2013
Last Reviewed Date: 11/21/2013

Contact Information

Name: Dr Paul Albert
Chief and Senior Investigator
Biostatistics and Bioinformatics Branch
Phone: 301-496-5582
E-mail: albertp@mail.nih.gov

Staff Directory
Vision National Institutes of Health Home BOND National Institues of Health Home Home Storz Lab: Section on Environmental Gene Regulation Home Machner Lab: Unit on Microbial Pathogenesis Home Division of Intramural Population Health Research Home Bonifacino Lab: Section on Intracellular Protein Trafficking Home Lilly Lab: Section on Gamete Development Home Lippincott-Schwartz Lab: Section on Organelle Biology