# Biostatistics and Bioinformatics Branch (BBB)

## A Cross-population Extended Haplotype-based Homozygosity Score Test (xp-EHHST)

### xpEHHST.h

#include <stdio.h>
//#include <stdlib.h>
#include <iostream>
#include <ctype.h>
#include <vector>
//#include <iterator>
#include <string>
#include <string.h>
#include <algorithm>
#include <math.h>
#include <map>

// NOTE(review): presumably a small numerical tolerance; its uses are outside this view -- confirm.
#define epsilon 0.00001
#define TEST 20 // maximum number of gap SNPs before exiting in loop calculation

// NOTE(review): this directive is at header scope, so it also applies to every file that includes this header.
using namespace std;

// Ordering functor for C-string map keys: yields true when s1 sorts
// strictly before s2 according to strcmp.
struct strCmp
{
    bool operator()(const char* s1, const char* s2) const
    {
        const int order = strcmp(s1, s2);
        return order < 0;
    }
};

// Compare strings s and t over the inclusive index range [n, m]; n <= m is expected.
//
// Returns 0 when every position in the range matches, or when both strings
// reach their terminating '\0' at the same position inside the range.
// Otherwise returns s[i] - t[i] for the first differing index i in the range.
//
// When n > m an error message is printed and no position of the range is
// compared: the result is 0 if n == m + 1, otherwise s[n] - t[n].
int Memcmp(const char* s, const char* t, int n, int m)
{
    if (n > m)
        printf("Error: m must be larger than n!\n");

    int i = n;
    // Check the bound before touching s[i] / t[i], so that index m + 1 is
    // never read when the whole range matches (the buffers may end at m).
    while (i <= m && s[i] == t[i])
    {
        if (s[i] == '\0')
            return 0;
        ++i;
    }

    if (i == m + 1)
        return 0;

    return s[i] - t[i];
}

// State and routines for the cross-population extended haplotype-based
// homozygosity score test (xp-EHHST).
// Every member is public: the "private:" marker further down is left disabled.
class xpEHHT
{
public:
    // Give the scalar and pointer members a defined initial state; they were
    // previously left indeterminate until first assignment. The vectors start empty.
    xpEHHT()
        : snp_num(0), out_ehh(NULL), output_ehh(NULL),
          mean_R(0.0), mean_Rsq(0.0), mean_L(0.0), mean_Lsq(0.0)
    {
    }

    void compute_ehhst(int index);       // calculate the score test for haplotype data
    void compute_allele_freq(int index); // calculate the allele frequency for locus "index"

    int snp_num;                         // number of SNPs
    std::vector<std::string> snp_list;   // NOTE(review): presumably one entry per SNP -- confirm

    double ndist(double z, bool upper);  // normal distribution function
                                         // NOTE(review): "upper" presumably selects the upper tail -- confirm
    char *out_ehh;                       // NOTE(review): presumably the output file name -- confirm
    FILE *output_ehh;                    // NOTE(review): presumably the output stream -- confirm

    // private:
    std::vector<char *> data;

    std::vector<int> phy_map;
    std::vector<std::string> allele_coded_0;
    std::vector<std::string> allele_coded_1;

    std::vector<float> p;                // allele frequencies

    // NOTE(review): the three vectors below are filled outside this view;
    // the names suggest saved mean / sigma / score values -- confirm.
    std::vector<float> musav;
    std::vector<float> sigmasav;
    std::vector<float> Ssav;

    double mean_M (int index);
    double count_homozygotes(int index); // count all homozygotes around locus "index"

    double mean_R, mean_Rsq;
    void mean_R_Rsq_hap (int index);     // calculate E R and E R^2 for haplotype data

    double mean_L, mean_Lsq;
    void mean_L_Lsq_hap (int index);     // calculate E L and E L^2 for haplotype data
};

