/******** Version: July 26th. Adding the parameter SINGLE (0 remove single samples, 1 leave untouched) *********/ /************************************************************************************ rpm.c a program to read in data to be used in the RPM method *************************************************************************************/ #include #include #include #include #define PATHSIZE 100 #define MAXLABELS 100 #define LINESIZE 100000 #define ADR(t,x) ( t=x, &t ) // used to permute the traits struct array { double trait; int perm; }; struct space{ char str[20]; //store a string }; // used for the first output table struct table1{ int group; char *label; int num; }; typedef struct space space; typedef struct table1 table1; void read_data(char pfile[], char ifile[], int *num_of_rows, int *tNum, int *traitType, int *cNum, int *gNum, int *MissValue, int *permNum, long int *seed, int *zerorsq, float *pseq100, float *pseq1000, double *alpha, int *ncomb, int *single, int *covGroups, int **TraitTest, int *tCounter, int **CovTest[], int **cCounter, int **ID, double **TD[], space **CG[]); void make_label(double **trait[], int **N, char **label[], int *nlabels, int **indirect, int **invalid, int nrows, double **TD, int Tcol, space **CG, int CovCols[], int nCov, char *sMissValue ); double run_rpm(double **trait, int *N, int nlabels, int *indirect, int *invalid, double alpha, int single, double **sum, double **sumsq); int choose_merge(int nlabels, int tnlabels, double *sum, double *sumsq, int N[], int invalid[], int *loc1, int *loc2, double alpha, int single); void get_output(table1 **outtable, int indirect[], char *label[], int nlabels, int N[]); double rsq(int nlabels, double *sum, double *sumsq, int N[], int invalid[], int single); void perm_trait(int num_of_rows, double **TD[], int Tcol, double **trait[], int *N, int nlabels); /************************************************************************************** method: main desc: The main program. This method reads the text files and run the rpm program param: none return: 0 if exits without errors, else 1 author: Tsvika Klein date: 03/25/04 **************************************************************************************/ int main(int argc, char *argv[]) { int *indirect; // indicates the cells merged together. int *invalid; // indicates cells that are not active anymore (after merge) double **trait; //holds all the traits val for each unique label int *N; //number of traits(samples) for each unique label int Norig[MAXLABELS]; //number of traits(samples) for each unique label char **label; // holds all the labels double *sum; // holds the sum of the traits for each unique label double *sumsq; // holds the sum of the traits squared for each unique label double var; // holds the variance of a specific group before printing to .table int nlabels=0; //number of unique labels int **outtable2; // holds the group assignment for table2 char *s;//temp string for a new label char sMissValue[30];//temp string for a new label table1 *outtable; // holds the info for table1 int i,p,t,l1,l2,l3,g,n,x,y,ind; char *param = "param"; // default param name char *input = "data"; // default input file name char *output = "data.out"; // default output file name char output_table[200]; // store the outtable file name char output_table2[200]; // store the outtable file name FILE *ofp, *ofs; // output file point and seed file pointer FILE **ofp2; // another output file pointer FILE *ofp_table, *ofp_table2; // file pointers for table1 & table2 int *CovCols; // the two covariates to be tested int num_of_rows = 0; // store total number of lines in the data int tNum, // the number of traits in a line of data traitType = 1, // the quantative and qualitive representation cNum, // the number of covariates in a line of data gNum, // the number of genotypes in a line of data MissValue = -777, // the number used to represent the missing value in the input data permNum = 1000, // the number of permutations zerorsq = 1, // 0 - run perm for all, 1 - skip permutations if original rsq is 0 numOrigRsq = 0, // number of rsq to be stored in an array ncomb = 2, // number of combinations of loci to look at index = 0, numMissing=0, // number of missing individuals from the analysis single=0; float pseq100=0.1; // sequential testing use Pval<0.1 after 100 permutations float pseq1000=0.01; // sequential testing use Pval<0.01 after 1000 permutations double alpha = 0.99; // alpha to be used in tuckey long int seed = 0; // a seed value. int *ID; // ID array double **TD; // double type array for "Traits" if it is quantitive double **tTD; struct space **CG; //store "Covariates" and "Geno Loci" int *TraitTest; // traits to use for the test int **CovTest; // covariates to use for the test int covGroups; // determines the number of covariate groups double origrsq; // the rsq values for all the combinatios double pval; // the p-values for all the combinations double permrsq; // the rsq of the permutation int tCounter, *cCounter; // number of traits and covariates. // read arguments in case the user specified them for (i=0;i=100 ){ for (p=101;p<=permNum && p<=1000;p++){ for (i=0;i=1000 ){ for (p=1001;p<=permNum;p++){ for (i=0;i=100 ){ for (p=101;p<=permNum && p<=1000;p++){ for (i=0;i=1000 ){ for (p=1001;p<=permNum;p++){ for (i=0;i 1 && N[j] > 1 && invalid[i] == 0 && invalid[j] == 0){ mean1 = sum[i]/N[i]; mean2 = sum[j]/N[j]; meandiff=mean1-mean2; //printf("meandiff=%f ",meandiff); var1 = (sumsq[i]-(pow(sum[i],2)/N[i]))/(N[i]-1); var2 = (sumsq[j]-(pow(sum[j],2)/N[j]))/(N[j]-1); t = meandiff / sqrt(var1/N[i] + var2/N[j]); df = pow((var1/N[i] + var2/N[j]),2) / ( ( pow(var1/N[i],2)/(N[i]-1) ) + ( pow(var2/N[j],2)/(N[j]-1) ) ); q = qtrng(ADR(_f0,alpha),ADR(_f1,df),ADR(_f2,tnlabels),ADR(_l0,0)); if (fabs(meandiff)1 || single==1)){ // Modified to remove labels with one sample sswg += ( sumsq[i] - ( pow(sum[i],2)/N[i] ) ); sstot1 += sumsq[i]; sstot2 += sum[i]; sstot3 += N[i]; } } sstot = sstot1 - pow(sstot2,2)/sstot3; r2 = 1 - sswg/sstot; return r2; } /************************************************************************************** method: get_output desc: this method sets up the output of table1 param: outtable - array that holds the information of the output for table1 indirect - indicates the cells merged together. label - array that holds the labels nlabels - holds the number of labels N - array that holds the number of samples in each label return: none author: Tsvika Klein date: 06/24/04 ***************************************************************************************/ void get_output(table1 **outtable, int indirect[], char *label[], int nlabels, int N[]){ int i, j, g, gsearch, update; int tindirect[MAXLABELS]; *outtable = (table1*)malloc (MAXLABELS * sizeof(table1)); // modifies indirect to hold the last groupings for (i=0;iperm - a2->perm author: Tsvika Klein date: 03/25/04 ***************************************************************************************/ int comp_ints(const struct array *a1, const struct array *a2) { return (a1->perm - a2->perm); } /************************************************************************************** method: perm_traits desc: this method permutes the trait values param: num_of_rows - number of samples TD - array that holds all the trait values Tcol - the trait column that should be tested trait - array that holds all the traits in each label N - array that holds the number of samples in each label nlabels - number of labels return: none author: Tsvika Klein date: 03/25/04 ***************************************************************************************/ void perm_trait(int num_of_rows, double **TD[], int Tcol, double **trait[], int *N, int nlabels){ int i,n,l; double val; struct array pt[num_of_rows]; // permute the trait for (i=0;i q0006) goto L_20; q0028=q0005*pow(*q0021,-q0009); q0029=*q0021*q0009; if(*q0021 == q0010) q0027=q0012; if(*q0021 == q0011) q0027=q0013; if(!(*q0021 == q0010 || *q0021 == q0011)) q0027=sqrt(q0029)*q0015[0]/(q0010+((q0015[1]/q0029+q0015[2])/ q0029+q0015[3])/q0029); q0027=log(q0027**q0022*q0024*q0028); L_20: q0030=q0024; q0002[0]=-q0010; q0002[q0017]=-q0010; q0031=q0010; q0032=q0010; for(q0033=1;q0033<=q0019;q0033++){ q0033_=q0033-1; q0030-=q0024; L_21: q0030=-q0030; q0034=q0025+q0030; q0035=q0007; if(q0032 <= q0004 && q0033 > q0018) goto L_26; q0036=q0027-q0034*q0034*q0009; q0037=alnorm(&q0034,ADR(_l0,1)); q0038=alnorm(ADR(_d0,q0034-*q0023),ADR(_l0,1))-q0037; if(q0038 > q0007) q0035=exp(q0036+q0026*log(q0038)); if(*q0021 > q0006) goto L_26; q0039=-q0017; L_22: q0039+=q0017; for(q0040=1;q0040<=q0017;q0040++){ q0040_=q0040-1; q0041=q0040+q0039; if(q0002[q0041-1] > q0007) goto L_23; q0042=q0028*q0040; if(q0040 < q0017) q0002[q0041]=-q0010; q0043=exp(q0042); q0002[q0041-1]=*q0023*q0043; q0001[q0041-1]=*q0021*(q0042+q0009-q0043*q0043*q0009); L_23: q0044=q0007; q0038=alnorm(ADR(_d0,q0034-q0002[q0041-1]),ADR(_l0,1))- q0037; if(q0038 > q0007) q0044=exp(q0036+q0001[q0041-1]+q0026*log(q0038)); q0035+=q0044; if(q0044 > q0003) goto L_24; if(q0041 > q0016 || q0033 > q0018) goto L_25; L_24: ; } L_25: q0028=-q0028; if(q0028 < q0007) goto L_22; L_26: q0000+=q0035; if((q0033 > q0018 && q0035 <= q0004) && q0031 <= q0004) goto L_99; q0032=q0031; q0031=q0035; if(q0030 > q0007) goto L_21; } L_99: return(q0000); } /*end of function*/ double /*FUNCTION*/ qtrng(double *q0017,double *q0015,double *q0016, long int *q0013) { long int q0022,q0014,q0022_; double q0025,q0023,q0024,q0019,q0020,prtrng(),q0018,q0021,qtrng0(), q0000; static long q0012=8; static double q0001=0.001e0; static double q0002=0.75e0; static double q0003=0.80e0; static double q0004=0.90e0; static double q0005=0.99e0; static double q0006=0.995e0; static double q0007=1.75e0; static double q0008=1.0e0; static double q0009=2.0e0; static double q0010=5.0e0; static double q0011=1.0e-4; *q0013=0; q0014=0; if(*q0015 < q0008 || *q0016 < q0009) *q0013=1; if(*q0017 < q0004 || *q0017 > q0005) *q0013=2; if(*q0013 != 0) goto L_99; q0018=qtrng0(q0017,q0015,q0016,&q0014); if(q0014 != 0) goto L_99; q0019=prtrng(&q0018,q0015,q0016,&q0014); if(q0014 != 0) goto L_99; q0000=q0018; if(fabs(q0019-*q0017) < q0001) goto L_99; if(q0019 > *q0017) q0019=q0007**q0017-q0002*q0019; if(q0019 < *q0017) q0020=*q0017+(*q0017-q0019)*(q0008-*q0017)/(q0008-q0019)*q0002; if(q0020 < q0003) q0020=q0003; if(q0020 > q0006) q0020=q0006; q0021=qtrng0(&q0020,q0015,q0016,&q0014); if(q0014 != 0) goto L_99; for(q0022=2;q0022<=q0012;q0022++){ q0022_=q0022-1; q0020=prtrng(&q0021,q0015,q0016,&q0014); if(q0014 != 0) goto L_99; q0023=q0019-*q0017; q0024=q0020-*q0017; q0000=(q0018+q0021)/q0009; q0025=q0024-q0023; if(fabs(q0025) > q0011) q0000=(q0024*q0018-q0023*q0021)/q0025; if(fabs(q0023) < fabs(q0024)) goto L_12; q0018=q0021; q0019=q0020; L_12: if(fabs(q0019-*q0017) < q0001*q0010) goto L_99; q0021=q0000; } L_99: if(q0014 != 0) *q0013=9; return(q0000); } /*end of function*/ double /*FUNCTION*/ qtrng0(double *q0011,double *q0012,double *q0014, long int *ifault) { double _d0,ppnd(),q0013,q0000,q0010; static double q0001=120.0e0; static double q0002=0.5e0; static double q0003=1.0e0; static double q0004=4.0e0; static double q0005=0.8843e0; static double q0006=0.2368e0; static double q0007=1.214e0; static double q0008=1.208e0; static double q0009=1.4142e0; q0010=ppnd(ADR(_d0,q0002+q0002**q0011),ifault); if(*q0012 < q0001) q0010+=(q0010*q0010*q0010+q0010)/ *q0012/q0004; q0013=q0005-q0006*q0010; if(*q0012 < q0001) q0013+=-q0007/ *q0012+q0008*q0010/ *q0012; q0000=q0010*(q0013*log(*q0014-q0003)+q0009); return(q0000); } /*end of function*/ double /*FUNCTION*/ alnorm(double *q0029,int *q0027) { int q0026; double q0000,q0030,q0028; static double q0001=0.0e0; static double q0002=1.0e0; static double q0003=0.5e0; static double q0024=7.0e0; static double q0025=18.66e0; static double q0004=1.28e0; static double q0005=0.398942280444e0; static double q0006=0.39990348504e0; static double q0007=0.398942280385e0; static double q0008=5.75885480458e0; static double q0009=2.62433121679e0; static double q0010=5.92885724438e0; static double q0011=-29.8213557807e0; static double q0012=48.6959930692e0; static double q0013=-3.8052e-8; static double q0014=3.98064794e-4; static double q0015=-0.151679116635e0; static double q0016=4.8385912808e0; static double q0017=0.742380924027e0; static double q0018=3.99019417011e0; static double q0019=1.00000615302e0; static double q0020=1.98615381364e0; static double q0021=5.29330324926e0; static double q0022=-15.1508972451e0; static double q0023=30.789933034e0; q0026=*q0027; q0028=*q0029; if(q0028 >= q0001) goto L_10; q0026=!q0026; q0028=-q0028; L_10: if(q0028 <= q0024 || (q0026 && q0028 <= q0025)) goto L_20; q0000=q0001; goto L_40; L_20: q0030=q0003*q0028*q0028; if(q0028 > q0004) goto L_30; q0000=q0003-q0028*(q0005-q0006*q0030/(q0030+q0008+q0011/(q0030+ q0009+q0012/(q0030+q0010)))); goto L_40; L_30: q0000=q0007*exp(-q0030)/(q0028+q0013+q0019/(q0028+q0014+q0020/ (q0028+q0015+q0021/(q0028+q0016+q0022/(q0028+q0017+q0023/(q0028+ q0018)))))); L_40: if(!q0026) q0000=q0002-q0000; return(q0000); } /*end of function*/ void /*FUNCTION*/ normp(double *z,double *q0019,double *q0020,double *q0018) { double q0021,q0017; static double q0000=220.2068679123761e0; static double q0001=221.2135961699311e0; static double q0002=112.0792914978709e0; static double q0003=33.91286607838300e0; static double q0004=6.373962203531650e0; static double q0005=.7003830644436881e0; static double q0006=.3526249659989109e-01; static double q0007=440.4137358247522e0; static double q0008=793.8265125199484e0; static double q0009=637.3336333788311e0; static double q0010=296.5642487796737e0; static double q0011=86.78073220294608e0; static double q0012=16.06417757920695e0; static double q0013=1.755667163182642e0; static double q0014=.8838834764831844e-1; static double q0015=7.071e0; static double q0016=2.506628274631001e0; q0017=fabs(*z); if(q0017 > 37.e0){ *q0018=0.e0; if(*z > 0.e0){ *q0019=1.e0; *q0020=0.e0; } else{ *q0019=0.e0; *q0020=1.e0; } return; } q0021=exp(-0.5e0*pow(q0017,(double)2)); *q0018=q0021/q0016; if(q0017 < q0015){ *q0019=q0021*((((((q0006*q0017+q0005)*q0017+q0004)*q0017+q0003)* q0017+q0002)*q0017+q0001)*q0017+q0000)/(((((((q0014*q0017+ q0013)*q0017+q0012)*q0017+q0011)*q0017+q0010)*q0017+q0009)* q0017+q0008)*q0017+q0007); } else{ *q0019=*q0018/(q0017+1.e0/(q0017+2.e0/(q0017+3.e0/(q0017+4.e0/ (q0017+0.65e0))))); } if(*z < 0.e0){ *q0020=1.e0-*q0019; } else{ *q0020=*q0019; *q0019=1.e0-*q0020; } return; } /*end of function*/ void /*FUNCTION*/ nprob(double *z,double *q0024,double *q0023,double *q0022) { double q0021,q0020; static double q0000=0.5e0; static double q0001=0.398942280444e0; static double q0002=0.399903438504e0; static double q0003=5.75885480458e0; static double q0004=29.8213557808e0; static double q0005=2.62433121679e0; static double q0006=48.6959930692e0; static double q0007=5.92885724438e0; static double q0008=0.398942280385e0; static double q0009=3.8052e-8; static double q0010=1.00000615302e0; static double q0011=3.98064794e-4; static double q0012=1.98615381364e0; static double q0013=0.151679116635e0; static double q0014=5.29330324926e0; static double q0015=4.8385912808e0; static double q0016=15.1508972451e0; static double q0017=0.742380924027e0; static double q0018=30.789933034e0; static double q0019=3.99019417011e0; q0020=fabs(*z); if(q0020 > 12.7e0) goto L_20; q0021=q0000**z**z; *q0022=exp(-q0021)*q0008; if(q0020 > 1.28e0) goto L_10; *q0023=q0000-q0020*(q0001-q0002*q0021/(q0021+q0003-q0004/(q0021+ q0005+q0006/(q0021+q0007)))); if(*z < 0.e0) goto L_30; *q0024=1.e0-*q0023; return; L_10: *q0023=*q0022/(q0020-q0009+q0010/(q0020+q0011+q0012/(q0020-q0013+ q0014/(q0020+q0015-q0016/(q0020+q0017+q0018/(q0020+q0019)))))); if(*z < 0.e0) goto L_30; *q0024=1.e0-*q0023; return; L_20: *q0023=0.e0; *q0022=0.e0; if(*z < 0.e0) goto L_30; *q0024=1.e0; return; L_30: *q0024=*q0023; *q0023=1.e0-*q0024; return; } /*end of function*/ double /*FUNCTION*/ ppnd(double *q0021,long int *q0019) { double q0000,q0020,q0022; static double q0001=0.42e0; static double q0002=2.50662823884e0; static double q0003=-18.61500062529e0; static double q0004=41.39119773534e0; static double q0005=-25.44106049637e0; static double q0006=-8.47351093090e0; static double q0007=23.08336743743e0; static double q0008=-21.06224101826e0; static double q0009=3.13082909833e0; static double q0010=-2.78718931138e0; static double q0011=-2.29796479134e0; static double q0012=4.85014127135e0; static double q0013=2.32121276858e0; static double q0014=3.54388924762e0; static double q0015=1.63706781897e0; static double q0016=0.e0; static double q0017=1.e0; static double q0018=0.5e0; *q0019=0; q0020=*q0021-q0018; if(fabs(q0020) > q0001) goto L_10; q0022=q0020*q0020; q0000=q0020*(((q0005*q0022+q0004)*q0022+q0003)*q0022+q0002)/((((q0009* q0022+q0008)*q0022+q0007)*q0022+q0006)*q0022+q0017); return(q0000); L_10: q0022=*q0021; if(q0020 > q0016) q0022=q0017-*q0021; if(q0022 <= q0016) goto L_20; q0022=sqrt(-log(q0022)); q0000=(((q0013*q0022+q0012)*q0022+q0011)*q0022+q0010)/((q0015* q0022+q0014)*q0022+q0017); if(q0020 < q0016) q0000=-q0000; return(q0000); L_20: *q0019=1; q0000=q0016; return(q0000); } /*end of function*/