标签:style blog color io os ar for 数据 sp
不知道有什么好办法可以让primer允许漏配,现在仅仅是允许错配,还是有一些没有配上,454数据有些primer漏配了一些,下一步解决这个问题
1 #include <cstdio> 2 #include <cstdlib> 3 #include <string> 4 #include <iostream> 5 #include <fstream> 6 #include <iomanip> 7 #include <getopt.h> 8 using namespace std; 9 10 int max_mis = 1; 11 int N = 0; 12 string primer = ""; 13 string* barcode = NULL; 14 string* barcodeName = NULL; 15 void usage () 16 { 17 cout << "\nUsage: splitByBarcode [options] <read.fasta> <barcode.txt> <output prefix>\n" 18 << " -m <int> allow the max mis num, default " << max_mis << "\n" 19 << " -p <string> primer, required "<<endl 20 << " -n <int> barcode num, required"<<endl; 21 exit(1); 22 } 23 struct COMPARE{ 24 int ifCompared; 25 int posStart; 26 int posEnd; 27 }; 28 COMPARE compare(string reads, string barcode, int maxmis,int b,int e); 29 int main(int argc, char *argv[]) 30 { 31 if (argc < 4) 32 { 33 usage(); 34 } 35 int c; 36 while ((c=getopt(argc,argv,"m:n:p:")) != -1) 37 { 38 switch (c) 39 { 40 case ‘m‘ : max_mis = atoi(optarg);break; 41 case ‘n‘ : N = atoi(optarg); break; 42 case ‘p‘ : primer = optarg;break; 43 default : cout<<"error:"<<(char)c<<endl;usage(); 44 } 45 } 46 if(N<=0){ 47 cerr<<"please input the required barcode num\n"; 48 exit(1); 49 } 50 if(primer == ""){ 51 cerr<<"please input the required primer\n"; 52 exit(1); 53 } 54 string seq_file = argv[optind++]; 55 string barcode_file = argv[optind++]; 56 string outPrefix = argv[optind++]; 57 string out_file = outPrefix + ".list"; 58 barcode = new string[N]; 59 barcodeName = new string[N]; 60 61 for(int i=0;i<N;i++){ 62 barcodeName[i] = ""; 63 barcode[i] = ""; 64 } 65 66 ifstream reads; 67 reads.open(seq_file.c_str()); 68 if(!reads){ 69 cerr<<"fail to open input file"<<seq_file<<endl; 70 } 71 ifstream barcode_in; 72 barcode_in.open(barcode_file.c_str()); 73 if(!barcode_in){ 74 cerr<<"fail to open barcode file" <<barcode_file<<endl; 75 } 76 77 string text; 78 int k=0; 79 while( getline( barcode_in, text, ‘\n‘) ) 80 { 81 int startPos = 0, endPos; 82 endPos = text.find("\t"); 83 barcodeName[k] = text.substr(startPos, endPos-startPos+1); 84 startPos = endPos + 1; 85 endPos = text.find("\t",startPos); 86 barcode[k] = text.substr(startPos, endPos-startPos+1); 87 //cout << barcodeName[k] <<"\t"<< barcodeForward[k] <<"\t"<< barcodeReverse[k] << endl; 88 k++; 89 } 90 91 ofstream splitlist; 92 splitlist.open(out_file.c_str()); 93 if(!splitlist){ 94 cerr<<"fail to creat output file" << out_file <<endl; 95 } 96 int line_num = 1; 97 splitlist<<"barcodeName\tid\tid_line\n"; 98 while( getline( reads, text, ‘\n‘) ) 99 { 100 string seq = "", id = ""; 101 id = text; 102 getline( reads, text, ‘\n‘); 103 seq = text; 104 105 COMPARE p1 ,p2; 106 p1.ifCompared = p2.ifCompared = 0; 107 for(int k=0;k<N;k++){ 108 p1 = compare(seq,primer,max_mis,0,seq.length()-primer.length()+1); 109 if( p1.ifCompared ) 110 { 111 p2 = compare(seq,barcode[k],0,0,p1.posStart); 112 if ( p2.ifCompared ){ 113 //cout<<seq1<<"\t"<<barcodeForward[k]<<"\t"<<seq2<<"\t"<<barcodeReverse[k]<<endl; 114 splitlist<<barcodeName[k]<<"\t"<<id<<"\t"<<line_num<<endl; 115 break; 116 } 117 } 118 } 119 line_num = line_num + 2; 120 } 121 delete[] barcode; 122 delete[] barcodeName; 123 splitlist.close(); 124 barcode_in.close(); 125 reads.close(); 126 } 127 COMPARE compare(string reads, string barcode, int maxmis, int b, int e) 128 { 129 COMPARE p; 130 //cout<<b<<"\t"<<e<<endl; 131 // cout<<barcode<<"\t"<<barcode.length()<<endl; 132 for(int i=b;i<e;i++) 133 { 134 int mismatch = 0; 135 int pos = 0; 136 while(mismatch <= maxmis ){ 137 if(reads[i+pos] != barcode[pos]) mismatch++; 138 pos++; 139 if(pos == barcode.length()){ 140 p.posStart = i; 141 p.posEnd = i+pos; 142 // cout<<p.posStart<<"\t"<<p.posEnd<<endl; 143 p.ifCompared = 1; 144 return p; 145 } 146 } 147 } 148 p.ifCompared = 0; 149 return p; 150 }
454ITS数据按barcode和primer分类程序v1.0
标签:style blog color io os ar for 数据 sp
原文地址:http://www.cnblogs.com/lyon2014/p/4011706.html