码迷,mamicode.com
首页 > 其他好文 > 详细

454ITS数据按barcode和primer分类程序v1.0

时间:2014-10-09 00:33:37      阅读:305      评论:0      收藏:0      [点我收藏+]

标签:style   blog   color   io   os   ar   for   数据   sp   

不知道有什么好办法可以让primer允许漏配,现在仅仅是允许错配,还是有一些没有配上,454数据有些primer漏配了一些,下一步解决这个问题

  1 #include <cstdio>
  2 #include <cstdlib>
  3 #include <string>
  4 #include <iostream>
  5 #include <fstream>
  6 #include <iomanip>
  7 #include <getopt.h>
  8 using namespace std;
  9 
 10 int max_mis = 1;
 11 int N = 0;
 12 string primer = "";
 13 string* barcode = NULL;
 14 string* barcodeName = NULL;
 15 void usage ()
 16 {
 17         cout << "\nUsage: splitByBarcode [options] <read.fasta> <barcode.txt> <output prefix>\n"
 18         << "  -m <int>  allow the max mis num, default " << max_mis << "\n"
 19     << "  -p <string> primer, required    "<<endl
 20     << "  -n <int>  barcode num, required"<<endl;
 21         exit(1);
 22 }
 23 struct COMPARE{
 24     int ifCompared;
 25     int posStart;
 26     int posEnd;
 27 };
 28 COMPARE compare(string reads, string barcode, int maxmis,int b,int e);
 29 int main(int argc, char *argv[])
 30 {
 31     if (argc < 4)
 32     {
 33         usage();
 34     }
 35     int c;
 36     while ((c=getopt(argc,argv,"m:n:p:")) != -1)
 37     {
 38         switch (c)
 39         {
 40             case m : max_mis = atoi(optarg);break;
 41             case n : N = atoi(optarg); break;
 42             case p : primer = optarg;break; 
 43             default  : cout<<"error:"<<(char)c<<endl;usage();
 44         }
 45     }
 46     if(N<=0){
 47         cerr<<"please input the required barcode num\n";
 48         exit(1);    
 49     }
 50     if(primer == ""){
 51         cerr<<"please input the required primer\n";
 52         exit(1);
 53     }
 54     string seq_file = argv[optind++];
 55     string barcode_file = argv[optind++];
 56     string outPrefix = argv[optind++];
 57     string out_file = outPrefix + ".list";
 58     barcode = new string[N];
 59     barcodeName = new string[N];
 60 
 61     for(int i=0;i<N;i++){
 62         barcodeName[i] = "";
 63         barcode[i] = "";
 64     }
 65     
 66     ifstream reads;
 67     reads.open(seq_file.c_str());
 68     if(!reads){
 69         cerr<<"fail to open input file"<<seq_file<<endl;    
 70     }
 71     ifstream barcode_in;
 72     barcode_in.open(barcode_file.c_str());
 73     if(!barcode_in){
 74         cerr<<"fail to open barcode file" <<barcode_file<<endl;
 75     }
 76     
 77     string text;
 78     int k=0;
 79     while( getline( barcode_in, text, \n) )
 80     {
 81         int startPos = 0, endPos;
 82         endPos = text.find("\t");
 83         barcodeName[k] = text.substr(startPos, endPos-startPos+1);
 84         startPos = endPos + 1;
 85         endPos = text.find("\t",startPos);
 86         barcode[k] = text.substr(startPos, endPos-startPos+1);
 87         //cout << barcodeName[k] <<"\t"<< barcodeForward[k] <<"\t"<< barcodeReverse[k] << endl;
 88         k++;
 89     }
 90 
 91     ofstream splitlist;
 92     splitlist.open(out_file.c_str());
 93     if(!splitlist){
 94         cerr<<"fail to creat output file" << out_file <<endl;
 95     }
 96     int line_num = 1;
 97     splitlist<<"barcodeName\tid\tid_line\n";
 98     while( getline( reads, text, \n) )
 99     {
100         string seq = "", id = "";
101         id = text;
102         getline( reads, text, \n);
103         seq = text;
104 
105         COMPARE p1 ,p2;
106         p1.ifCompared = p2.ifCompared = 0;
107         for(int k=0;k<N;k++){
108             p1 = compare(seq,primer,max_mis,0,seq.length()-primer.length()+1);
109             if( p1.ifCompared )
110             {
111                 p2 = compare(seq,barcode[k],0,0,p1.posStart);
112                 if ( p2.ifCompared ){
113                 //cout<<seq1<<"\t"<<barcodeForward[k]<<"\t"<<seq2<<"\t"<<barcodeReverse[k]<<endl;
114                     splitlist<<barcodeName[k]<<"\t"<<id<<"\t"<<line_num<<endl;
115                     break;
116                 }
117             }
118         }
119         line_num = line_num + 2;
120     }
121     delete[] barcode;
122     delete[] barcodeName;
123     splitlist.close();
124     barcode_in.close();
125     reads.close();
126 }
127 COMPARE compare(string reads, string barcode, int maxmis, int b, int e)
128 {
129     COMPARE p;
130     //cout<<b<<"\t"<<e<<endl;
131 //    cout<<barcode<<"\t"<<barcode.length()<<endl;
132     for(int i=b;i<e;i++)
133     {
134         int mismatch = 0;
135         int pos = 0;
136         while(mismatch <= maxmis ){
137             if(reads[i+pos] != barcode[pos]) mismatch++;
138             pos++;
139             if(pos == barcode.length()){
140                 p.posStart = i;
141                 p.posEnd = i+pos;
142             //    cout<<p.posStart<<"\t"<<p.posEnd<<endl;
143                 p.ifCompared = 1;
144                 return p;
145             }
146         }
147     }
148     p.ifCompared = 0;
149     return p;
150 }

 

454ITS数据按barcode和primer分类程序v1.0

标签:style   blog   color   io   os   ar   for   数据   sp   

原文地址:http://www.cnblogs.com/lyon2014/p/4011706.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!