标签:poj
基因地理计划是IBM和美国国家地理学会的合作研究项目,用来分析来自于数十万的捐助者的DNA,来研究人类在地球上的迁移图。
作为一个IBM的研究者,你被要求写一个程序,来发现共性的DNA片段,用来和个人调查信息关联以确定新的遗传标记。
DNA碱基序列通过按顺序排列分子中发现的含氮碱基来记录。有四种碱基:腺嘌呤(A),胸腺嘧啶(T),鸟嘌呤(G)和胞嘧啶(C)。一个6碱基的DNA序列可以表示为TAGACC。
给定一组DNA碱基序列,找出在所有序列中都出现的最长公共碱基片段(连续子串)。若最长公共片段不足3个碱基,则输出"no significant commonalities";若存在多个长度相同的最长片段,则输出其中字典序最小的一个。
输入样例:
3
2
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
3
CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
输出样例:
no significant commonalities
AGATAC
CATCATCAT
3 2 GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 3 GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA 3 CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Number of bases each input sequence is expected to contain; also the
// longest fragment length that is tracked.
int const ELEM_LENGTH = 60;
// A common fragment shorter than this is not reported.
int const MIN_COMMON_LENGTH = 3;
// A TABLE has one bucket per fragment length 0..ELEM_LENGTH (inclusive), so
// that a bucket can be addressed directly by the fragment length.
int const TABLE_SIZE = ELEM_LENGTH + 1;

// One bucket: the fragments of one particular length.
typedef list<string>* Elem;
// Array of TABLE_SIZE buckets; bucket k holds the fragments of length k.
typedef Elem* TABLE;

// Builds, from a DP matrix, the per-length buckets of common fragments.
void get_table(int ** data,int row,int column,TABLE & table,string a);
// Computes the common-suffix DP matrix of two strings.
void common_substring(string a,string b,int ** &data,int &row,int &column);
// Reads every data set and solves it.
void read_data();
// Solves one data set and prints its answer.
void main_solution(string * data,int m);
// Intersection of two sorted lists.
list<string> common_two_list(list<string> * list1,list<string> * list2);

// Releases a matrix allocated by common_substring.
static void free_dp_table(int ** data, int row)
{
    if (data == NULL)
        return;
    for (int i = 0; i < row; i++)
        delete[] data[i];
    delete[] data;
}

// Releases a table allocated by get_table.
static void free_table(TABLE table)
{
    if (table == NULL)
        return;
    for (int len = 0; len < TABLE_SIZE; len++)
        delete table[len];
    delete[] table;
}

// Builds the table of fragments shared by `first` and `second`; the
// intermediate DP matrix is released before returning.
static TABLE build_pair_table(const string & first, const string & second)
{
    int ** dp = NULL;
    int row = 0;
    int column = 0;
    TABLE table = NULL;
    common_substring(first, second, dp, row, column);
    get_table(dp, row, column, table, first);
    free_dp_table(dp, row);
    return table;
}

// Reads "<count> { <m> <m sequences> }*" from `in` and solves each data set.
// Stops quietly at the first malformed or truncated value.
static void solve_stream(istream & in)
{
    int datasets = 0;
    if (!(in >> datasets))
        return;
    for (; datasets > 0; datasets--)
    {
        int m = 0;
        if (!(in >> m) || m < 0)
            return;
        vector<string> dna(m);
        for (int i = 0; i < m; i++)
        {
            if (!(in >> dna[i]))
                return;
        }
        main_solution(dna.empty() ? static_cast<string *>(NULL) : &dna[0], m);
    }
}

// Computes the common-suffix DP matrix of `a` and `b`.
//
// On return `row` == a.length()+1, `column` == b.length()+1, and
// data[i][j] is the length of the longest string that is a suffix of both
// a[0..i) and b[0..j) (0 when a[i-1] != b[j-1]; row 0 and column 0 are 0).
// The matrix is allocated with new[]; the caller owns it.
void common_substring(string a,string b,int ** &data,int &row,int &column)
{
    row = static_cast<int>(a.length()) + 1;
    column = static_cast<int>(b.length()) + 1;

    data = new int *[row];
    for (int i = 0; i < row; i++)
        data[i] = new int[column]();   // value-initialised: every cell starts at 0

    for (int i = 1; i < row; i++)
    {
        for (int j = 1; j < column; j++)
        {
            if (a[i-1] == b[j-1])
                data[i][j] = data[i-1][j-1] + 1;
        }
    }
}

// Builds the per-length buckets of fragments common to the two strings that
// produced `data`.
//
// data/row/column: matrix from common_substring, with `a` as its first string.
// table: receives a newly allocated array of TABLE_SIZE lists; for every
//        length k in [MIN_COMMON_LENGTH, ELEM_LENGTH], table[k] is the sorted,
//        duplicate-free list of common fragments of length k. Buckets below
//        MIN_COMMON_LENGTH are left empty. The caller owns the table.
// Matches longer than ELEM_LENGTH are truncated to their last ELEM_LENGTH
// characters so that the bucket index stays in range.
void get_table(int ** data,int row,int column,TABLE & table,string a)
{
    table = new Elem[TABLE_SIZE];
    for (int len = 0; len < TABLE_SIZE; len++)
        table[len] = new list<string>;

    // A cell can only reach MIN_COMMON_LENGTH from row/column MIN_COMMON_LENGTH on.
    for (int i = MIN_COMMON_LENGTH; i < row; i++)
    {
        for (int j = MIN_COMMON_LENGTH; j < column; j++)
        {
            int const len = min(data[i][j], ELEM_LENGTH);
            if (len >= MIN_COMMON_LENGTH)
                table[len]->push_back(a.substr(i - len, len));
        }
    }

    // Walk from the longest bucket down: both (len)-character ends of every
    // fragment of length len+1 are themselves common fragments of length len.
    for (int len = ELEM_LENGTH; len >= MIN_COMMON_LENGTH; len--)
    {
        if (len < ELEM_LENGTH)
        {
            list<string> const & longer = *table[len+1];
            for (list<string>::const_iterator it = longer.begin(); it != longer.end(); ++it)
            {
                table[len]->push_back(it->substr(0, len));
                table[len]->push_back(it->substr(1, len));
            }
        }
        // std::list has no random-access iterators, so the member sort is
        // used; unique() then drops the adjacent duplicates.
        table[len]->sort();
        table[len]->unique();
    }
}

// Reads all data sets and solves them.
//
// Input is taken from "data.txt" when that file can be opened, otherwise from
// standard input.
void read_data()
{
    ifstream reader("data.txt");
    if (reader.is_open())
        solve_stream(reader);
    else
        solve_stream(cin);
}

// Solves one data set: prints the longest fragment (at least
// MIN_COMMON_LENGTH bases) that occurs in all `m` sequences of `data`, the
// lexicographically smallest one on ties, or "no significant commonalities"
// when there is none.
//
// The sequences are paired up (0,1), (2,3), ...; an odd last sequence is
// paired with sequence 0. A fragment is common to all sequences exactly when
// it is in the common-fragment bucket of every pair.
void main_solution(string * data,int m)
{
    if (data == NULL || m <= 0)
    {
        cout<<"no significant commonalities"<<endl;
        return;
    }

    vector<TABLE> tables;
    for (int i = 0; i + 1 < m; i += 2)
        tables.push_back(build_pair_table(data[i], data[i+1]));
    if (m % 2 == 1)
        tables.push_back(build_pair_table(data[0], data[m-1]));

    bool found = false;
    for (int len = ELEM_LENGTH; len >= MIN_COMMON_LENGTH && !found; len--)
    {
        list<string> common = *tables[0][len];
        for (size_t t = 1; t < tables.size() && !common.empty(); t++)
            common = common_two_list(&common, tables[t][len]);

        if (!common.empty())
        {
            // Buckets are sorted, so the front is the alphabetically first.
            cout<<common.front()<<endl;
            found = true;
        }
    }
    if (!found)
        cout<<"no significant commonalities"<<endl;

    for (size_t t = 0; t < tables.size(); t++)
        free_table(tables[t]);
}

// Returns the elements present in both lists.
// Both lists must be sorted in ascending order; the result is sorted too.
list<string> common_two_list(list<string> * list1,list<string> * list2)
{
    list<string> result;
    list<string>::const_iterator it1 = list1->begin();
    list<string>::const_iterator it2 = list2->begin();
    while (it1 != list1->end() && it2 != list2->end())
    {
        int const order = it1->compare(*it2);
        if (order < 0)
        {
            ++it1;
        }
        else if (order > 0)
        {
            ++it2;
        }
        else
        {
            result.push_back(*it1);
            ++it1;
            ++it2;
        }
    }
    return result;
}

// Program entry point. The original's non-portable system("pause") was
// dropped: it is Windows-only and must not run on an online judge.
int main()
{
    read_data();
    return 0;
}
标签:poj
原文地址:http://blog.csdn.net/cqs_experiment/article/details/40450321