标签:
题意简述:做一个极其简单的搜索系统,对以下四种输入进行分析与搜索:
1. 只有一个单词:如 term, 只需找到含有这个单词的document,然后把这个document的含有这个单词term的那些行输出。
2.term1 AND term2, 找到同时含有term1 和 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。
3.term1 OR term2, 找到含有term1 或 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。
4.NOT term,将不含有 term的document全部输出
思路简述:
做一个set集合 Src, 记录了一个单词出现在哪些文件的哪些行;用一个map做映射,指出一个单词出现在哪些文件中,这样子,如果是两个单词,可以分别求出各个单词属于哪些文件,根据“AND”则进行set_intersection运算, 根据“OR”进行set_union运算。
/* Poj 2050 Emerald 10 May 2015 */ #include <iostream> #include <cstring> #include <cstdio> #include <cctype> #include <sstream> #include <string> #include <vector> #include <map> #include <set> #include <algorithm> using namespace std; class Figure{ // meaning : a word can be found in the lineOrder‘th line of the docOrder‘th Doc public: int docOrder, lineOrder; Figure() {} Figure( int d, int l ) { docOrder = d; lineOrder = l; } }; bool operator < ( const Figure f1, const Figure f2 ) { // the comparisions used in set if( f1.docOrder!=f2.docOrder ) { return f1.docOrder < f2.docOrder; } else { return f1.lineOrder < f2.lineOrder; } } class Doc{ // meaning : the order of a Doc, and how many lines the Doc contains public : int docOrder, lineLimit; Doc() {} Doc( int d, int l ) { this->docOrder = d; this->lineLimit = l; } }; // set < Figure > src; // this defination defines an accurate instant typedef set < Figure > Src; // a src contains the figures of a word typedef set < int > docSrc; map < string, docSrc > docMap; // referring to a word, wordLine get a line map < string, Src > dict; map < Figure, string > wordLine; // referring to a word, wordLine get a line vector < Doc > docs; const string DOC_END = "**********"; #define ALL(x) x.begin(),x.end() #define INS(x) inserter(x,x.begin()) void Standard( string &line ); // make words tolower and other chars to whitespace void WordRecord( string &line, int docOrder, int lineOrder ); // transfer words into src void PrintSrc( Src &src ); // print as the problem commands void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ); // know the docs and words, get target figures void NotWord( string& word, Src &src ); // not the word w int main() { int allDocs, queries; // input docs scanf( "%d", &allDocs ); cin.get(); for( int i=0; i<allDocs; i++ ) { string line; int lineCounter = 0; while( getline( cin, line ) && line != DOC_END ) { // read until end wordLine[ Figure( i, lineCounter ) ] = line; Standard( line ); WordRecord( line, i, lineCounter ++ ); } docs.push_back( Doc( i, lineCounter ) ); } // input queries string command; scanf( "%d", &queries ); cin.get(); while( queries -- ) { getline( cin, command ); Standard( command ); Src src; if( command.find_last_of( ‘ ‘ ) == string::npos ) { // no whitespace Standard( command ); src = dict[ command ]; } else if( command.find_last_of( ‘ ‘ ) != command.find_first_of( ‘ ‘ ) ) { // if there‘re two different whitespaces stringstream ss( command ); // xxx AND/OR xxx string w1, w2, connected; ss >> w1 >> connected >> w2; docSrc dSrc1 = docMap[ w1 ]; docSrc dSrc2 = docMap[ w2 ]; docSrc dsrc; if( connected == "and" ) { set_intersection( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // intersection } else { set_union( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // union } WordsToFigure( src, dsrc, w1, w2 ); } else { // only one whitespace -> Not xxx stringstream ss( command ); string w1; ss >> w1 >> w1; NotWord( w1, src ); } PrintSrc( src ); } return 0; } void Standard( string &line ) { int length = line.length(); for( int i=0; i<length; i++ ) { if( isalpha( line[i] ) ) { line[i] = tolower( line[i] ); // tolower, such as ‘A‘ to ‘a‘ } else { line[i] = ‘ ‘; // if c isn‘t a alpha, c will be transferred to a whitespace } } } void WordRecord( string &line, int docOrder, int lineOrder ) { stringstream ss( line ); string word; while( ss >> word ) { if( dict.count( word ) ) { // whether the word has been found in the total input if( !dict[word].count( Figure( docOrder, lineOrder ) ) ) { // whether the word has been found in this line dict[word].insert( Figure( docOrder, lineOrder ) ) ; } } else { Src src; src.insert( Figure( docOrder, lineOrder ) ); dict[ word ] = src; } if( docMap.count( word ) ) { // whether the word has been found in this document docMap[word].insert( docOrder ); } else { docSrc ds; docMap[word] = ds; docMap[word].insert( docOrder ); } } } void PrintSrc( Src &src ) { // print the result if( src.size() == 0 ) { printf("Sorry, I found nothing.\n"); printf( "==========\n" ); return ; } Src :: iterator it = src.begin(), bef; // bef represents the former one printf( "%s\n", wordLine[ *it ].c_str() ); bef = it++; while( it != src.end() ) { if( it->docOrder != bef->docOrder ) { printf( "----------\n" ); } printf( "%s\n", wordLine[ *it ].c_str() ); bef = it; it ++; } printf( "==========\n" ); } void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ) { docSrc :: iterator it; for( it=dsrc.begin(); it != dsrc.end(); it ++ ) { Src :: iterator it2 ; for( it2 = dict[ w1 ].begin(); it2 !=dict[w1].end(); it2 ++ ) { if( *it == it2 -> docOrder ) { src.insert( *it2 ); // the w1 appears in this line of this document } } for( it2 = dict[ w2 ].begin(); it2 !=dict[w2].end(); it2 ++ ) { if( *it == it2 -> docOrder ) { src.insert( *it2 ); // the w1 appears in this line of this document } } } } void NotWord( string& word, Src &src ) { // not this word docSrc dsrc = docMap[ word ]; vector< Doc > :: iterator it; for( it = docs.begin(); it != docs.end(); it ++ ) { if( !dsrc.count( it->docOrder ) ) { for( int i=0; i< it->lineLimit; i ++ ) { src.insert( Figure( it->docOrder, i ) ); } } } }
标签:
原文地址:http://www.cnblogs.com/Emerald/p/4495519.html