标签:style blog http color io os ar for sp
作业一:文章词频统计
思路设计与分析:采用最简单的数组作为存储结构。每读到一个单词,就在已保存的数组中遍历查找:如果没有找到,则把该单词作为新元素加入数组;否则将该单词的出现次数加一。最后按出现次数从高到低排序,跳过停用词(如 the、of、and 等常用虚词),取出前10名输出。
代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <fstream>
#include <string>
#include <iostream>
#include <queue>
#include <vector>
using namespace std;

// Word-frequency counter: reads whitespace-separated tokens from "text1.txt",
// counts how often each distinct token occurs, and prints the most frequent
// tokens that are not in the stop-word list, one "<word> <count>" per line.

string text;
string word;

// Stop words: tokens that are counted but never printed in the final ranking.
// Apostrophes are plain ASCII (') so that entries such as "didn't" can match
// tokens read from an ASCII text file.
string useless[] =
{
    "the","he","all","was","to","would","and","of","a",
    "that","his","in","i","for","it","could","had",
    "when","as","on","not","us","him","this","so","out",
    "our","we","never","up","how","at","few","often","get",
    "after","have","their","there","around","be","if","were",
    "again","didn't","your","take","you","it's","toward","with",
    "yourself","than","rather","an","what","don't","you're","or",
    "--","you've","is","my","but","from","more","no","its","do",
    "which","they","them","t","ll","go","are","just","by","will",
    "me","can","then","s","her","all","now","even"
};

// One distinct token together with the number of times it has been seen.
typedef struct que
{
    char s[30];   // NUL-terminated token text, at most 29 characters
    int time;     // occurrence count

    // Deliberately inverted: an entry with a larger count compares as "less",
    // so sorting with operator< yields descending order of frequency.
    friend bool operator<(const que &a, const que &b)
    {
        return a.time > b.time;
    }
} que;

// All distinct tokens seen so far; grows by one entry per new token.
vector<que> q;

int check(string s);

// Number of ranked words to print.
static const int TOP_COUNT = 10;

// Reads "text1.txt", builds the frequency table in `q`, and prints the
// TOP_COUNT most frequent non-stop-words to stdout.
// Returns 0 on success, 1 if the input file cannot be opened.
int main()
{
    FILE *in = fopen("text1.txt", "r");
    if (in == NULL)
    {
        cerr << "cannot open text1.txt" << endl;
        return 1;
    }

    long count = 0;   // tokens processed so far
    char temp[30];    // buffer for the token currently being read

    // Loop on fscanf's return value rather than feof(): feof() only becomes
    // true after a read has already failed, which would make the last token
    // be processed twice (or an uninitialised buffer be processed on an
    // empty file). The %29s width keeps the token inside temp[30]; a longer
    // token is read as several consecutive pieces.
    while (fscanf(in, "%29s", temp) == 1)
    {
        // Progress report every 1000 tokens, on stderr so that it does not
        // mix with the ranking printed on stdout.
        if (count % 1000 == 0)
        {
            cerr << count << endl;
        }
        count++;

        // Linear search for the token among the entries stored so far.
        bool found = false;
        for (size_t j = 0; j < q.size(); j++)
        {
            if (strcmp(q[j].s, temp) == 0)
            {
                q[j].time++;
                found = true;
                break;   // entries are unique, no need to look further
            }
        }

        if (!found)
        {
            que qnow;
            strcpy(qnow.s, temp);   // safe: temp holds at most 29 chars + NUL
            qnow.time = 1;
            q.push_back(qnow);
        }
    }
    fclose(in);

    // Descending by count (see que::operator<).
    sort(q.begin(), q.end(), less<que>());

    int printed = 0;
    for (size_t i = 0; i < q.size() && printed < TOP_COUNT; i++)
    {
        if (!check(q[i].s))
        {
            cout << q[i].s << " " << q[i].time << endl;
            printed++;
        }
    }
    return 0;
}

// Returns 1 if `s` is exactly equal to one of the entries in `useless`,
// 0 otherwise. The comparison is case-sensitive.
int check(string s)
{
    const size_t len = sizeof(useless) / sizeof(useless[0]);
    for (size_t i = 0; i < len; i++)
    {
        if (useless[i] == s)
        {
            return 1;
        }
    }
    return 0;
}
性能分析如下:
标签:style blog http color io os ar for sp
原文地址:http://www.cnblogs.com/ztypeer/p/4041973.html