接下来我们建立两个游标:一个指向上一个 word position,一个指向当前的 word position。如果上一个 word 和当前 word 在 sentence 中的位置刚好相邻,并且两者的 index 也相邻,那么 n++;接着这两个游标都往前走一步,继续判断,直到 n 等于句子中的单词个数,这就说明已经匹配到了一个完整的句子。随后把 n 重置为 1 再继续往下走,直到遍历完整个列表。
/**
 * Mapper that filters index lines down to the query words and re-keys their
 * postings by file name.
 *
 * <p>The query words are read from the job configuration: {@code argsnum} holds
 * an integer N, and the words are the values of {@code args2} ..
 * {@code args(N-1)}.
 *
 * <p>Each input line is split on a single space; the first field is the word
 * and the second field is its posting list. The posting list is tokenized on
 * {@code ';'} and every token is split on {@code ':'} into a file name and a
 * suffix. For every posting of a query word the mapper emits
 * {@code (fileName, word + suffix)}.
 * NOTE(review): the suffix is presumably a position list such as
 * {@code <1,5,9>} — confirm against the job that produces the index.
 */
public class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

    /** Counter group used to report records that could not be parsed. */
    private static final String COUNTER_GROUP = "MyMapper";

    /**
     * Emits the postings of one index line if its word is one of the query words.
     *
     * <p>Lines without a second space-separated field, and posting tokens
     * without a {@code ':'}-separated second field, are skipped and counted in
     * the {@code MALFORMED_LINES} / {@code MALFORMED_POSTINGS} counters instead
     * of failing the task with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param ikey    input key supplied by the input format (not used)
     * @param ivalue  one line of the index
     * @param context task context used to read the configuration and emit output
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if {@code argsnum} is absent or not an integer
     */
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        int argsnum = Integer.parseInt(conf.get("argsnum"));

        // Query words live in args2 .. args(argsnum-1).
        ArrayList<String> contents = new ArrayList<String>();
        for (int i = 2; i < argsnum; i++) {
            contents.add(conf.get("args" + i));
        }

        // Split the line once; a line with fewer than two fields carries no postings.
        String[] fields = ivalue.toString().split(" ");
        if (fields.length < 2) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
            return;
        }
        String key = fields[0];
        String value = fields[1];

        for (String content : contents) {
            // key.equals(content) is null-safe: a missing "argsN" entry simply never matches.
            if (!key.equals(content)) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(value, ";");
            while (st.hasMoreTokens()) {
                String[] posting = st.nextToken().split(":");
                if (posting.length < 2) {
                    context.getCounter(COUNTER_GROUP, "MALFORMED_POSTINGS").increment(1);
                    continue;
                }
                String filename = posting[0];
                String adds = posting[1];
                context.write(new Text(filename), new Text(key + adds));
            }
        }
    }
}
class Address implements Comparable<Address>{
public String word ;
public int index;
Address(String word, int index){
this.word =word;
this.index =index;
}
public String toString(){
return word +" "+ index;
}
public int compareTo(Address a){
if(index <a.index) return -1;
else return 1;
}
}
/**
 * Reducer that counts how many times the query sentence occurs among the
 * values received for one key.
 *
 * <p>The sentence is read from the job configuration: {@code argsnum} holds an
 * integer N, and the sentence words are the values of {@code args2} ..
 * {@code args(N-1)}, in that order.
 *
 * <p>Each incoming value is split on {@code '<'}, {@code '>'} and {@code ','}.
 * The first token is taken as a word and every following token is parsed as an
 * integer index for that word.
 * NOTE(review): values are presumably shaped like {@code word<1,5,9>} with the
 * numbers being word positions inside the file named by the key — confirm
 * against the mapper that feeds this reducer.
 */
public class MyReducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Collects all (word, index) pairs for the key, sorts them by index, counts
     * the complete sentence matches and, if there is at least one, writes
     * {@code (key, count)}.
     *
     * <p>Every sorted pair and every match is also printed to standard output
     * as diagnostic output.
     *
     * @param _key    the key shared by all values
     * @param values  the values to parse into (word, index) pairs
     * @param context task context used to read the configuration and emit output
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if {@code argsnum} or an index token is not an integer
     */
    public void reduce(Text _key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // The first two arguments are not part of the sentence.
        int wordnum = Integer.parseInt(conf.get("argsnum")) - 2;

        ArrayList<String> sentence = new ArrayList<String>();
        for (int i = 2; i < wordnum + 2; i++) {
            sentence.add(conf.get("args" + i));
        }

        ArrayList<Address> list = new ArrayList<Address>();
        for (Text val : values) {
            String[] line = val.toString().split("<|>|,");
            // line[0] is the word; every further token is one index of that word.
            for (int j = 1; j < line.length; j++) {
                list.add(new Address(line[0], Integer.parseInt(line[j])));
            }
        }
        Collections.sort(list);

        // Diagnostic dump of the sorted pairs.
        for (Address x : list) {
            System.out.println(x);
            System.out.println(" ");
        }

        int sum = countMatches(sentence, list, wordnum);
        if (sum > 0) {
            context.write(_key, new Text(String.valueOf(sum)));
        }
    }

    /**
     * Scans the index-sorted list once and counts runs of {@code wordnum}
     * entries in which every entry follows its predecessor both in the
     * sentence (sentence position + 1) and in the index (index + 1).
     *
     * <p>An empty list yields 0. The predecessor is tracked by reference and no
     * list element is modified. Sentence positions are looked up with
     * {@code indexOf}, which returns the first occurrence, so a sentence that
     * repeats a word cannot be matched past the repeated word.
     *
     * @param sentence the query words in order
     * @param list     (word, index) pairs sorted by index
     * @param wordnum  number of words that make up a complete match
     * @return the number of complete matches found
     */
    private static int countMatches(ArrayList<String> sentence, ArrayList<Address> list, int wordnum) {
        int sum = 0;
        int n = 0;
        Address prev = null;
        for (Address now : list) {
            boolean continuesRun = prev != null
                    && sentence.indexOf(now.word) - sentence.indexOf(prev.word) == 1
                    && now.index - prev.index == 1;
            // Either extend the current run or start a new one at this entry.
            n = continuesRun ? n + 1 : 1;
            prev = now;

            if (n == wordnum) {
                System.out.println("match is " + now);
                sum++;
                n = 1;
            }
        }
        return sum;
    }
}