标签:tuple storm dem 判断 3.1 over mllib 多次 pat
1.过滤操作
只是判断某个tuple是否保留
无需跨网络,无需跨分区
不会改变tuple的结构,只是改变tuple的数量
2.需求
过滤掉不是订单的tuple。
判断依据:订单日志中包含“IBEIfeng.gif”,不包含该字符串的日志不是订单。
3.主驱动类
先过滤
后打印
1 package com.jun.trident; 2 3 import backtype.storm.Config; 4 import backtype.storm.LocalCluster; 5 import backtype.storm.StormSubmitter; 6 import backtype.storm.generated.AlreadyAliveException; 7 import backtype.storm.generated.InvalidTopologyException; 8 import backtype.storm.tuple.Fields; 9 import backtype.storm.tuple.Values; 10 import storm.trident.TridentTopology; 11 import storm.trident.testing.FixedBatchSpout; 12 13 public class TridentDemo { 14 public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException { 15 TridentTopology tridentTopology=new TridentTopology(); 16 //模拟数据 17 Fields field=new Fields("log","flag"); 18 FixedBatchSpout spout=new FixedBatchSpout(field,5, 19 new Values("168.214.187.214 - - [1481953616092] \"GET /view.php HTTP/1.1\" 200 0 \"http://cn.bing.com/search?q=spark mllib\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","A"), 20 new Values("168.187.202.202 - - [1481953537038] \"GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)\" \"-\"","A"), 21 new Values("61.30.167.187 - - [1481953539039] \"GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","A"), 22 new Values("30.29.132.190 - - [1481953544042] \"GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like 
Gecko) Version/7.0 Mobile/11B511 Safari/9537.53\" \"-\"","B"), 23 new Values("222.190.187.201 - - [1481953578068] \"GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","B"), 24 new Values("72.202.43.53 - - [1481953579069] \"GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","B") 25 ); 26 //多次循环 27 spout.setCycle(true); 28 //提交 29 Config config=new Config(); 30 tridentTopology.newStream("orderAnalyse",spout) 31 // .each(new Fields("log","flag"),new PrintFilter()); 32 .each(new Fields("log"),new ValidLogFilter()) 33 .each(new Fields("log","flag"),new PrintFilter()); 34 if(args==null || args.length<=0){ 35 LocalCluster localCluster=new LocalCluster(); 36 localCluster.submitTopology("tridentDemo",config,tridentTopology.build()); 37 }else { 38 config.setNumWorkers(2); 39 StormSubmitter.submitTopology(args[0],config,tridentTopology.build()); 40 } 41 } 42 }
4.过滤类
package com.jun.trident;

import storm.trident.operation.Filter;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;

import java.util.Map;

/**
 * Trident filter that keeps only order tuples.
 *
 * <p>A tuple counts as an order when its "log" field contains the marker
 * "IBEIfeng.gif"; every other tuple, including one whose "log" field is null
 * or empty, is dropped.
 */
public class ValidLogFilter implements Filter {

    /**
     * Decides whether a tuple stays in the stream.
     *
     * @param tridentTuple tuple carrying a string field named "log"
     * @return {@code true} if the "log" value contains "IBEIfeng.gif", otherwise {@code false}
     */
    @Override
    public boolean isKeep(TridentTuple tridentTuple) {
        final String line = tridentTuple.getStringByField("log");
        // An empty string cannot contain the marker, so only null needs its own guard.
        return line != null && line.contains("IBEIfeng.gif");
    }

    /** No setup is needed; the filter holds no state. */
    @Override
    public void prepare(Map map, TridentOperationContext tridentOperationContext) {
    }

    /** Nothing to release. */
    @Override
    public void cleanup() {
    }
}
5.效果
标签:tuple storm dem 判断 3.1 over mllib 多次 pat
原文地址:https://www.cnblogs.com/juncaoit/p/9161561.html