标签:
描绘产品所有用户的访问的功能路径树,话就不多说,闷得慌
sql 代码
ALTER procedure [dbo].[PR_StatFuncPath](@statdate int) as begin declare @today datetime; set @today=CAST(convert(varchar(8),@statdate) as datetime); declare @yestoday datetime; set @yestoday=dateadd(DAY,-1,@today); declare @yesInt int; set @yesInt=CAST(CONVERT(varchar(8),@yestoday,112) as int); declare @tomorrow datetime; set @tomorrow=dateadd(DAY,1,@today); declare @tomorrowInt int; set @tomorrowInt=CAST(CONVERT(varchar(8),@tomorrow,112) as int); declare @tomorrowpathTable varchar(100); set @tomorrowpathTable = ‘MobileFunctionLog_‘ + CAST( YEAR(@tomorrow) as nvarchar(10)); declare @yespathTable varchar(100); set @yespathTable = ‘MobileFunctionLog_‘ + CAST(YEAR(@yestoday) as nvarchar(10)); declare @pathTable varchar(100); set @pathTable = ‘MobileFunctionLog_‘ + CAST(YEAR(@today) as nvarchar(10)); declare @sql nvarchar(max); declare @part tinyint; set @part=0; create table #resultbyversion(softid int,platform int,versionid int,functionid int,Pid int,ordernum int,functionpath varchar(200),optiontimes int); create table #result(softid int,platform int,functionid int,Pid int,ordernum int,functionpath varchar(200),optiontimes int) while(@part<128) begin --得出昨天会话 set @sql=‘ select distinct SoftID,Platform,VersionID,SessionID,IMEI into ##tempyes from ‘+ @yespathTable+ ‘ where OptionDate=@yesInt and Part=@part‘; EXEC sp_executesql @sql,N‘@part tinyint,@yesInt int‘,@part,@yesInt; --得出明天的会话 set @sql=‘ select distinct SoftID,Platform,VersionID,SessionID,IMEI into ##temptomorrow from ‘+ @tomorrowpathTable+ ‘ where OptionDate=@tomorrowInt and Part=@part‘; EXEC sp_executesql @sql,N‘@part tinyint,@tomorrowInt int‘,@part,@tomorrowInt; set @sql=‘select distinct SoftID, Platform, VersionID, SessionID, FunctionID, IMEI, OptionTime,NeedRecord into ##OneDayData from ‘+@pathTable+ ‘ where OptionDate=@statdate and Part=@part‘ EXEC sp_executesql @sql,N‘@part tinyint,@statdate int‘,@part,@statdate; Delete A from ##OneDayData A inner join ##tempyes B on A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI; Delete A from ##OneDayData A inner join ##temptomorrow B on A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI; ---上述俩处完成清理缓存数据和脏数据 drop table ##tempyes; drop table ##temptomorrow; select SoftID, Platform, VersionID, SessionID, FunctionID, IMEI, OptionTime,NeedRecord, ROW_NUMBER() over( partition by SoftID, Platform, VersionID, SessionID, IMEI Order By OptionTime asc) ordernum into #LastOneData from ##OneDayData ; drop table ##OneDayData; --得出标准路径 (干掉了F1->F1->F2 这种非标准路径) with etc2 as ( select SoftID,Platform,VersionID,SessionId,FunctionID,IMEI,OptionTime,NeedRecord,OrderNum,isnull(Pid,0) Pid from ( select A.SoftID, A.Platform, A.VersionID, A.SessionId, A.FunctionID, A.IMEI, A.OptionTime,A.NeedRecord, A.OrderNum,B.FunctionID Pid from #LastOneData A left join #LastOneData B ON A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI And A.ordernum=B.ordernum+1 ) A where (FunctionID!=Pid or Pid is null) ) select * into #etc2 from etc2; ---路径长度大于20的不要 select SoftID, Platform, VersionID, SessionId, FunctionID, IMEI, OptionTime,NeedRecord,Pid,OrderNum into #GroupData from ( select SoftID, Platform, VersionID, SessionId, FunctionID, IMEI, OptionTime,NeedRecord,Pid , ROW_NUMBER() over(PARTITION by SoftID, Platform, VersionID, SessionId, IMEI order by OptionTime asc ) OrderNum from #etc2 ) a where OrderNum<20; drop table #etc2; --递归得出路径结果 with etc3 as ( select *,CAST(FunctionID as varchar(200)) FunctionPath from #GroupData where PID=0 UNION ALL select A.*,CAST((B.FunctionPath +‘-‘+ CAST(A.FUNCTIONID as varchar(12))) AS VARCHAR(200) ) FunctionPath from #GroupData A INNER JOIN etc3 B on A.SessionID=B.SessionID and A.SoftID=B.SoftID and A.Platform=B.platform and A.versionid=B.versionid and A.Imei=B.Imei and A.OrderNum-B.OrderNum=1 ) select * into #temppathdata from etc3; insert into #resultbyversion select SoftID,Platform,VersionID,FunctionID,Pid,OrderNum, FunctionPath,sum(NeedRecord) OptionTimes from #temppathdata group by SoftID,Platform,VersionID,FunctionID,Pid,OrderNum,FunctionPath; insert into #result select SoftID,Platform,FunctionID,Pid,OrderNum, FunctionPath,sum(NeedRecord) OptionTimes from #temppathdata group by SoftID,Platform,FunctionID,Pid,OrderNum,FunctionPath; drop table #GroupData; drop table #LastOneData; drop table #temppathdata; set @part=@part+1; print(@part); end select SoftID,Platform,VersionID,FunctionID,PID,OrderNum,FunctionPath,SUM(OptionTimes) OptionTimes into ##tmp_FuncPathByVersion from #resultbyversion group by SoftID,Platform,VersionID,FunctionID,OrderNum,Pid,FunctionPath having SUM(OptionTimes)>1 select SoftID,Platform,FunctionID,PID,OrderNum,FunctionPath,SUM(OptionTimes) OptionTimes into ##tmp_FuncPath from #result group by SoftID,Platform,FunctionID,OrderNum,Pid,FunctionPath having SUM(OptionTimes)>1 DROP TABLE #result; DROP TABLE #resultbyversion; end
java 代码
@MapConfig public static class MapTask extends Mapper<LongWritable, Text, Text, Text> { private Text mKey = new Text(); private Text mValue = new Text(); private StringBuilder sb = new StringBuilder(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String str = value.toString(); String[] params = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, "\t"); //过滤数据 if (params.length < 21 || params[12].equals("0") || !params[11].equals("False")) { return; } sb.delete(0, sb.length()); //key:imei 0,softid 2,softversion 3,platform 4,sessionid 12 //value:functionid 7,actiontime 9 sb.append(params[0]).append("\t") .append(params[2]).append("\t") .append(params[3]).append("\t") .append(params[4]).append("\t") .append(params[12]); mKey.set(sb.toString()); mValue.set(params[7] + "\t" + params[9]); context.write(mKey, mValue); } }
@ReduceConfig public static class ReduceTask extends Reducer<Text, Text, Text, IntWritable> { private Text rKey = new Text(); private IntWritable rValue = new IntWritable(); private String[] valItem = new String[2]; private int i = 0; //key:actiontime value:lastfunctionid firstfunctionid; private SortedMap<String, String> map = new TreeMap<String, String>(); String pid = ""; //lastid private String valTemp; private String firstid; private StringBuilder sbpathvalue = new StringBuilder(); //key:road,value:functionid private Map<String, Integer> mRoads = new HashMap<String, Integer>(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { map.clear(); mRoads.clear(); boolean isfirstid = true; sbpathvalue.delete(0, sbpathvalue.length()); i = 0; for (Text item : values) { valItem = item.toString().split("\t"); if (map.containsKey(valItem[1]) || valItem[1].equals("")) { continue; } map.put(valItem[1], valItem[0]); } //添加退出功能点 map.put(System.currentTimeMillis() + "", "-1"); pid = ""; valTemp = ""; Iterator it = map.entrySet().iterator(); //用户一次操作完整路径才计算路径长度 int pathlengtn = 0; while (it.hasNext()) { Map.Entry<String, String> entry = (Map.Entry<String, String>) it.next(); if (isfirstid) { firstid = entry.getValue(); isfirstid = false; } pathlengtn = 0; // //过滤掉F1->F1 // if (valTemp.equals(entry.getValue())){ // continue; // } valTemp = entry.getValue(); //算法关键点 int start = sbpathvalue.indexOf(valTemp); if (start >= 0) { sbpathvalue = sbpathvalue.replace(start + valTemp.length(), sbpathvalue.length(), ""); //continue; } if (i != 0 && firstid.equals(valTemp)) { i = 0; sbpathvalue.delete(0, sbpathvalue.length()); } if (i == 0) { sbpathvalue.append(valTemp); pid = valTemp; } else { if (start == -1) { sbpathvalue.append("_").append(valTemp); } int first = sbpathvalue.indexOf("_"); int last = sbpathvalue.lastIndexOf("_"); if (last == -1) { pid = sbpathvalue.toString(); } else { pid = sbpathvalue.substring(first == last ? 0 : sbpathvalue.substring(0, last).lastIndexOf("_") + 1, last); } } //表示此路径为完整路 if (valTemp.equals("-1")) { if (sbpathvalue.toString().contains("_")) { pathlengtn = sbpathvalue.toString().split("_").length; } } String roadkey = pid + "\t" + valTemp + "\t" + pathlengtn + "\t" + sbpathvalue; if (!mRoads.containsKey(roadkey)) { mRoads.put(roadkey, 1); } else { mRoads.put(roadkey, mRoads.get(roadkey) + 1); } i++; } if (mRoads.size() <= 0) { return; } Iterator itRoads = mRoads.entrySet().iterator(); while (itRoads.hasNext()) { Map.Entry<String, Integer> entry = (Map.Entry<String, Integer>) itRoads.next(); rKey.set(key + "\t" + entry.getKey()); rValue.set(entry.getValue()); context.write(rKey, rValue); } } }
标签:
原文地址:http://my.oschina.net/osenlin/blog/522731