public class ClickstreamListener implements ServletContextListener, HttpSessionListener { private static final Log log = LogFactory.getLog(ClickstreamListener.class); /** The servlet context attribute key. */ public static final String CLICKSTREAMS_ATTRIBUTE_KEY = "clickstreams"; /** * The click stream (individual) attribute key: this is * the one inserted into the HttpSession. */ public static final String SESSION_ATTRIBUTE_KEY = "clickstream"; /** The current clickstreams, keyed by session ID. */ private Map<String, Clickstream> clickstreams = new ConcurrentHashMap<String, Clickstream>(); public ClickstreamListener() { log.debug("ClickstreamLogger constructed"); } /** * Notification that the ServletContext has been initialized. * * @param sce The context event */ public void contextInitialized(ServletContextEvent sce) { log.debug("ServletContext initialised"); sce.getServletContext().setAttribute(CLICKSTREAMS_ATTRIBUTE_KEY, clickstreams); } /** * Notification that the ServletContext has been destroyed. * * @param sce The context event */ public void contextDestroyed(ServletContextEvent sce) { log.debug("ServletContext destroyed"); // help gc, but should be already clear except when exception was thrown during sessionDestroyed clickstreams.clear(); } /** * Notification that a Session has been created. * * @param hse The session event */ public void sessionCreated(HttpSessionEvent hse) { final HttpSession session = hse.getSession(); if (log.isDebugEnabled()) { log.debug("Session " + session.getId() + " was created, adding a new clickstream."); } Object attrValue = session.getAttribute(SESSION_ATTRIBUTE_KEY); if (attrValue != null) { log.warn("Session " + session.getId() + " already has an attribute named " + SESSION_ATTRIBUTE_KEY + ": " + attrValue); } final Clickstream clickstream = new Clickstream(); session.setAttribute(SESSION_ATTRIBUTE_KEY, clickstream); clickstreams.put(session.getId(), clickstream); } /** * Notification that a session has been destroyed. 
* * @param hse The session event */ public void sessionDestroyed(HttpSessionEvent hse) { final HttpSession session = hse.getSession(); // check if the session is not null (expired) if (session == null) { return; } if (log.isDebugEnabled()) { log.debug("Session " + session.getId() + " was destroyed, logging the clickstream and removing it."); } final Clickstream stream = clickstreams.get(session.getId()); if (stream == null) { log.warn("Session " + session.getId() + " doesn't have a clickstream."); return; } try { if (stream.getSession() != null) { ClickstreamLoggerFactory.getLogger().log(stream); } } catch (Exception e) { log.error(e.getMessage(), e); } finally { clickstreams.remove(session.getId()); } } }
/** The current clickstreams, keyed by session ID (the same field that ClickstreamListener declares). */
private Map<String, Clickstream> clickstreams = new ConcurrentHashMap<String, Clickstream>();
public class Clickstream implements Serializable { private static final long serialVersionUID = 1; /** The stream itself: a list of click events. */ private List<ClickstreamRequest> clickstream = new CopyOnWriteArrayList<ClickstreamRequest>(); /** The attributes. */ private Map<String, Object> attributes = new HashMap<String, Object>(); /** The host name. */ private String hostname; /** The original referer URL, if any. */ private String initialReferrer; /** The stream start time. */ private Date start = new Date(); /** The time of the last request made on this stream. */ private Date lastRequest = new Date(); /** Flag indicating this is a bot surfing the site. */ private boolean bot = false; /** * The session itself. * * Marked as transient so that it does not get serialized when the stream is serialized. * See JIRA issue CLK-14 for details. */ private transient HttpSession session; /** * Adds a new request to the stream of clicks. The HttpServletRequest is converted * to a ClickstreamRequest object and added to the clickstream. * * @param request The serlvet request to be added to the clickstream */ public void addRequest(HttpServletRequest request) { lastRequest = new Date(); if (hostname == null) { hostname = request.getRemoteHost(); session = request.getSession(); } // if this is the first request in the click stream if (clickstream.isEmpty()) { // setup initial referrer if (request.getHeader("REFERER") != null) { initialReferrer = request.getHeader("REFERER"); } else { initialReferrer = ""; } // decide whether this is a bot bot = BotChecker.isBot(request); } clickstream.add(new ClickstreamRequest(request, lastRequest)); } /** * Gets an attribute for this clickstream. * * @param name */ public Object getAttribute(String name) { return attributes.get(name); } /** * Gets the attribute names for this clickstream. */ public Set<String> getAttributeNames() { return attributes.keySet(); } /** * Sets an attribute for this clickstream. 
* * @param name * @param value */ public void setAttribute(String name, Object value) { attributes.put(name, value); } /** * Returns the host name that this clickstream relates to. * * @return the host name that the user clicked through */ public String getHostname() { return hostname; } /** * Returns the bot status. * * @return true if the client is bot or spider */ public boolean isBot() { return bot; } /** * Returns the HttpSession associated with this clickstream. * * @return the HttpSession associated with this clickstream */ public HttpSession getSession() { return session; } /** * The URL of the initial referer. This is useful for determining * how the user entered the site. * * @return the URL of the initial referer */ public String getInitialReferrer() { return initialReferrer; } /** * Returns the Date when the clickstream began. * * @return the Date when the clickstream began */ public Date getStart() { return start; } /** * Returns the last Date that the clickstream was modified. * * @return the last Date that the clickstream was modified */ public Date getLastRequest() { return lastRequest; } /** * Returns the actual List of ClickstreamRequest objects. * * @return the actual List of ClickstreamRequest objects */ public List<ClickstreamRequest> getStream() { return clickstream; }
/**
 * Serializable snapshot of the parts of an HTTP request that are tracked in
 * a clickstream. All values are copied out of the request at construction
 * time and never change afterwards.
 */
public class ClickstreamRequest implements Serializable {

    private static final long serialVersionUID = 1;

    private final String protocol;
    private final String serverName;
    private final int serverPort;
    private final String requestURI;
    private final String queryString;
    private final String remoteUser;
    private final long timestamp;

    /**
     * Copies the tracked values out of the given request.
     *
     * @param request the request to take the snapshot of
     * @param timestamp the time of the request; stored as epoch milliseconds
     */
    public ClickstreamRequest(HttpServletRequest request, Date timestamp) {
        this.protocol = request.getProtocol();
        this.serverName = request.getServerName();
        this.serverPort = request.getServerPort();
        this.requestURI = request.getRequestURI();
        this.queryString = request.getQueryString();
        this.remoteUser = request.getRemoteUser();
        this.timestamp = timestamp.getTime();
    }

    /** @return the protocol reported by the request */
    public String getProtocol() {
        return protocol;
    }

    /** @return the server name reported by the request */
    public String getServerName() {
        return serverName;
    }

    /** @return the server port reported by the request */
    public int getServerPort() {
        return serverPort;
    }

    /** @return the request URI */
    public String getRequestURI() {
        return requestURI;
    }

    /** @return the query string reported by the request */
    public String getQueryString() {
        return queryString;
    }

    /** @return the remote user reported by the request */
    public String getRemoteUser() {
        return remoteUser;
    }

    /** @return the request time as a newly created {@link Date} */
    public Date getTimestamp() {
        return new Date(timestamp);
    }

    /**
     * Renders the tracked request as text: server name, then {@code :port}
     * unless the port is 80, then the request URI, then {@code ?query} when a
     * query string is present.
     * Example: {@code www.opensymphony.com/some/path.jsp?arg1=foo&arg2=bar}
     *
     * @return a string representation of the HTTP request being tracked.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(serverName);
        if (serverPort != 80) {
            text.append(":").append(serverPort);
        }
        text.append(requestURI);
        if (queryString != null) {
            text.append("?").append(queryString);
        }
        return text.toString();
    }
}
public void doFilter(ServletRequest req, ServletResponse res, FilterChain chain) throws IOException, ServletException { // Ensure that filter is only applied once per request. if (req.getAttribute(FILTER_APPLIED) == null) { log.debug("Applying clickstream filter to request."); req.setAttribute(FILTER_APPLIED, true); HttpServletRequest request = (HttpServletRequest)req; HttpSession session = request.getSession(); Clickstream stream = (Clickstream) session.getAttribute(ClickstreamListener.SESSION_ATTRIBUTE_KEY); stream.addRequest(request); } else { log.debug("Clickstream filter already applied, ignoring it."); } // pass the request on chain.doFilter(req, res); }
1. Clickstream项目,使用ServletContext来存储Map,意味着只能使用一个web容器。
Redis中的List, 每个元素对应着一个序列化之后的ClickstreamRequest 字符串;
Redis中的Hash,存储private Map<String, Object> attributes = new HashMap<String, Object>();
Redis中的Hash,存储hostname,initialReferrer,start,lastRequest,bot,HttpSession -id等字段
使用集合session.ids 来存储相关的session-id,每个字符串session:{id}对应着一个ClickStream,
List类型的clickstream 中,两个相邻的ClickstreamRequest元素A,B,
如果A和B是同一个页面,说明用户在刷新页面A,用于计算request_count;
Google Analytics