标签:
参考官方文档:http://gora.apache.org/current/tutorial.html
一、环境准备
1、下载gora并解压
2、分别进入 $GORA_HOME/gora-hbase、$GORA_HOME/gora-core、$GORA_HOME/gora-compiler、$GORA_HOME/gora-compiler-cli 这几个目录,在每个目录下执行:
$ mvn clean install
3、启动hbase。
注意:gora-0.5 对应的 HBase 版本是 0.94。
4、准备好日志文件,用于本项目的分析
二、建立项目
1、建立一个java project,并创建以下几个目录
2、构建build path,增加hadoop library,hbase library以及avro, gora相关的类包。
3、将准备好的日志文件放到resource目录下
4、在conf目录下创建gora.properties,内容如下:
##gora.datastore.default is the default datastore implementation to use
##if it is not passed to the DataStoreFactory#createDataStore() method.
gora.datastore.default=org.apache.gora.hbase.store.HBaseStore
#gora.datastore.default=org.apache.gora.cassandra.store.CassandraStore
#gora.datastore.default=org.apache.gora.solr.store.SolrStore

#gora.datastore.default=org.apache.gora.avro.store.AvroStore
#gora.avrostore.input.path=hdfs://localhost:9000/gora.avrostore.test.input
#gora.avrostore.output.path=hdfs://localhost:9000/gora.avrostore.test.output
#gora.avrostore.codec.type=JSON || BINARY

##whether to create schema automatically if not exists.
gora.datastore.autocreateschema=true

##Cassandra properties for gora-cassandra module using Cassandra
#gora.cassandrastore.servers=localhost:9160

##JDBC properties for gora-sql module using HSQL
gora.sqlstore.jdbc.driver=org.hsqldb.jdbcDriver
##HSQL jdbc connection as persistent in-process database
gora.sqlstore.jdbc.url=jdbc:hsqldb:file:./hsql-data

##HSQL jdbc connection as network server
#gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/goratest

##JDBC properties for gora-sql module using MySQL
#gora.sqlstore.jdbc.driver=com.mysql.jdbc.Driver
#gora.sqlstore.jdbc.url=jdbc:mysql://localhost:3306/goratest
#gora.sqlstore.jdbc.user=root
#gora.sqlstore.jdbc.password=

gora.solrstore.solr.url=http://localhost:8983/solr
gora.solrstore.solr.commitwithin=0
gora.solrstore.solr.batchsize=100
# set which Solrj server impl you wish to use
# cloud, concurrent, http, loadbalance
gora.solrstore.solr.solrjserver=http
{ "type": "record", "name": "Pageview", "default":null, "namespace": "org.apache.gora.tutorial.log.generated", "fields" : [ {"name": "url", "type": ["null","string"], "default":null}, {"name": "timestamp", "type": "long", "default":0}, {"name": "ip", "type": ["null","string"], "default":null}, {"name": "httpMethod", "type": ["null","string"], "default":null}, {"name": "httpStatusCode", "type": "int", "default":0}, {"name": "responseSize", "type": "int", "default":0}, {"name": "referrer", "type": ["null","string"], "default":null}, {"name": "userAgent", "type": ["null","string"], "default":null} ] }
/** * Autogenerated by Avro * * DO NOT EDIT DIRECTLY */ package org.apache.gora.tutorial.log.generated; @SuppressWarnings("all") public class Pageview extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent { public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Pageview\",\"namespace\":\"org.apache.gora.tutorial.log.generated\",\"fields\":[{\"name\":\"url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"timestamp\",\"type\":\"long\",\"default\":0},{\"name\":\"ip\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"httpMethod\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"httpStatusCode\",\"type\":\"int\",\"default\":0},{\"name\":\"responseSize\",\"type\":\"int\",\"default\":0},{\"name\":\"referrer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"userAgent\",\"type\":[\"null\",\"string\"],\"default\":null}],\"default\":null}"); /** Enum containing all data bean's fields. */ public static enum Field { URL(0, "url"), TIMESTAMP(1, "timestamp"), IP(2, "ip"), HTTP_METHOD(3, "httpMethod"), HTTP_STATUS_CODE(4, "httpStatusCode"), RESPONSE_SIZE(5, "responseSize"), REFERRER(6, "referrer"), USER_AGENT(7, "userAgent"), ; /** * Field's index. */ private int index; /** * Field's name. */ private String name; /** * Field's constructor * @param index field's index. * @param name field's name. */ Field(int index, String name) {this.index=index;this.name=name;} /** * Gets field's index. * @return int field's index. */ public int getIndex() {return index;} /** * Gets field's name. * @return String field's name. */ public String getName() {return name;} /** * Gets field's attributes to string. * @return String field's attributes to string. 
*/ public String toString() {return name;} }; public static final String[] _ALL_FIELDS = { "url", "timestamp", "ip", "httpMethod", "httpStatusCode", "responseSize", "referrer", "userAgent", }; /** * Gets the total field count. * @return int field count */ public int getFieldsCount() { return Pageview._ALL_FIELDS.length; } private java.lang.CharSequence url; private long timestamp; private java.lang.CharSequence ip; private java.lang.CharSequence httpMethod; private int httpStatusCode; private int responseSize; private java.lang.CharSequence referrer; private java.lang.CharSequence userAgent; public org.apache.avro.Schema getSchema() { return SCHEMA$; } // Used by DatumWriter. Applications should not call. public java.lang.Object get(int field$) { switch (field$) { case 0: return url; case 1: return timestamp; case 2: return ip; case 3: return httpMethod; case 4: return httpStatusCode; case 5: return responseSize; case 6: return referrer; case 7: return userAgent; default: throw new org.apache.avro.AvroRuntimeException("Bad index"); } } // Used by DatumReader. Applications should not call. @SuppressWarnings(value="unchecked") public void put(int field$, java.lang.Object value) { switch (field$) { case 0: url = (java.lang.CharSequence)(value); break; case 1: timestamp = (java.lang.Long)(value); break; case 2: ip = (java.lang.CharSequence)(value); break; case 3: httpMethod = (java.lang.CharSequence)(value); break; case 4: httpStatusCode = (java.lang.Integer)(value); break; case 5: responseSize = (java.lang.Integer)(value); break; case 6: referrer = (java.lang.CharSequence)(value); break; case 7: userAgent = (java.lang.CharSequence)(value); break; default: throw new org.apache.avro.AvroRuntimeException("Bad index"); } } /** * Gets the value of the 'url' field. */ public java.lang.CharSequence getUrl() { return url; } /** * Sets the value of the 'url' field. * @param value the value to set. 
*/ public void setUrl(java.lang.CharSequence value) { this.url = value; setDirty(0); } /** * Checks the dirty status of the 'url' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isUrlDirty(java.lang.CharSequence value) { return isDirty(0); } /** * Gets the value of the 'timestamp' field. */ public java.lang.Long getTimestamp() { return timestamp; } /** * Sets the value of the 'timestamp' field. * @param value the value to set. */ public void setTimestamp(java.lang.Long value) { this.timestamp = value; setDirty(1); } /** * Checks the dirty status of the 'timestamp' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isTimestampDirty(java.lang.Long value) { return isDirty(1); } /** * Gets the value of the 'ip' field. */ public java.lang.CharSequence getIp() { return ip; } /** * Sets the value of the 'ip' field. * @param value the value to set. */ public void setIp(java.lang.CharSequence value) { this.ip = value; setDirty(2); } /** * Checks the dirty status of the 'ip' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isIpDirty(java.lang.CharSequence value) { return isDirty(2); } /** * Gets the value of the 'httpMethod' field. */ public java.lang.CharSequence getHttpMethod() { return httpMethod; } /** * Sets the value of the 'httpMethod' field. * @param value the value to set. */ public void setHttpMethod(java.lang.CharSequence value) { this.httpMethod = value; setDirty(3); } /** * Checks the dirty status of the 'httpMethod' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. 
*/ public boolean isHttpMethodDirty(java.lang.CharSequence value) { return isDirty(3); } /** * Gets the value of the 'httpStatusCode' field. */ public java.lang.Integer getHttpStatusCode() { return httpStatusCode; } /** * Sets the value of the 'httpStatusCode' field. * @param value the value to set. */ public void setHttpStatusCode(java.lang.Integer value) { this.httpStatusCode = value; setDirty(4); } /** * Checks the dirty status of the 'httpStatusCode' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isHttpStatusCodeDirty(java.lang.Integer value) { return isDirty(4); } /** * Gets the value of the 'responseSize' field. */ public java.lang.Integer getResponseSize() { return responseSize; } /** * Sets the value of the 'responseSize' field. * @param value the value to set. */ public void setResponseSize(java.lang.Integer value) { this.responseSize = value; setDirty(5); } /** * Checks the dirty status of the 'responseSize' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isResponseSizeDirty(java.lang.Integer value) { return isDirty(5); } /** * Gets the value of the 'referrer' field. */ public java.lang.CharSequence getReferrer() { return referrer; } /** * Sets the value of the 'referrer' field. * @param value the value to set. */ public void setReferrer(java.lang.CharSequence value) { this.referrer = value; setDirty(6); } /** * Checks the dirty status of the 'referrer' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isReferrerDirty(java.lang.CharSequence value) { return isDirty(6); } /** * Gets the value of the 'userAgent' field. */ public java.lang.CharSequence getUserAgent() { return userAgent; } /** * Sets the value of the 'userAgent' field. 
* @param value the value to set. */ public void setUserAgent(java.lang.CharSequence value) { this.userAgent = value; setDirty(7); } /** * Checks the dirty status of the 'userAgent' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isUserAgentDirty(java.lang.CharSequence value) { return isDirty(7); } /** Creates a new Pageview RecordBuilder */ public static org.apache.gora.tutorial.log.generated.Pageview.Builder newBuilder() { return new org.apache.gora.tutorial.log.generated.Pageview.Builder(); } /** Creates a new Pageview RecordBuilder by copying an existing Builder */ public static org.apache.gora.tutorial.log.generated.Pageview.Builder newBuilder(org.apache.gora.tutorial.log.generated.Pageview.Builder other) { return new org.apache.gora.tutorial.log.generated.Pageview.Builder(other); } /** Creates a new Pageview RecordBuilder by copying an existing Pageview instance */ public static org.apache.gora.tutorial.log.generated.Pageview.Builder newBuilder(org.apache.gora.tutorial.log.generated.Pageview other) { return new org.apache.gora.tutorial.log.generated.Pageview.Builder(other); } private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer( java.nio.ByteBuffer input) { java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity()); int position = input.position(); input.reset(); int mark = input.position(); int limit = input.limit(); input.rewind(); input.limit(input.capacity()); copy.put(input); input.rewind(); copy.rewind(); input.position(mark); input.mark(); copy.position(mark); copy.mark(); input.position(position); copy.position(position); input.limit(limit); copy.limit(limit); return copy.asReadOnlyBuffer(); } /** * RecordBuilder for Pageview instances. 
*/ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Pageview> implements org.apache.avro.data.RecordBuilder<Pageview> { private java.lang.CharSequence url; private long timestamp; private java.lang.CharSequence ip; private java.lang.CharSequence httpMethod; private int httpStatusCode; private int responseSize; private java.lang.CharSequence referrer; private java.lang.CharSequence userAgent; /** Creates a new Builder */ private Builder() { super(org.apache.gora.tutorial.log.generated.Pageview.SCHEMA$); } /** Creates a Builder by copying an existing Builder */ private Builder(org.apache.gora.tutorial.log.generated.Pageview.Builder other) { super(other); } /** Creates a Builder by copying an existing Pageview instance */ private Builder(org.apache.gora.tutorial.log.generated.Pageview other) { super(org.apache.gora.tutorial.log.generated.Pageview.SCHEMA$); if (isValidValue(fields()[0], other.url)) { this.url = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.url); fieldSetFlags()[0] = true; } if (isValidValue(fields()[1], other.timestamp)) { this.timestamp = (java.lang.Long) data().deepCopy(fields()[1].schema(), other.timestamp); fieldSetFlags()[1] = true; } if (isValidValue(fields()[2], other.ip)) { this.ip = (java.lang.CharSequence) data().deepCopy(fields()[2].schema(), other.ip); fieldSetFlags()[2] = true; } if (isValidValue(fields()[3], other.httpMethod)) { this.httpMethod = (java.lang.CharSequence) data().deepCopy(fields()[3].schema(), other.httpMethod); fieldSetFlags()[3] = true; } if (isValidValue(fields()[4], other.httpStatusCode)) { this.httpStatusCode = (java.lang.Integer) data().deepCopy(fields()[4].schema(), other.httpStatusCode); fieldSetFlags()[4] = true; } if (isValidValue(fields()[5], other.responseSize)) { this.responseSize = (java.lang.Integer) data().deepCopy(fields()[5].schema(), other.responseSize); fieldSetFlags()[5] = true; } if (isValidValue(fields()[6], other.referrer)) { this.referrer 
= (java.lang.CharSequence) data().deepCopy(fields()[6].schema(), other.referrer); fieldSetFlags()[6] = true; } if (isValidValue(fields()[7], other.userAgent)) { this.userAgent = (java.lang.CharSequence) data().deepCopy(fields()[7].schema(), other.userAgent); fieldSetFlags()[7] = true; } } /** Gets the value of the 'url' field */ public java.lang.CharSequence getUrl() { return url; } /** Sets the value of the 'url' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setUrl(java.lang.CharSequence value) { validate(fields()[0], value); this.url = value; fieldSetFlags()[0] = true; return this; } /** Checks whether the 'url' field has been set */ public boolean hasUrl() { return fieldSetFlags()[0]; } /** Clears the value of the 'url' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearUrl() { url = null; fieldSetFlags()[0] = false; return this; } /** Gets the value of the 'timestamp' field */ public java.lang.Long getTimestamp() { return timestamp; } /** Sets the value of the 'timestamp' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setTimestamp(long value) { validate(fields()[1], value); this.timestamp = value; fieldSetFlags()[1] = true; return this; } /** Checks whether the 'timestamp' field has been set */ public boolean hasTimestamp() { return fieldSetFlags()[1]; } /** Clears the value of the 'timestamp' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearTimestamp() { fieldSetFlags()[1] = false; return this; } /** Gets the value of the 'ip' field */ public java.lang.CharSequence getIp() { return ip; } /** Sets the value of the 'ip' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setIp(java.lang.CharSequence value) { validate(fields()[2], value); this.ip = value; fieldSetFlags()[2] = true; return this; } /** Checks whether the 'ip' field has been set */ public boolean hasIp() { return fieldSetFlags()[2]; } /** Clears the value of the 'ip' field */ 
public org.apache.gora.tutorial.log.generated.Pageview.Builder clearIp() { ip = null; fieldSetFlags()[2] = false; return this; } /** Gets the value of the 'httpMethod' field */ public java.lang.CharSequence getHttpMethod() { return httpMethod; } /** Sets the value of the 'httpMethod' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setHttpMethod(java.lang.CharSequence value) { validate(fields()[3], value); this.httpMethod = value; fieldSetFlags()[3] = true; return this; } /** Checks whether the 'httpMethod' field has been set */ public boolean hasHttpMethod() { return fieldSetFlags()[3]; } /** Clears the value of the 'httpMethod' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearHttpMethod() { httpMethod = null; fieldSetFlags()[3] = false; return this; } /** Gets the value of the 'httpStatusCode' field */ public java.lang.Integer getHttpStatusCode() { return httpStatusCode; } /** Sets the value of the 'httpStatusCode' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setHttpStatusCode(int value) { validate(fields()[4], value); this.httpStatusCode = value; fieldSetFlags()[4] = true; return this; } /** Checks whether the 'httpStatusCode' field has been set */ public boolean hasHttpStatusCode() { return fieldSetFlags()[4]; } /** Clears the value of the 'httpStatusCode' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearHttpStatusCode() { fieldSetFlags()[4] = false; return this; } /** Gets the value of the 'responseSize' field */ public java.lang.Integer getResponseSize() { return responseSize; } /** Sets the value of the 'responseSize' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setResponseSize(int value) { validate(fields()[5], value); this.responseSize = value; fieldSetFlags()[5] = true; return this; } /** Checks whether the 'responseSize' field has been set */ public boolean hasResponseSize() { return fieldSetFlags()[5]; } /** Clears the 
value of the 'responseSize' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearResponseSize() { fieldSetFlags()[5] = false; return this; } /** Gets the value of the 'referrer' field */ public java.lang.CharSequence getReferrer() { return referrer; } /** Sets the value of the 'referrer' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setReferrer(java.lang.CharSequence value) { validate(fields()[6], value); this.referrer = value; fieldSetFlags()[6] = true; return this; } /** Checks whether the 'referrer' field has been set */ public boolean hasReferrer() { return fieldSetFlags()[6]; } /** Clears the value of the 'referrer' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearReferrer() { referrer = null; fieldSetFlags()[6] = false; return this; } /** Gets the value of the 'userAgent' field */ public java.lang.CharSequence getUserAgent() { return userAgent; } /** Sets the value of the 'userAgent' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder setUserAgent(java.lang.CharSequence value) { validate(fields()[7], value); this.userAgent = value; fieldSetFlags()[7] = true; return this; } /** Checks whether the 'userAgent' field has been set */ public boolean hasUserAgent() { return fieldSetFlags()[7]; } /** Clears the value of the 'userAgent' field */ public org.apache.gora.tutorial.log.generated.Pageview.Builder clearUserAgent() { userAgent = null; fieldSetFlags()[7] = false; return this; } @Override public Pageview build() { try { Pageview record = new Pageview(); record.url = fieldSetFlags()[0] ? this.url : (java.lang.CharSequence) defaultValue(fields()[0]); record.timestamp = fieldSetFlags()[1] ? this.timestamp : (java.lang.Long) defaultValue(fields()[1]); record.ip = fieldSetFlags()[2] ? this.ip : (java.lang.CharSequence) defaultValue(fields()[2]); record.httpMethod = fieldSetFlags()[3] ? 
this.httpMethod : (java.lang.CharSequence) defaultValue(fields()[3]); record.httpStatusCode = fieldSetFlags()[4] ? this.httpStatusCode : (java.lang.Integer) defaultValue(fields()[4]); record.responseSize = fieldSetFlags()[5] ? this.responseSize : (java.lang.Integer) defaultValue(fields()[5]); record.referrer = fieldSetFlags()[6] ? this.referrer : (java.lang.CharSequence) defaultValue(fields()[6]); record.userAgent = fieldSetFlags()[7] ? this.userAgent : (java.lang.CharSequence) defaultValue(fields()[7]); return record; } catch (Exception e) { throw new org.apache.avro.AvroRuntimeException(e); } } } public Pageview.Tombstone getTombstone(){ return TOMBSTONE; } public Pageview newInstance(){ return newBuilder().build(); } private static final Tombstone TOMBSTONE = new Tombstone(); public static final class Tombstone extends Pageview implements org.apache.gora.persistency.Tombstone { private Tombstone() { } /** * Gets the value of the 'url' field. */ public java.lang.CharSequence getUrl() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'url' field. * @param value the value to set. */ public void setUrl(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'url' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isUrlDirty(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'timestamp' field. */ public java.lang.Long getTimestamp() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'timestamp' field. * @param value the value to set. 
*/ public void setTimestamp(java.lang.Long value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'timestamp' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isTimestampDirty(java.lang.Long value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'ip' field. */ public java.lang.CharSequence getIp() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'ip' field. * @param value the value to set. */ public void setIp(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'ip' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isIpDirty(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'httpMethod' field. */ public java.lang.CharSequence getHttpMethod() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'httpMethod' field. * @param value the value to set. */ public void setHttpMethod(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'httpMethod' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. 
*/ public boolean isHttpMethodDirty(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'httpStatusCode' field. */ public java.lang.Integer getHttpStatusCode() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'httpStatusCode' field. * @param value the value to set. */ public void setHttpStatusCode(java.lang.Integer value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'httpStatusCode' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isHttpStatusCodeDirty(java.lang.Integer value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'responseSize' field. */ public java.lang.Integer getResponseSize() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'responseSize' field. * @param value the value to set. */ public void setResponseSize(java.lang.Integer value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'responseSize' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isResponseSizeDirty(java.lang.Integer value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'referrer' field. */ public java.lang.CharSequence getReferrer() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'referrer' field. * @param value the value to set. 
*/ public void setReferrer(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'referrer' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isReferrerDirty(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } /** * Gets the value of the 'userAgent' field. */ public java.lang.CharSequence getUserAgent() { throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones"); } /** * Sets the value of the 'userAgent' field. * @param value the value to set. */ public void setUserAgent(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones"); } /** * Checks the dirty status of the 'userAgent' field. A field is dirty if it represents a change that has not yet been written to the database. * @param value the value to set. */ public boolean isUserAgentDirty(java.lang.CharSequence value) { throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones"); } } }
<?xml version="1.0" encoding="UTF-8"?> <!-- Gora Mapping file for HBase Backend --> <gora-otd> <table name="Pageview"> <!-- optional descriptors for tables --> <family name="common"/> <!-- This can also have params like compression, bloom filters --> <family name="http"/> <family name="misc"/> </table> <class name="org.apache.gora.tutorial.log.generated.Pageview" keyClass="java.lang.Long" table="AccessLog"> <field name="url" family="common" qualifier="url"/> <field name="timestamp" family="common" qualifier="timestamp"/> <field name="ip" family="common" qualifier="ip" /> <field name="httpMethod" family="http" qualifier="httpMethod"/> <field name="httpStatusCode" family="http" qualifier="httpStatusCode"/> <field name="responseSize" family="http" qualifier="responseSize"/> <field name="referrer" family="misc" qualifier="referrer"/> <field name="userAgent" family="misc" qualifier="userAgent"/> </class> <class name="org.apache.gora.tutorial.log.generated.MetricDatum" keyClass="java.lang.String" table="Metrics"> <field name="metricDimension" family="common" qualifier="metricDimension"/> <field name="timestamp" family="common" qualifier="ts"/> <field name="metric" family="common" qualifier="metric"/> </class> </gora-otd>
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gora.tutorial.log;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.avro.util.Utf8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.gora.tutorial.log.generated.Pageview;
import org.apache.hadoop.conf.Configuration;

/**
 * LogManager is the tutorial class to illustrate the basic
 * {@link DataStore} API usage. The LogManager class is used
 * to parse the web server logs in combined log format, store the
 * data in a Gora compatible data store, query and manipulate the stored data.
 *
 * <p>In the data model, keys are the line numbers in the log file,
 * and the values are Pageview objects, generated from
 * <code>gora-tutorial/src/main/avro/pageview.json</code>.
 *
 * <p>See the tutorial.html file in docs or go to the
 * <a href="http://gora.apache.org/docs/current/tutorial.html">
 * web site</a>for more information.</p>
 */
public class LogManager {

  private static final Logger log = LoggerFactory.getLogger(LogManager.class);

  private DataStore<Long, Pageview> dataStore;

  // Month names in access logs are English abbreviations ("Mar", "Oct"), so the
  // format is pinned to Locale.ENGLISH; with the JVM default locale, parsing
  // fails on machines whose locale is not English.
  // SimpleDateFormat is not thread-safe; this class only uses it from one thread.
  private static final SimpleDateFormat dateFormat
      = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH);

  /**
   * Creates the manager and its data store.
   *
   * @throws RuntimeException wrapping the IOException if the store cannot be created
   */
  public LogManager() {
    try {
      init();
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  /** Obtains the data store for (Long key, Pageview value) from the factory. */
  private void init() throws IOException {
    //Data store objects are created from a factory. It is necessary to
    //provide the key and value class. The datastore class is optional,
    //and if not specified it will be read from the properties file
    dataStore = DataStoreFactory.getDataStore(Long.class, Pageview.class,
        new Configuration());
  }

  /**
   * Parses a log file and store the contents at the data store.
   * Keys are assigned from a counter that starts at 0 and advances once
   * per stored entry; blank lines are skipped and do not consume a key.
   *
   * @param input the input file location
   */
  private void parse(String input) throws IOException, ParseException, Exception {
    log.info("Parsing file:" + input);
    BufferedReader reader = new BufferedReader(new FileReader(input));
    long lineCount = 0;
    try {
      String line;
      // Test for end-of-file before parsing, so an empty file is not
      // handed to parseLine as a null line.
      while ((line = reader.readLine()) != null) {
        if (line.trim().length() == 0) {
          continue; // nothing to tokenize on a blank line
        }
        Pageview pageview = parseLine(line);
        if (pageview != null) {
          //store the pageview
          storePageview(lineCount++, pageview);
        }
      }
    } finally {
      reader.close();
    }
    log.info("finished parsing file. Total number of log lines:" + lineCount);
  }

  /**
   * Parses a single log line in combined log format using StringTokenizers.
   *
   * @param line one non-blank log line
   * @return the populated Pageview
   * @throws ParseException if the timestamp does not match the expected format
   */
  private Pageview parseLine(String line) throws ParseException {
    StringTokenizer matcher = new StringTokenizer(line);
    //parse the log line
    String ip = matcher.nextToken();
    matcher.nextToken(); //discard
    matcher.nextToken();
    // The token up to "]" still starts with " [", hence substring(2).
    long timestamp = dateFormat.parse(matcher.nextToken("]").substring(2)).getTime();
    matcher.nextToken("\"");
    String request = matcher.nextToken("\"");
    String[] requestParts = request.split(" ");
    String httpMethod = requestParts[0];
    String url = requestParts[1];
    matcher.nextToken(" ");
    int httpStatusCode = Integer.parseInt(matcher.nextToken());
    int responseSize = Integer.parseInt(matcher.nextToken());
    matcher.nextToken("\"");
    String referrer = matcher.nextToken("\"");
    matcher.nextToken("\"");
    String userAgent = matcher.nextToken("\"");

    //construct and return pageview object
    Pageview pageview = new Pageview();
    pageview.setIp(new Utf8(ip));
    pageview.setTimestamp(timestamp);
    pageview.setHttpMethod(new Utf8(httpMethod));
    pageview.setUrl(new Utf8(url));
    pageview.setHttpStatusCode(httpStatusCode);
    pageview.setResponseSize(responseSize);
    pageview.setReferrer(new Utf8(referrer));
    pageview.setUserAgent(new Utf8(userAgent));

    return pageview;
  }

  /** Stores the pageview object with the given key */
  private void storePageview(long key, Pageview pageview) throws IOException, Exception {
    log.info("Storing Pageview in: " + dataStore.toString());
    dataStore.put(key, pageview);
  }

  /** Fetches a single pageview object and prints it*/
  private void get(long key) throws IOException, Exception {
    Pageview pageview = dataStore.get(key);
    printPageview(pageview);
  }

  /** Queries and prints a single pageview object */
  private void query(long key) throws IOException, Exception {
    //Queries are constructed from the data store
    Query<Long, Pageview> query = dataStore.newQuery();
    query.setKey(key);

    Result<Long, Pageview> result = query.execute(); //Actually executes the query.
    // alternatively dataStore.execute(query); can be used

    printResult(result);
  }

  /** Queries and prints pageview object that have keys between startKey and endKey*/
  private void query(long startKey, long endKey) throws IOException, Exception {
    Query<Long, Pageview> query = dataStore.newQuery();
    //set the properties of query
    query.setStartKey(startKey);
    query.setEndKey(endKey);

    Result<Long, Pageview> result = query.execute();

    printResult(result);
  }

  /**Deletes the pageview with the given line number */
  private void delete(long lineNum) throws Exception {
    dataStore.delete(lineNum);
    dataStore.flush(); //write changes may need to be flushed before
                       //they are committed
    log.info("pageview with key:" + lineNum + " deleted");
  }

  /** This method illustrates delete by query call */
  private void deleteByQuery(long startKey, long endKey) throws IOException, Exception {
    //Constructs a query from the dataStore. The matching rows to this query will be deleted
    Query<Long, Pageview> query = dataStore.newQuery();
    //set the properties of query
    query.setStartKey(startKey);
    query.setEndKey(endKey);

    dataStore.deleteByQuery(query);
    log.info("pageviews with keys between " + startKey + " and " + endKey + " are deleted");
  }

  /** Prints every (key, pageview) pair of the result, then the result's final offset. */
  private void printResult(Result<Long, Pageview> result) throws IOException, Exception {

    while(result.next()) { //advances the Result object and breaks if at end
      long resultKey = result.getKey(); //obtain current key
      Pageview resultPageview = result.get(); //obtain current value object

      //print the results
      System.out.println(resultKey + ":");
      printPageview(resultPageview);
    }

    System.out.println("Number of pageviews from the query:" + result.getOffset());
  }

  /** Pretty prints the pageview object to stdout */
  private void printPageview(Pageview pageview) {
    if(pageview == null) {
      System.out.println("No result to show");
    } else {
      System.out.println(pageview.toString());
    }
  }

  /** Closes the data store if one was created. */
  private void close() throws IOException, Exception {
    //It is very important to close the datastore properly, otherwise
    //some data loss might occur.
    if(dataStore != null)
      dataStore.close();
  }

  private static final String USAGE = "LogManager -parse <input_log_file>\n" +
                                      " -get <lineNum>\n" +
                                      " -query <lineNum>\n" +
                                      " -query <startLineNum> <endLineNum>\n" +
                                      " -delete <lineNum>\n" +
                                      " -deleteByQuery <startLineNum> <endLineNum>\n";

  /** Prints the usage text to stderr and terminates the JVM with status 1. */
  private static void exitWithUsage() {
    System.err.println(USAGE);
    System.exit(1);
  }

  /**
   * Command-line entry point; see {@link #USAGE} for the accepted commands.
   * The data store is closed even when the selected command throws.
   *
   * @param args the command followed by one or two numeric/file arguments
   */
  public static void main(String[] args) throws Exception {
    if(args.length < 2) {
      exitWithUsage();
    }
    // -deleteByQuery always needs both range ends; reject the call before
    // opening the data store instead of failing on args[2] later.
    if("-deleteByQuery".equalsIgnoreCase(args[0]) && args.length < 3) {
      exitWithUsage();
    }

    LogManager manager = new LogManager();
    boolean understood = true;
    try {
      if("-parse".equals(args[0])) {
        manager.parse(args[1]);
      } else if("-get".equals(args[0])) {
        manager.get(Long.parseLong(args[1]));
      } else if("-query".equals(args[0])) {
        if(args.length == 2)
          manager.query(Long.parseLong(args[1]));
        else
          manager.query(Long.parseLong(args[1]), Long.parseLong(args[2]));
      } else if("-delete".equals(args[0])) {
        manager.delete(Long.parseLong(args[1]));
      } else if("-deleteByQuery".equalsIgnoreCase(args[0])) {
        manager.deleteByQuery(Long.parseLong(args[1]), Long.parseLong(args[2]));
      } else {
        understood = false;
      }
    } finally {
      // Close on every path so the store is not left open after a failure.
      manager.close();
    }

    if(!understood) {
      exitWithUsage();
    }
  }
}
标签:
原文地址:http://blog.csdn.net/jediael_lu/article/details/43272521