标签:des style blog http color java os 使用 io
1
2
3
4
5
|
# Access-log format "main": the combined-style fields followed by the gzip
# ratio, request timing/size counters and the upstream details.
# Each quoted segment below is concatenated by nginx into a single format string.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" '
                '"$gzip_ratio" $request_time $bytes_sent $request_length '
                '"$upstream_addr" $upstream_status $upstream_response_time';
1
2
3
4
5
6
7
8
9
10
11
12
13
|
8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET / HTTP/1.1" 200 2373 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "2.78" 0.004 2683 369 "unix:/var/run/php5-fpm.sock" 200 0.004 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/css/base.css HTTP/1.1" 200 940 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 1247 373 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/a3e2e507/jquery.min.js HTTP/1.1" 200 93636 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.152 93976 359 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/logo.png HTTP/1.1" 200 6059 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 6369 377 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p02.jpg HTTP/1.1" 200 22177 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 22489 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p03.png HTTP/1.1" 200 3012 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 3321 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/two-dimension-code1.png HTTP/1.1" 200 761 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 1069 392 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/bg.png HTTP/1.1" 200 11474 
"http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 11785 375 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p04.png HTTP/1.1" 200 2860 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 3169 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p06.png HTTP/1.1" 200 74097 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.062 74409 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p05.png HTTP/1.1" 200 132072 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.256 132385 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:46 +0800] "GET 8.8.8.8/b519d8ca/image/p07.png HTTP/1.1" 200 207987 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.592 208300 376 "-" - - 8.8.8.8 - - [22 /Aug/2014 :20:23:46 +0800] "GET 8.8.8.8/b519d8ca/image/p01.png HTTP/1.1" 200 310418 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.918 310731 376 "-" - - |
1
2
3
4
5
|
root@m1: /home/hadoop # /home/hadoop/hadoop-2.2.0/bin/hadoop fs -mkdir /user/hive/warehouse/nginxlog root@m1: /home/hadoop # /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouse Found 1 items drwxr-xr-x - root supergroup 0 2014-01-22 23:13 /user/hive/warehouse/nginxlog root@m1: /home/hadoop # |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
package idoall.org.hive; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.LongWritable; /** * UDFRowSequence. */ @Description (name = "row_sequence" , value = "_FUNC_() - Returns a generated row sequence number starting from 1" ) @UDFType (deterministic = false , stateful = true ) public class UDFRowSequence extends UDF { private LongWritable result = new LongWritable(); public UDFRowSequence() { result.set( 0 ); } public LongWritable evaluate() { result.set(result.get() + 1 ); return result; } } |
1
2
3
4
|
hive> ADD JAR /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jar; Added /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jar to class path Added resource: /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jar hive> |
1
2
3
|
hive> CREATE TEMPORARY FUNCTION rowSequence AS 'idoall.org.hive.UDFRowSequence'; OK Time taken: 0.048 seconds |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
-- External Hive table over the raw nginx access logs stored under
-- /user/hive/warehouse/nginxlog, partitioned by year/month/day.
--
-- Each log line is split by the contrib RegexSerDe: the 18 capture groups of
-- "input.regex" map, in order, onto the 18 columns declared below.
-- Every column is STRING because the contrib RegexSerDe only supports string
-- columns. The hive-contrib jar must be on the classpath (ADD JAR) before
-- this table is created or queried.
--
-- NOTE: the column name "requestmethond" (sic) is kept as-is because the
-- HBase mapping table and the INSERT statement refer to it by that name.
CREATE EXTERNAL TABLE nginx_accesslog (
    host                   STRING,
    hostuser               STRING,
    times                  STRING,
    requestmethond         STRING,
    requesturl             STRING,
    requesthttp            STRING,
    status                 STRING,
    body_bytes_sent        STRING,
    referer                STRING,
    useragent              STRING,
    http_x_forwarded_for   STRING,
    gzip_ratio             STRING,
    request_time           STRING,
    bytes_sent             STRING,
    request_length         STRING,
    upstream_addr          STRING,
    upstream_status        STRING,
    upstream_response_time STRING
)
PARTITIONED BY (
    year  STRING,
    month STRING,
    day   STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*)\\s+-\\s+(.+?|-)\\s+\\[(.*)\\]\\s+\"([^ ]*)\\s+([^ ]*)\\s+([^ |\"]*)\"\\s+(-|[0-9]*)\\s+(-|[0-9]*)\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.+?|-)\\s+(.+?|-)\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.*)",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s %16$s %17$s %18$s"
)
STORED AS TEXTFILE
LOCATION '/user/hive/warehouse/nginxlog';
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
hive> LOAD DATA LOCAL INPATH '/home/hadoop/hive-0.13.1/a.com.access.20140821.log' OVERWRITE INTO TABLE nginx_accesslog partition (YEAR='2014', MONTH='08', DAY='21'); Copying data from file:/home/hadoop/hive-0.13.1/a.com.access.20140821.log Copying file: file:/home/hadoop/hive-0.13.1/a.com.access.20140821.log Loading data to table default.nginx_accesslog partition (year=2014, month=08, day=21) Partition default.nginx_accesslog{year=2014, month=08, day=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0] OK Time taken: 1.046 seconds hive> select count(0) from nginx_accesslog; Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1408550631561_0005, Tracking URL = http://m1:8088/proxy/application_1408550631561_0005/ Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0005 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2014-08-22 23:19:55,322 Stage-1 map = 0%, reduce = 0% 2014-08-22 23:20:01,669 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.74 sec 2014-08-22 23:20:08,926 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.59 sec MapReduce Total cumulative CPU time: 1 seconds 590 msec Ended Job = job_1408550631561_0005 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 1.59 sec HDFS Read: 3734 HDFS Write: 3 SUCCESS Total MapReduce CPU Time Spent: 1 seconds 590 msec OK 13 Time taken: 24.762 seconds, Fetched: 1 row(s) hive> |
1
2
3
4
5
|
root@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -copyFromLocal /home/hadoop/hive-0.13.1/a.com.access.20140821.log /user/hive/warehouse/nginxlog/ root@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouse/nginxlog Found 1 items -rw-r--r-- 3 root supergroup 3483 2014-08-22 23:18 /user/hive/warehouse/nginxlog/a.com.access.20140821.log root@m1:/home/hadoop# |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#先删除表 hive> drop table nginx_accesslog; OK Time taken: 0.363 seconds hive> #再创建表 ....此处省略,参考上文命令重新创建一次 #从HDFS导入数据(如果文件存在,要先删除),从下图可以看到,数据导入成功 hive> LOAD DATA inpath ‘/user/hive/warehouse/nginxlog/a.com.access.20140821.log‘ overwrite INTO TABLE nginx_accesslog partition (YEAR= ‘2014‘ , MONTH= ‘08‘ ,DAY= ‘21‘ ); Loading data to table default.nginx_accesslog partition (year=2014, month=08, day=21) Partition default.nginx_accesslog{year=2014, month=08, day=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0] OK Time taken: 0.373 seconds hive> select * from nginx_accesslog limit 100; OK 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET / HTTP /1 .1 200 2373 - Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - 2.78 0.004 2683 369 unix: /var/run/php5-fpm .sock 200 0.004 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/css/base .css HTTP /1 .1 200 940 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 1247 373 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /a3e2e507/jquery .min.js HTTP /1 .1 200 93636 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.152 93976 359 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/logo .png HTTP /1 .1 200 6059 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 6369 377 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/p02 .jpg HTTP /1 .1 200 22177 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 22489 376 - - - 2014 08 21 8.8.8.8 - 22 
/Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/p03 .png HTTP /1 .1 200 3012 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 3321 376 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/two-dimension-code1 .png HTTP /1 .1 200 761 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 1069 392 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/bg .png HTTP /1 .1 200 11474 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 11785 375 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/p04 .png HTTP /1 .1 200 2860 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.000 3169 376 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/p06 .png HTTP /1 .1 200 74097 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.062 74409 376 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:45 +0800 GET 8.8.8.8 /b519d8ca/image/p05 .png HTTP /1 .1 200 132072 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.256 132385 376 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:46 +0800 GET 8.8.8.8 /b519d8ca/image/p07 .png HTTP /1 .1 200 207987 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.592 208300 376 - - - 2014 08 21 8.8.8.8 - 22 /Aug/2014 :20:23:46 +0800 GET 
8.8.8.8 /b519d8ca/image/p01 .png HTTP /1 .1 200 310418 http: //xxx .com/ Mozilla /5 .0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit /537 .36 (KHTML, like Gecko) Chrome /38 .0.2125.0 Safari /537 .36 - - 0.918 310731 376 - - - 2014 08 21 Time taken: 0.056 seconds, Fetched: 13 row(s) hive> |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
-- Hive table backed by HBase through the HBaseStorageHandler.
--
-- "hbase.columns.mapping" pairs the Hive columns, in declaration order, with
-- HBase cells: the first entry (:key) binds the Hive column "key" to the
-- HBase row key, and every other column is stored as a qualifier of the same
-- name inside the single column family "log".
-- "hbase.table.name" names the underlying HBase table.
--
-- NOTE: "requestmethond" (sic) matches the column name used by
-- nginx_accesslog and must stay identical in the mapping string.
CREATE TABLE h2b_nginx_accesslog (
    key                    INT,
    host                   STRING,
    hostuser               STRING,
    times                  STRING,
    requestmethond         STRING,
    requesturl             STRING,
    requesthttp            STRING,
    status                 STRING,
    body_bytes_sent        STRING,
    referer                STRING,
    useragent              STRING,
    http_x_forwarded_for   STRING,
    gzip_ratio             STRING,
    request_time           STRING,
    bytes_sent             STRING,
    request_length         STRING,
    upstream_addr          STRING,
    upstream_status        STRING,
    upstream_response_time STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,log:host,log:hostuser,log:times,log:requestmethond,log:requesturl,log:requesthttp,log:status,log:body_bytes_sent,log:referer,log:useragent,log:http_x_forwarded_for,log:gzip_ratio,log:request_time,log:bytes_sent,log:request_length,log:upstream_addr,log:upstream_status,log:upstream_response_time"
)
TBLPROPERTIES (
    "hbase.table.name" = "h2b_nginx_accesslog"
);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
hbase(main):002:0> list TABLE h2b_nginx_accesslog 1 row(s) in 0.1220 seconds => [ "h2b_nginx_accesslog" ] /* 查看表结构时,只会显示列族,而不会显示列。Hbase表中的每个列,都归属于某个列族。列族是表的schema的一部分(而列不是)。*/ hbase(main):003:0> describe "h2b_nginx_accesslog" DESCRIPTION ENABLED 'h2b_nginx_accesslog', {NAME => 'log', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} true 1 row(s) in 0.5890 seconds hbase(main):004:0> |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
-- Copy every parsed access-log row into the HBase-backed table.
-- Columns are matched to the target by position: the value produced by the
-- rowSequence() UDF comes first and therefore lands in the target's "key"
-- column (the HBase row key); the log fields follow in target column order.
-- Requires the temporary function rowSequence to be registered in the session.
INSERT OVERWRITE TABLE h2b_nginx_accesslog
SELECT
    src.row_key,
    src.host,
    src.hostuser,
    src.times,
    src.requestmethond,
    src.requesturl,
    src.requesthttp,
    src.status,
    src.body_bytes_sent,
    src.referer,
    src.useragent,
    src.http_x_forwarded_for,
    src.gzip_ratio,
    src.request_time,
    src.bytes_sent,
    src.request_length,
    src.upstream_addr,
    src.upstream_status,
    src.upstream_response_time
FROM (
    SELECT
        rowSequence() AS row_key,
        host,
        hostuser,
        times,
        requestmethond,
        requesturl,
        requesthttp,
        status,
        body_bytes_sent,
        referer,
        useragent,
        http_x_forwarded_for,
        gzip_ratio,
        request_time,
        bytes_sent,
        request_length,
        upstream_addr,
        upstream_status,
        upstream_response_time
    FROM nginx_accesslog
) src;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
hive> insert overwrite table h2b_nginx_accesslog > select a.* > from ( select > rowSequence(), > host, > hostuser, > times, > requestmethond, > requesturl, > requesthttp, > status, > body_bytes_sent, > referer, > useragent, > http_x_forwarded_for, > gzip_ratio, > request_time, > bytes_sent, > request_length, > upstream_addr, > upstream_status, > upstream_response_time > from nginx_accesslog) a; Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1408550631561_0017, Tracking URL = http://m1:8088/proxy/application_1408550631561_0017/ Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0017 Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0 2014-08-24 11:57:24,051 Stage-0 map = 0%, reduce = 0% 2014-08-24 11:57:32,403 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 1.96 sec MapReduce Total cumulative CPU time: 1 seconds 960 msec Ended Job = job_1408550631561_0017 MapReduce Jobs Launched: Job 0: Map: 1 Cumulative CPU: 1.96 sec HDFS Read: 3734 HDFS Write: 0 SUCCESS Total MapReduce CPU Time Spent: 1 seconds 960 msec OK Time taken: 20.378 seconds hive> |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
hbase(main):013:0> get "h2b_nginx_accesslog",1 COLUMN CELL log:body_bytes_sent timestamp=1408852652522, value=2373 log:bytes_sent timestamp=1408852652522, value=2683 log:gzip_ratio timestamp=1408852652522, value=2.78 log:host timestamp=1408852652522, value=8.8.8.8 log:hostuser timestamp=1408852652522, value=- log:http_x_forwarded_for timestamp=1408852652522, value=- log:referer timestamp=1408852652522, value=- log:request_length timestamp=1408852652522, value=369 log:request_time timestamp=1408852652522, value=0.004 log:requesthttp timestamp=1408852652522, value=HTTP/1.1 log:requestmethond timestamp=1408852652522, value=GET log:requesturl timestamp=1408852652522, value=/ log:status timestamp=1408852652522, value=200 log:times timestamp=1408852652522, value=22/Aug/2014:20:23:45 +0800 log:upstream_addr timestamp=1408852652522, value=unix:/var/run/php5-fpm.sock log:upstream_response_time timestamp=1408852652522, value=0.004 log:upstream_status timestamp=1408852652522, value=200 log:useragent timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 18 row(s) in 0.0440 seconds hbase(main):015:0> get "h2b_nginx_accesslog",1,{COLUMN => 'log:useragent'} COLUMN CELL log:useragent timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 1 row(s) in 0.0080 seconds |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
hbase(main):031:0> import org.apache.hadoop.hbase.filter.CompareFilter hbase(main):032:0> import org.apache.hadoop.hbase.filter.SingleColumnValueFilter hbase(main):033:0> import org.apache.hadoop.hbase.filter.SubstringComparator hbase(main):034:0> import org.apache.hadoop.hbase.util.Bytes hbase(main):035:0> scan "h2b_nginx_accesslog",{FILTER => SingleColumnValueFilter.new(Bytes.toBytes('log'),Bytes.toBytes('requesturl'),CompareFilter::CompareOp.valueOf('EQUAL'),SubstringComparator.new('p04.png'))} ROW COLUMN+CELL 9 column=log:body_bytes_sent, timestamp=1408852652522, value=2860 9 column=log:bytes_sent, timestamp=1408852652522, value=3169 9 column=log:gzip_ratio, timestamp=1408852652522, value=- 9 column=log:host, timestamp=1408852652522, value=8.8.8.8 9 column=log:hostuser, timestamp=1408852652522, value=- 9 column=log:http_x_forwarded_for, timestamp=1408852652522, value=- 9 column=log:referer, timestamp=1408852652522, value=http://xxx.com/ 9 column=log:request_length, timestamp=1408852652522, value=376 9 column=log:request_time, timestamp=1408852652522, value=0.000 9 column=log:requesthttp, timestamp=1408852652522, value=HTTP/1.1 9 column=log:requestmethond, timestamp=1408852652522, value=GET 9 column=log:requesturl, timestamp=1408852652522, value=8.8.8.8/b519d8ca/image/p04.png 9 column=log:status, timestamp=1408852652522, value=200 9 column=log:times, timestamp=1408852652522, value=22/Aug/2014:20:23:45 +0800 9 column=log:upstream_addr, timestamp=1408852652522, value=- 9 column=log:upstream_response_time, timestamp=1408852652522, value=- 9 column=log:upstream_status, timestamp=1408852652522, value=- 9 column=log:useragent, timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 1 row(s) in 0.0320 seconds hbase(main):036:0> |
1
|
hive> add jar /home/hjl/hive/lib/hive_contrib.jar; |
Nginx日志导入到Hive0.13.1,同步Hbase0.96.2,设置RowKey为autoincrement(ID自增长)
标签:des style blog http color java os 使用 io
原文地址:http://www.cnblogs.com/lion.net/p/3932741.html