Tags:
-rw-rw-r-- 1 hzadmin hzadmin 1258939838 Jul 10 08:53 20160709001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1259011023 Jul 10 08:53 20160709002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258893627 Jul 10 08:53 20160709003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258825284 Jul 10 08:53 20160709004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258902330 Jul 10 08:53 20160709005.txt
-rw-rw-r-- 1 hzadmin hzadmin 12662944 Jul 10 08:53 20160709006.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258267725 Jul 11 08:48 20160710001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258199674 Jul 11 08:48 20160710002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258244177 Jul 11 08:48 20160710003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1258312191 Jul 11 08:48 20160710004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1131040166 Jul 11 08:48 20160710005.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257713262 Jul 12 09:20 20160711001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257638353 Jul 12 09:20 20160711002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257634023 Jul 12 09:20 20160711003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257516561 Jul 12 09:20 20160711004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1257627299 Jul 12 09:21 20160711005.txt
-rw-rw-r-- 1 hzadmin hzadmin 553070932 Jul 12 09:21 20160711006.txt
[hzadmin@BDI2 bj_data]$
[hzadmin@BDI2 bj_data]$
[hzadmin@BDI2 bj_data]$ pwd
/dfs/ftp/hzadmin/bj_data
[hzadmin@BDI2 bj_data]$ history
23 hdfs dfs -ls /home/hzadmin/bj_ggsn/start.log
24 hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/
25 hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160509
26 cd ..
27 ll
28 crontab -l
29 sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log &
30 tail -f /home/hzadmin/bj_ggsn/start.log
31 sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log &
32 ll
33 cd /home/hzadmin
34 ll
35 pwd
36 cd bj_ggsn/
37 ll
38 pwd
39 cd ../
40 ll
41 cd urlAPP/
42 ll
43 pwd
44 crobtab -l
45 crontab -l
46 cd ../bj_ggsn/
47 ll
48 cd
49 ll
50 cd test
51 ll
52 cd ../bj_data/
53 ll
54 crontab -l
55 ps -ef|grep start1
56 tail -f /home/hzadmin/bj_ggsn/start.log
57 more /home/hzadmin/bj_ggsn/start.log
58 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/20160509
59 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/
60 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/20160509
61 hdfs dfs -ls /share/external_table/ssa/DPI_MBL_4G/ALL/20160509
62 crontab -l
63 cd ..
64 ll
65 cd /home/hzadmin
66 ll
67 cd bj_ggsn/
68 ll
69 cd ../urlAPP/
70 ll
71 cd URLAppProgram_sf
72 ll
73 more public.cfg
74 ftp 132.63.10.7
75 ll
76 crontab -l
77 cd trydemo
78 ll
79 pwd
80 cd bj_ggsn
81 ll
82 cd ..
83 ll
84 cd /home/hzadmin/bj_ggsn/
85 ll
86 pwd
87 vi /home/hzadmin/bj_ggsn/hours.txt
88 cd ..
89 ll
90 cd urlAPP
91 ll
92 cd ..
93 ;ll
94 ll
95 pwd
96 cd urlAPP
97 ll
98 cd /home/hzadmin/urlAPP/URLAppProgram_sf
99 ll
100 cd ..
101 ll
102 cd ..
103 ll
104 cd urlAPP
105 ll
106 cd URLAppProgram_sf
107 ll
108 pwd
109 ll
110 pwd
111 ll
112 vi public.cfg
113 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
114 ll
115 vi get_file_list.sh
116 cd /home/hzadmin/urlAPP/URLAppProgram_sf
117 ll
118 vi get_uacds.sh
119 cd ProgramByDay
120 ll
121 vi get_uacds.sh
122 ll
123 cd /home/hzadmin/urlAPP/URLAppProgram_sf
124 ll
125 vi public.cfg
126 ll
127 cd bj_data/
128 ll
129 cd ..
130 ll
131 cd /home/hzadmin
132 ll
133 cd bj_ggsn/
134 ll
135 more start
136 more start.log
137 ps -ef|grep start1.sh
138 ps -ef|grep start3.sh
139 kill -9 178805
140 kill -9 221082
141 ps -ef|grep start1.sh
142 ll
143 cd bj_data/
144 ll
145 cd /home/hzadmin
146 ll
147 cd bj_ggsn/
148 ll
149 crontab -l
150 ps -ef|grep start1.sh
151 sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log &
152 tail -f /home/hzadmin/bj_ggsn/start.log
153 kill -9 14886
154 ll
155 sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log
156 ps -ef|grep start2
157 sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log &
158 ps -ef|grep start2
159 ps -ef|grep start1
160 ps -ef|grep start2
161 ps -ef|grep start3
162 cd /home/hzadmin
163 ll
164 cd bj_ggsn/
165 ll
166 more select1.sh
167 more start1.sh
168 ll
169 cd bj_data/
170 ll
171 cd /home/hzadmin
172 ll
173 cd bj_ggsn/
174 ll
175 tail -f start
176 tail -f start.log
177 ll
178 cd log
179 ll
180 tail -f 20160509_1.log
181 hive
182 hdfs dfs -ls /usr/local
183 hdfs dfs -ls /
184 hdfs dfs -chown /usr/local
185 hdfs dfs -chown hdfs /usr/local
186 ll
187 cd bj_data
188 ll
189 cd ..
190 ll
191 cd test
192 ll
193 cd /home/hzadmin
194 ll
195 cd bj_ggsn/
196 ll
197 crontab -l
198 sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log &
199 hive
200 ll
201 cd bj_data/
202 ll
203 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G
204 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/
205 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/
206 ll
207 ll
208 cd urlAPP
209 ll
210 crontab -l
211 cd /dfs/ftp/hzadmin
212 ll
213 cd bj_data
214 ll
215 vi 20160509007.txt
216 cd ..
217 ll
218 vi log.txt
219 cd t_user
220 ll
221 vi phone_number.dat
222 cd /home/hzadmin/bj_ggsn
223 ll
224 vi select2.sh
225 vi /home/hzadmin/urlAPP/BoncRun.sh
226 cd /home/hzadmin/urlAPP
227 ll
228 cd URLAppProgram_sf
229 ll
230 vi common.cfg
231 df
232 cd /home/hzadmin/urlAPP/URLAppProgram_sf
233 ll
234 vi run.sh
235 ll
236 cd ProgramByDay
237 ll
238 vi report_summary.sh
239 ll
240 cd ..
241 ll
242 vi match.cfg
243 cd ProgramByHour
244 ll
245 cd ..
246 ll
247 cd ProgramByDay
248 ll
249 pwd
250 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
251 ll
252 sh ftp_getfilelist.sh
253 cd ..
254 ll
255 cd ProgramByDay
256 ll
257 cd ..
258 ll
259 cd ProgramByHour
260 ll
261 pwd
262 cd ..
263 ll
264 vi match.cfg
265 cd ProgramByHour
266 ll
267 cd ..
268 ll
269 cd ..
270 ll
271 cd ResultMatch
272 ll
273 crontab -l
274 exit
275 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
276 ll
277 vi get_uacds.sh
278 cd /home/hzadmin/urlAPP/URLAppProgram_sf
279 ll
280 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
281 ll
282 vi get_file_list.sh
283 get_uacds.sh
284 cd /data3/ftp000/URLAppProgram
285 cd ..
286 ll
287 cd ProgramByDay
288 ll
289 cd ..
290 ll
291 cd ..
292 ll
293 cd logs
294 ll
295 vi hive__20160320.log
296 ll
297 cd ..
298 ll
299 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
300 ll
301 cd ..
302 ll
303 vi R_URL_TYPE_20160510_00.txt
304 df
305 cd ProgramByDay;
306 ll
307 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
308 ll
309 cd /home/hzadmin/urlAPP/URLAppProgram_sf
310 ll
311 ping 132.63.10.7
312 ls -lt /dfs/ftp/hzadmin/urlAPP/ResultMatch/data
313 df
314 cd /home/hzadmin/urlAPP/
315 ll
316 vi hive.sh
317 cd /home/hzadmin/bj_ggsn/
318 ll
319 vi delete.sh
320 pwd
321 cd /home/hzadmin/urlAPP/URLAppProgram_sf
322 ll
323 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
324 ll
325 vi match.sh
326 pwd
327 ll
328 cd /dfs/data/ugftp/ccg/
329 ll
330 cd /dfs/ftp/hzadmin
331 ll
332 cd bj_data
333 ll
334 pwd
335 cd ..
336 ll
337 cd urlAPP
338 ll
339 cd ResultMatch
340 ll
341 cd data
342 ll
343 cd ..
344 ll
345 cd ..
346 ll
347 cd ..
348 ll
349 cd bj_data
350 ll
351 cd ..
352 ll
353 du -sh bj_data
354 df
355 df -h
356 cd ..
357 ll
358 cd ..
359 ll
360 df
361 ll
362 cd /dfs/ftp/hzadmin
363 ll
364 cd t_user/
365 ll
366 cd ..
367 ll
368 cd /dfs/ftp/hzadmin/
369 ll
370 cd /home/hzadmin/
371 ll
372 cd bj_ggsn/
373 ll
374 more start1.sh
375 more select1.sh
376 cd /home/hzadmin
377 ll
378 cd
379 ll
380 cd bj_data/
381 ll
382 pwd
383 cd ..
384 ll
385 cd t_user/
386 ll
387 cd ..
388 ll
389 cd urlAPP/
390 ll
391 cd ResultMatch/
392 ll
393 cd data
394 ll
395 cd 201605
396 ll
397 cd 20160530
398 ll
399 cd ..
400 ll
401 cd
402 ll
403 hdfs
404 hadoop
405 hadoop version
406 ll
407 cd bj_data/
408 ll
409 cd ..
410 cd /home/hzadmin/
411 ll
412 cd bj_ggsn/
413 ll
414 vim start1.sh
415 vim select1.sh
416 vim delete.sh
417 more start1.sh
418 vim /home/hzadmin/urlAPP/hive.sh
419 cd
420 cd bj_data/
421 ll
422 exit
423 ll
424 pwd
425 cd home
426 cd bj_data
427 ll
428 cd ../
429 ll
430 pwd
431 cd /home
432 ll
433 cd hzadmin
434 ll
435 cd urlAPP
436 ll
437 cd ..
438 ll
439 cd bj_data
440 ll
441 cd ..
442 ll
443 cd /home/hzadmin/bj_ggsn
444 ll
445 cd ..
446 ll
447 pwd
448 cd bj_ggsn
449 ll
450 cd jar
451 ll
452 cd ..
453 ll
454 cd ..
455 ll
456 cd urlAPP
457 ll
458 vi ResultMatch
459 cd URLAppProgram
460 ll
461 cd ..
462 ll
463 cd URLAppProgram_sf
464 ll
465 vi public.cfg
466 vi run.sh
467 ll
468 cd urlAPP
469 ll
470 cd ..
471 cd bj_data
472 ll
473 vi 20160607006.txt
474 cat 20160607006.txt
475 ll
476 cd /home
477 ll
478 cd /home/hzadmin/bj_ggsn/start.log
479 cd /home/hzadmin/bj_ggsn
480 ll
481 cat start.log
482 cd ../
483 ll
484 cat /dfs/ftp/hzadmin/trydemo/log.txt
485 crontab -l
486 ll
487 cd /dfs/ftp/hzadmin/bj_data
488 ll
489 cat 20160607006.txt
490 ll
491 cd bj_data
492 ll
493 crontab -l
494 cd ~
495 ls
496 cd /home/hzadmin/
497 ls
498 pwd
499 cd ~
500 pwd
501 cd /home/hzadmin/
502 cd bj_ggsn/
503 ls
504 vim start1.sh
505 cd ..
506 ls
507 cd urlAPP/
508 ls
509 vim hive.sh
510 ls
511 cd ..
512 ls
513 cd urlAPP/
514 ls
515 cd logs
516 ls
517 ll
518 cd 20160615
519 ls
520 ll
521 more match_20160615_20160614.log
522 ls
523 more report_20160615_20160614.log
524 cd ..
525 ls
526 cd ..
527 ls
528 cd URLAppProgram_sf/
529 ls
530 vim run.sh
531 ls
532 cd ProgramByDay/
533 ls
534 vim alter_table.sh
535 ls
536 vim create_table.sh
537 ls
538 vim match1.sh
539 ls
540 vim match.sh
541 hive
542 ll
543 cd bj_data/
544 ll
545 rm -f ./201605*.txt
546 ll
547 ll|grep 201604
548 rm -f ./201604*.txt
549 ll
550 ls -lt
551 rm -f ./2015*.txt
552 ll
553 ls -lt
554 rm -f ./2015*.tx
555 ll
556 ls -lrt
557 ls -lt
558 debugfs
559 exit
560 ll
561 cd urlAPP/
562 ll
563 cd /dfs/ftp
564 ll
565 cd /dfs/ftp/hzadmin
566 ll
567 cd urlAPP/
568 ll
569 cd URLAppReport/
570 LL
571 ll
572 cd ..
573 ll
574 cd UnMatchTop1000/
575 ll
576 cd ..
577 ll
578 cd ResultMatch/
579 ll
580 cd data/
581 ll
582 cd ../..
583 l
584 cd ..
585 ll
586 cd
587 ll
588 cd /dfs/ftp/hzadmin/
589 ll
590 cd /home/hzadmin/
591 ll
592 cd bj_ggsn/
593 ll
594 cd ..
595 ll
596 cd urlAPP/
597 ll
598 cd URLAppProgram
599 cd URLAppProgram_sf
600 cd ../URLAppProgram_sf
601 ll
602 cd bin
603 ll
604 cd ..
605 ll
606 pwd
607 find .. -name "*match*"
608 find .. -name "*match.sh"
609 cd ../URLAppProgram_sf/ProgramByDay/match.sh
610 cd ../URLAppProgram_sf/ProgramByDay/
611 ll
612 pwd
613 ll
614 cd bj_data/
615 ll
616 exit
617 ll
618 exit
619 ll
620 cd bj_data/
621 ll
622 cd /home/hzadmin
623 ll
624 cd bj_ggsn/
625 ll
626 sh start2.sh 20160625
627 sh start2.sh 20160625 > start.log 2>&1 &
628 tail -f start.log
629 cd
630 ll
631 cd bj_data/
632 ll
633 cd /ap
634 cd /app
635 ll
636 cd hadoop/con
637 cd hadoop/etc/hadoop/
638 ll
639 more core-site.xml
640 ll
641 ll
642 cd /home/hzadmin
643 ll
644 cd bj_ggsn/
645 ll
646 more start2.sh
647 sh start2.sh 20160625
648 ll
649 sh start2.sh 20160625 > start.log 2>&1 &
650 tail -f start.log
651 ll
652 more start1.sh
653 more start2.sh
654 ll
655 more start.log
656 cd /dfs/ftp/hzadmin/test/
657 tail start.log
658 cd -
659 tail -n 200 start.log
660 ll
661 more start3.sh
662 sh ./start2.sh 20160625 > start.log 2>&1 &
663 tail -f start.log
664 cd
665 cd test/
666 ll
667 cd ..
668 ll
669 cd bj_data/
670 ll
671 cd
672 cd /home/hzadmin/
673 ll
674 cd bj_ggsn/
675 ll
676 vim start2.sh
677 sh ./start2.sh 20160625 > start.log 2>&1 &
678 df -h
679 tail -f start.log
680 ll
681 cd bj_data/
682 ll
683 cd ..
684 ll
685 cd /home/hzadmin
686 ll
687 cd bj_ggsn/
688 ll
689 sh start2.sh 20160624 > start.log 2>&1 &
690 ll /dfs/ftp/hzadmin/bj_data/
691 cd
692 ll
693 cd bj_data/
694 ll
695 cd -
696 ll
697 cd -
698 ll
699 cd -
700 cd /home/hzadmin/
701 ll
702 cd bj_ggsn/
703 ll
704 tail -f start.log
705 ll /dfs/ftp/hzadmin/bj_data/
706 sh start2.sh 20160625 > start.log 2>&1 &
707 ftp 10.62.242.124
708 ll /dfs/ftp/hzadmin/bj_data/
709 tail -f start.log
710 ll /dfs/ftp/hzadmin/bj_data/
711 tail -f start.log
712 ll
713 ps -ef |grep start2.sh
714 ll
715 ll /dfs/ftp/hzadmin/bj_data/
716 tail -f -n 100 start.log
717 ll
718 cd bj_data/
719 ll
720 cd /home/hzadmin
721 ll
722 cd bj_ggsn/
723 ll
724 sh start2.sh 20160626 > start.log 2>&1 &
725 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/match
726 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/
727 hadoop fs -ls /
728 hadoop fs -ls /user
729 hadoop fs -ls /user/hzadmin
730 hadoop fs -mkdir /user/hzadmin/extract
731 hadoop fs -ls /user/hzadmin/
732 exit
733 cd ~
734 ls
735 cd /home/hzadmin
736 ls
737 spark-submit
738 exit
739 ls
740 exit
741 hadoop fs -ls /home/hzadmin
742 hadoop fs -ls /user/hzadmin
743 hadoop fs -rm -r /user/hzadmin/extract
744 hadoop fs -ls /user/hzadmin
745 exit
746 ll
747 cd bj_data/
748 ll
749 /home/spark/spark-1.2.2-bin-hadoop2.4/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
750 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
751 ll /home/spark/spark-1.2.2-bin-hadoop2.4/bin
752 exit
753 ll /home/spark/spark-1.2.2-bin-hadoop2.4/bin
754 ll /home/spark/spark-1.2.2-bin-hadoop2.4/
755 ll /home/spark/
756 exit
757 ll /home/spark/
758 ll /home/spark/spark-1.2.2-bin-hadoop2.4/
759 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit
760 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
761 exit
762 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
763 yarn application -list
764 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
765 yarn application -list
766 yarn application -kill application_1464150086810_7363
767 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
768 hadoop fs -ls /user/hzadmin
769 hadoop fs -rm -r /user/hzadmin/extract
770 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract
771 hadoop fs -ls /user/hzadmin/extract
772 hadoop fs -du -h /user/hzadmin/extract
773 hadoop fs -du -h /user/hzadmin/
774 ls
775 exit
776 hadoop fs -ls /user/hzadmin
777 hadoop fs -ls /user/hzadmin/.sparkStaging
778 hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_7363
779 ls
780 mkdir extract
781 ls
782 hadoop fs -get /user/hzadmin/extract/* /home/hzadmin/extract/
783 ls
784 ll -h
785 ll extract/
786 ls
787 tar -zcvf extract.tar.gz extract
788 ls
789 ll -h
790 exit
791 ll
792 cd bj_data/
793 ll
794 ll -h
795 cd ..
796 ll
797 mkdir 6y
798 ll
799 cd bj_data/
800 ll
801 cp 201606* ../6y/
802 ll
803 cd ..
804 ll
805 rm -rf 6y
806 ll
807 cd 6y/
808 ll
809 df -h
810 ll
811 cd ..
812 ll
813 cd bj_data/
814 ll
815 ls |grep 201606
816 ls |grep 201606|xargs du -h
817 ls |grep 201606|xargs du -cb
818 ls |grep 201606|xargs du -h
819 ls |grep 201606|xargs du -cb
820 ls |grep 201606|xargs du -cbh
821 ls |grep 201603|xargs du -cbh
822 hadoop fs -ls /user/hzadmin
823 hadoop fs -ls /user/hzadmin/extract
824 hadoop fs -rm -r /user/hzadmin/extract
825 hadoop fs -ls /user/hzadmin
826 hadoop fs -ls /user/hzadmin/.sparkStaging
827 hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_9663
828 hadoop fs -ls /user/hzadmin/.sparkStaging/.staging
829 hadoop fs -ls /user/hzadmin/.staging
830 hadoop fs -ls /
831 hadoop fs -ls /app-logs
832 hadoop fs -ls /app-logs/hzadmin
833 hadoop fs -ls /app-logs/hzadmin/logs
834 hadoop fs -ls /app-logs/hzadmin/logs/application_1464150086810_9663
835 cd ~
836 ls
837 cd /home/hzadmin/
838 ls
839 hadoop fs -get /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454
840 ls
841 more BD18.bd.bjtel_45454
842 hadoop fs -tail /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454
843 exit
844 hadoop fs -ls /user/hzadmin
845 hadoop fs -ls /user/hzadmin/extract
846 hadoop fs -rm -r /user/hzadmin/extract
847 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
848 hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
849 hadoop fs -ls /user/hzadmin/extract
850 hadoop fs -du -h /user/hzadmin/extract
851 hadoop fs -du -h /user/hzadmin/
852 hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
853 hadoop fs -du -h /user/hzadmin/
854 hadoop fs -du -h /user/hzadmin/extract2
855 cd /home/hzadmin
856 ls
857 hadoop fs -get /user/hzadmin/extract
858 ls
859 ls extract/
860 hadoop fs -get /user/hzadmin/extract2
861 ls
862 man gz
863 man tar
864 ls
865 tar -cf extract
866 tar zcvf extract.tar.gz extract
867 ls
868 tar zcvf extract2.tar.gz extract2
869 ls
870 exit
871 hadoop fs -ls /user/hzadmin
872 hadoop fs -ls /user/hzadmin/extract
873 hadoop fs -rm -r /user/hzadmin/extract
874 hadoop fs -ls /user/hzadmin/
875 ls
876 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 8g --executor-cores 4 --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
877 yarn application -list
878 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
879 hadoop fs -ls /user/hzadmin
880 hadoop fs -ls /user/hzadmin/extract
881 hadoop fs -rm -r /user/hzadmin/extract
882 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract
883 yarn application -list
884 yarn application -kill application_1464150086810_9170
885 yarn application -list
886 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 20 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
887 yarn application -list
888 yarn application -kill application_1464150086810_9256
889 history
890 hadoop fs -ls /user/hzadmin
891 hadoop fs -ls /user/hzadmin
892 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match
893 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
894 yarn application -list
895 yarn application -kill application_1464150086810_9293
896 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
897 ls /home/spark/
898 find /home/spark/ -name *example*jar
899 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i pi
900 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i Pi
901 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i SparkPi
902 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 40 10
903 find /home/spark/ -name *example*jar
904 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
905 yarn
906 yarn application
907 yarn application -list
908 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
909 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
910 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
911 hdfs dfs -ls /user/hzadmin/
912 hdfs dfs -ls /user/hzadmin/extract
913 hdfs dfs -rmr /user/hzadmin/extract
914 hdfs dfs -ls /user/hzadmin/extract
915 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
916 yarn application -list
917 yarn application -kill application_1464150086810_9459
918 yarn application -list
919 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
920 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
921 yarn application -list
922 yarn application -kill application_1464150086810_9476
923 hadoop fs -ls /user/hzadmin
924 hadoop fs -rm -r /user/hzadmin/extract
925 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
926 yarn application -list
927 yarn application -kill application_1464150086810_9481
928 pwd
929 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
930 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep sparkPi
931 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i sparkpi
932 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10
933 diagnostics: Application application_1464150086810_9496 failed 2 times due to AM Container for appattempt_1464150086810_9496_000002 exited with exitCode: 10 due to: Exception from container-launch: org.apache.hadoop.util.Shell$ExitCodeException:
934 org.apache.hadoop.util.Shell$ExitCodeException:
935 at org.apache.hadoop.util.Shell.runCommand(Shell.java:505)
936 at org.apache.hadoop.util.Shell.run(Shell.java:418)
937 at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
938 at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
939 at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300)
940 at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81)
941 at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
942 at java.util.concurrent.FutureTask.run(FutureTask.java:138)
943 at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
944 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
945 at java.lang.Thread.run(Thread.java:662)
946 Container exited with a non-zero exit code 10
947 .Failing this attempt.. Failing the application.
948 ApplicationMaster host: N/A
949 ApplicationMaster RPC port: -1
950 queue: default
951 start time: 1467966689710
952 final status: FAILED
953 tracking URL: BD01.bd.bjtel:8088/cluster/app/application_1464150086810_9496
954 user: hzadmin
955 Exception in thread "main" org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
956 at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:118)
957 at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:59)
958 at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:140)
959 at org.apache.spark.SparkContext.<init>(SparkContext.scala:348)
960 at org.apache.spark.examples.SparkPi$.main(SparkPi.scala:28)
961 at org.apache.spark.examples.SparkPi.main(SparkPi.scala)
962 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
963 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
964 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
965 at java.lang.reflect.Method.invoke(Method.java:606)
966 at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358)
967 at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
968 at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
969 [hzadmin@BDI2 hzadmin]$
970 yarn application -list
971 hadoop fs -ls /user/hzadmin
972 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
973 yarn application -list
974 yarn application -kill application_1464150086810_9663
975 hadoop fs -ls /user/hzadmin
976 hadoop fs -ls /user/hzadmin/extract
977 hadoop fs -rm -r /user/hzadmin/extract
978 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
979 yarn application -lsit
980 yarn application -lis
981 yarn application -kill application_1464150086810_9732
982 hadoop fs -rm -r /user/hzadmin/extract
983 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 16G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
984 yarn application -kill application_1464150086810_9733
985 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 20G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract
986 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160707/match /user/hzadmin/extract2
987 ls
988 ls extract/
989 rm -rf extract
990 ls
991 top
992 ls
993 top
994 ll
995 [wd
996 pwd
997 cd /home/hzadmin
998 ll
999 cd bj_ggsn/
1000 ll
1001 crontab -l
1002 more start1.sh
1003 more start2.sh
1004 ~/bj_data/
1005 cd ~/bj_data/
1006 ll
1007 cd -
1008 ll
1009 more start2.sh
1010 more start1.sh
1011 ll
1012 cd ..
1013 cd urlAPP/
1014 ll
1015 cd
1016 ll
1017 cd /dfs/ftp/hzadmin/
1018 ll
1019 cd bj_data/
1020 ll
1021 pwd
1022 history
[hzadmin@BDI2 bj_data]$
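The tail of the history above is an ad-hoc Spark extraction being iterated on: clear the HDFS output directory, spark-submit the Extract class from process_2.10-1.0.jar against one day's match directory, then pull the result to the local filesystem and tar it up. A minimal sketch of that sequence as a single script, assuming the paths from the log and the executor sizing and queue used in the later runs:

#!/bin/bash
# Hedged sketch assembled from the history above; the date, executor sizing
# and queue are assumptions taken from the later runs in the log.
set -e
day=20160706
src=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/${day}/match
dst=/user/hzadmin/extract

hadoop fs -rm -r ${dst} || true      # clear any previous output
/home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit \
  --class Extract --master yarn --deploy-mode cluster \
  --executor-memory 10G --num-executors 40 --queue datagather \
  /home/hzadmin/process_2.10-1.0.jar ${src} ${dst}

cd /home/hzadmin
hadoop fs -get ${dst}                # copy the HDFS result directory locally
tar zcvf extract.tar.gz extract      # package it for transfer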
--------------------------------------------------------------------------------------
[hzadmin@BDI2 home]$ cd hzadmin/
[hzadmin@BDI2 hzadmin]$
[hzadmin@BDI2 hzadmin]$ ll
total 28
drwxrwxr-x 3 hzadmin hzadmin 4096 Aug 5 2015 bak
drwxr-xr-x 4 hzadmin hzadmin 4096 Jun 26 19:31 bj_ggsn
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:48 extract
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:55 extract2
-rw-r--r-- 1 root root 5485 Jun 29 10:46 process_2.10-1.0.jar
drwxrwxr-x 8 hzadmin hzadmin 4096 Jun 17 11:09 urlAPP
[hzadmin@BDI2 hzadmin]$ pwd
/home/hzadmin
[hzadmin@BDI2 hzadmin]$ cd bj_ggsn/
[hzadmin@BDI2 bj_ggsn]$ ll
total 136
-rwxr-xr-x 1 hzadmin hzadmin 433 Feb 10 20:39 delete.sh
-rw-r--r-- 1 hzadmin hzadmin 71 Apr 30 2015 hours.txt
drwxr-xr-x 2 root root 4096 Aug 5 2015 jar
drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 12 03:19 log
-rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out
-rwxr-xr-x 1 hzadmin hzadmin 1845 Sep 23 2015 select1.sh
-rwxr-xr-x 1 hzadmin hzadmin 454 Oct 12 2015 select2bak.sh
-rwxr-xr-x 1 hzadmin hzadmin 1367 Oct 12 2015 select2.sh
-rwxr-xr-x 1 hzadmin hzadmin 1344 Jun 18 2015 select.sh
-rwxr-xr-x 1 hzadmin hzadmin 1337 May 4 2015 select.shbak
-rwxr-xr-x 1 hzadmin hzadmin 628 Oct 28 2015 start1.sh
-rwxr-xr-x 1 hzadmin hzadmin 692 Jun 26 19:31 start2.sh
-rwxr-xr-x 1 hzadmin hzadmin 636 May 10 14:22 start3.sh
-rwxr-xr-x 1 hzadmin hzadmin 631 Mar 5 13:27 startbak1.sh
-rw-r--r-- 1 hzadmin hzadmin 16658 Jul 12 09:21 start.log
[hzadmin@BDI2 bj_ggsn]$
[hzadmin@BDI2 bj_ggsn]$ crontab -l
00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log
00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt
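A note on the first entry: cron hands commands to /bin/sh, and on shells where '&>' is not a redirection operator the line is parsed as backgrounding start1.sh and then truncating start.log, leaving the log empty. The manual runs later in this session use the explicit form, which would also be the safer crontab entry (a suggested variant, not the installed one):

00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh > /home/hzadmin/bj_ggsn/start.log 2>&1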
[hzadmin@BDI2 bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@BDI2 bj_ggsn]$
[hzadmin@BDI2 bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@BDI2 bj_ggsn]$ cat start1.sh
#!/bin/sh
source ~/.bash_profile
datetime=$(date --date "1 days ago" +%Y%m%d)
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime >> log/${datetime}_1.log 2>&1
sh /home/hzadmin/bj_ggsn/select2.sh $datetime >> log/${datetime}_2.log 2>&1
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun.sh
sh /home/hzadmin/urlAPP/hive.sh $datetime
sh /home/hzadmin/bj_ggsn/delete.sh
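start1.sh always processes yesterday's data (datetime is computed with date --date "1 days ago"), so the 03:00 cron entry above covers the normal daily run. For catch-up runs the session instead calls start2.sh with an explicit date and captures both streams in the same log; taken directly from the history:

sh /home/hzadmin/bj_ggsn/start2.sh 20160625 > /home/hzadmin/bj_ggsn/start.log 2>&1 &
tail -f /home/hzadmin/bj_ggsn/start.log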
[hzadmin@BDI2 bj_ggsn]$ cat select1.sh
#!/bin/bash
datetime=$1
hours=/home/hzadmin/bj_ggsn/hours.txt
s=`du -k /dfs/ftp/hzadmin/t_user/phone_number.dat|awk '{print $1}'`
datetime2=`date -d -2day +%Y%m%d`
hive -e"use dpi;alter table t_user add IF NOT EXISTS partition(receive_day='$datetime');"
if [ $s -ge 4000 ];
then
hadoop fs -put /dfs/ftp/hzadmin/t_user/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
else
hadoop fs -mv /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime2/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
fi
while read LINE
do
hadoop fs -test -e /share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}
if [ $? -eq 0 ]; then
hive -e "use dpi;alter table bj_ggsn add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
else
echo 'not exist'
fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn t
on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
insert overwrite table bj_ggsn_mobile
partition (receive_day = '${datetime}')
select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
t.LAC,
t.CI,
t.IMEI,
t.BUSITYPE,
t.CAPTURETIME,
t.ENDTIME,
t.DURATION,
t.FLOWUP,
t.FLOWDOWN,
t.FLOWALL,
t.RATTYPE,
t.TERMIANL_IP,
t.DESTIP,
t.STATUSCODE,
t.USERAGENT,
t.APN,
t.IMSI,
t.SGSNIP,
t.GGSNIP,
t.CONTENTTYPE,
t.SOURCEPORT,
t.DESTPORT,
t.LOGOCODE,
t.URL,
t.RESULT,
t.HOST,
'3G',
t.YULIU2,
t.YULIU3;
"
[hzadmin@BDI2 bj_ggsn]$ cat select2.sh
hours=/home/hzadmin/bj_ggsn/hours.txt
datetime=$1
while read LINE
do
hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}
if [ $? -eq 0 ]; then
hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
else
echo 'not exist'
fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn_4g t
on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
insert into table bj_ggsn_mobile
partition (receive_day = '${datetime}')
select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
t.LAC,
t.CI,
t.IMEI,
t.BUSITYPE,
t.CAPTURETIME,
t.ENDTIME,
t.DURATION,
t.FLOWUP,
t.FLOWDOWN,
t.FLOWALL,
t.RATTYPE,
t.TERMIANL_IP,
t.DESTIP,
t.STATUSCODE,
t.USERAGENT,
t.APN,
t.IMSI,
t.SGSNIP,
t.GGSNIP,
t.CONTENTTYPE,
t.SOURCEPORT,
t.DESTPORT,
t.LOGOCODE,
t.URL,
t.RESULT,
t.HOST,
'4G',
t.YULIU2,
t.YULIU3;
"
[hzadmin@BDI2 bj_ggsn]$ cat /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
#!/bin/bash
cd `dirname $0`
cd ProgramByDay/
./get_file_list.sh
./get_uacds.sh
[hzadmin@BDI2 bj_ggsn]$ cd /home/hzadmin/urlAPP/URLAppProgram_sf/
[hzadmin@BDI2 URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 bin
-rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin 33 Jul 11 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 12 04:35 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin 721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar
[hzadmin@BDI2 URLAppProgram_sf]$ cd ProgramByDay/
[hzadmin@BDI2 ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@BDI2 ProgramByDay]$ cat get_file_list.sh
#!/bin/bash
cd `dirname $0`
eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)
ftp -n<<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
cd $FTP_DATA_PATH
mdir $FTP_DATA_PATH ftp_con.txt
bye
!
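get_file_list.sh drives a non-interactive ftp session through a here-document: -n suppresses auto-login, user supplies the credentials pulled out of public.cfg, and mdir writes a long listing of the remote directory into a local ftp_con.txt for get_uacds.sh to parse. A sketch of the same pattern with one addition, a guard that the listing file was actually produced:

#!/bin/bash
# Hedged sketch of the non-interactive FTP listing in get_file_list.sh; the final guard is an addition.
cd `dirname $0`
eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)
ftp -n <<EOF
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
cd $FTP_DATA_PATH
mdir $FTP_DATA_PATH ftp_con.txt
bye
EOF
[ -s ftp_con.txt ] || { echo "ftp_con.txt missing or empty" >&2; exit 1; }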
[hzadmin@BDI2 ProgramByDay]$ cat ../public.cfg
# Path on the group's front-end (staging) server holding the classification/match program packages
FTP_DATA_PATH=/data3/ftp000/URLAppProgram;
# Local path where the classification/match program packages are stored
DATA_HOME=/home/hzadmin/urlAPP/URLAppProgram;
# IP of the group's front-end server that hosts the classification/match program packages
FTP_IP=132.63.10.7;
# FTP user name on the group's front-end server
FTP_USERNAME=ftp811;
# FTP password on the group's front-end server
FTP_PWD=ftp811!123;
# Path for the Top1000 unmatched-record files
UnMatchTop1000=/home/hzadmin/urlAPP/UnMatchTop1000
# Path for the urlApp statistics report files
URLAppReport=/home/hzadmin/urlAPP/URLAppReport
# Maximum number of match-program packages to keep
ZIP_LIMIT=10;
# Maximum number of Top1000 and urlApp report files to keep
REPORT_LIMIT=10;
# Retention limit for files kept on hadoop
DELETE_DAY=4;
# Retention limit for the summarized match files
SUMMARY_DAY=7
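The scripts read this file with eval $(grep KEY ../public.cfg): grep selects the single KEY=value; line and eval executes it as a shell assignment (the trailing ';' simply ends the statement). A small illustration, not from the captured session:

eval $(grep FTP_DATA_PATH public.cfg)   # executes: FTP_DATA_PATH=/data3/ftp000/URLAppProgram;
echo "$FTP_DATA_PATH"                   # prints /data3/ftp000/URLAppProgram

The pattern assumes each key matches exactly one line of the file, so key names must stay unique and must not reappear verbatim in comment lines.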
[hzadmin@BDI2 ProgramByDay]$ cat ftp_con.txt
cat: ftp_con.txt: No such file or directory
[hzadmin@BDI2 ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@BDI2 ProgramByDay]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
[hzadmin@BDI2 ProgramByDay]$ grep FTP_PWD ../public.cfg
FTP_PWD=ftp811!123;
[hzadmin@BDI2 ProgramByDay]$ id
uid=526(hzadmin) gid=526(hzadmin) groups=526(hzadmin),547(spark)
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[hzadmin@BDI2 URLAppProgram_sf]$ cd ProgramByDay/
[hzadmin@BDI2 ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@BDI2 ProgramByDay]$ cat get_uacds.sh
#!/bin/bash
#===========================================
#*
#* Copyright BONC
#* All rights reserved.
#* Abstract: Download file from the group to local
#* FileName: get_uacds.sh
#* Author: LiangWei
#* Create Time: 2014-02-22
#* Mender:
#* Mender Time:
#* Modify content:
#*
#============================================
cd `dirname $0`
eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep DATA_HOME ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)
LOCAL=`pwd`
$LOCAL/get_file_list.sh
YY=`date +%Y`
MM=`date +%m`
DD=`date +%d`
DATE=$YY$MM$DD
cd $LOCAL
awk '{print $9}' ftp_con.txt > grep.txt
e=`wc -l grep.txt | cut -d ' ' -f 1`
for ((m=1;m<=e;m++))
do
grepstr='sed -n '$m'p grep.txt'
greps=`$grepstr`
greps2=`expr substr $greps 1 8`
greps3=`expr substr $greps 18 7`
if [[ "$greps2" = 'UACDS_20' && "$greps3" = '.tar.gz' ]]; then
echo $greps >> grep2
fi
done
cut -d '_' -f 2 grep2 | uniq | sort -r > day
a=`wc -l day | cut -d ' ' -f 1`
sort -r grep2 > mu
strd='sed -n 1p day'
str=`$strd`
b=`wc -l mu | cut -d ' ' -f 1`
for ((j=1;j<=b;j++))
do
str1='sed -n '$j'p mu '
echo `$str1` > str2
str2='str2'
str3=`cut -d '_' -f 2 str2`
if [ "$str"x = "$str3"x ];
then
cat $str2 >> files #del
break
fi
done
c=`wc -l files | cut -d ' ' -f 1`
for ((k=1;k<=c;k++))
do
mystr='sed -n '$k'p files'
myFile=`$mystr`
cd $DATA_HOME
if [ -f $myFile ]; then
echo $myFile
else
cd $LOCAL
echo $myFile > files2.txt #del
fi
done
logDir=${DATA_HOME}/logs
logFile=${DATA_HOME}/'logs'/${DATE}.log
mkdir -p $logDir
d=`wc -l files2.txt | cut -d ' ' -f 1`
for ((l=1;l<=d;l++))
do
echo "Begin at: `date` ;" | tee -a $logFile
time_begin=$(date +%s )
str4='sed -n '$l'p files2.txt'
DATAFILES=`$str4`
ftp -n<<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
bin
prompt
cd $FTP_DATA_PATH
lcd $DATA_HOME
mget $DATAFILES
bye
!
time_end=$(date +%s )
time_total=`expr ${time_end} - ${time_begin}`
echo "End at: `date`;" | tee -a $logFile
echo "DownLoadfilename: $DATAFILES total time=${time_total} s ;" | tee -a $logFile
done
cd $LOCAL
rm day
rm files
rm ftp_con.txt
rm grep.txt
rm grep2
rm mu
rm str2
echo "$LOCAL";
sh $LOCAL/load_data.sh
[hzadmin@BDI2 ProgramByDay]$
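Stripped of its temp files (grep.txt, grep2, day, mu, str2, files), get_uacds.sh does the following: take the file names (column 9) from ftp_con.txt, keep those shaped like UACDS_YYYYMMDD_NN.tar.gz, pick the newest one, write it to files2.txt only if it is not already in $DATA_HOME, ftp-mget it, and hand over to load_data.sh. A condensed, hedged restatement of the selection step; the regex replaces the fixed-offset expr parsing and is assumed equivalent:

#!/bin/bash
# Hedged restatement of the candidate selection in get_uacds.sh, not a drop-in replacement.
eval $(grep DATA_HOME ../public.cfg)
awk '{print $9}' ftp_con.txt \
  | grep -E '^UACDS_20[0-9]{6}_[0-9]{2}\.tar\.gz$' \
  | sort -r > candidates
newest=$(head -1 candidates)                   # newest package offered by the server
if [ -n "$newest" ] && [ ! -f "$DATA_HOME/$newest" ]; then
  echo "$newest" > files2.txt                  # get_uacds.sh mgets whatever is listed here
fi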
[hzadmin@BDI2 ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$ cat load_data.sh
#!/bin/bash
#*=================================================
#*
#* FileName : load_data.sh
#* CreateDate: 2014-02-25
#* Abstract : Unzip the file 'UACDS_YYYYMMDD_**.tar.gz'
#* Author : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
PWDNOW=`pwd`
eval $(grep DATA_HOME ../public.cfg)
cd ..
LOC=`pwd`
cd ${PWDNOW}
a=`wc -l files2.txt | cut -d ' ' -f 1`
if [ "$a" == 1 ]; then
str=`sed -n 1p files2.txt`
rm -f ${LOC}/match.cfg
rm -f ${LOC}/UACDS*.jar
rm -f ${LOC}/R_*_TYPE*.txt
tar xzvf $DATA_HOME/$str -C ${LOC}
fi
rm files2.txt
[hzadmin@BDI2 ProgramByDay]$
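load_data.sh only acts when exactly one new package name is listed in files2.txt: it deletes the previous rule set (match.cfg, UACDS*.jar, R_*_TYPE*.txt) from URLAppProgram_sf and untars the new package over it, which is where the UACDS_20160711_* jars and R_*_TYPE_20160711_00.txt files in the listings above come from. Because the rm runs before the tar, a guard like the following could be added in front of it (an addition, assuming the same files2.txt layout):

str=$(sed -n 1p files2.txt)
[ -f "$DATA_HOME/$str" ] || { echo "package $str not found in $DATA_HOME" >&2; exit 1; }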
[hzadmin@BDI2 URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[hzadmin@BDI2 URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 bin
-rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin 33 Jul 11 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 12 04:35 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin 721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar
[hzadmin@BDI2 URLAppProgram_sf]$ cat run.sh
#!/bin/bash
#run.sh
#*=================================================
#*
#* FileName : run.sh
#* CreateDate: 2014-04-03
#* Abstract : Overall deployment schedule
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep DEFAULT_TIME common.cfg)
eval $(grep LOC_DIR common.cfg)
eval $(grep DELAY_DAY common.cfg)
eval $(grep DELAY_HOUR common.cfg)
eval $(grep IS_LTBAL common.cfg)
dataday=`date -d -${DELAY_DAY}days-${DELAY_HOUR}hours +%Y%m%d`
datahour=`date -d -${DELAY_HOUR}hours +%H`
if [ $# -eq 2 ] ; then
if [ ${DEFAULT_TIME} = 'day' ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
else
dataday=$1;
datahour=$2;
fi;
elif [ $# -eq 1 ] ; then
if [ ${DEFAULT_TIME} = 'hour' ] ; then
echo "Input parameter error : there should be 2 parameters";
exit 1;
else
dataday=$1;
fi;
fi;
DAY=`date +%Y%m%d`
if [ ! -d "${LOC_DIR}/logs/${DAY}" ] ; then
mkdir -p "${LOC_DIR}/logs/${DAY}"
fi;
DEL_DAY=`date -d -10days +%Y%m%d`
if [ -d "${LOC_DIR}/logs/${DEL_DAY}" ] ; then
rm -rf "${LOC_DIR}/logs/${DEL_DAY}"
fi;
if [ ${DEFAULT_TIME} = 'day' ] ; then
echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday};
echo "===========================================================================";
cd `dirname $0`
echo "========== Step 1 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log ";
./ProgramByDay/match.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log 2>&1
echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log ";
./ProgramByDay/report_summary.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday".log 2>&1
./ProgramByDay/report.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday".log 2>&1
if [ ${IS_LTBAL} = '1' ] ; then
./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1
fi
echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log ";
./ProgramByDay/upload.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log 2>&1
./ProgramByDay/logupload.sh ${DAY}
exit 0;
fi;
if [[ ${datahour} = '00' ]] ; then
echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";
echo "========== Step 1 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByDay/ftp_getfilelist.sh > ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log 2>&1
echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1
echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1
if [[ ${datahour} = '23' ]] ; then
echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";
echo "========== Step 1 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1
echo "========== Step 2 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1
echo "========== Step 3 of 3 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log ";
if [ ${IS_LTBAL} = '1' ] ; then
./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1
fi
./ProgramByHour/upload_H.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log 2>&1
else
echo "===========================================================================";
echo "========== The program is running , please keep the network flow ...";
echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour};
echo "===========================================================================";
echo "========== Step 1 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1
echo "========== Step 2 of 2 ...";
echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log ";
./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1
fi;
./remove_files.sh > ${LOC_DIR}/logs/${DAY}/remove_"$DAY"_"$dataday"_"$datahour".log 2>&1
./ProgramByDay/logupload.sh ${DAY}
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$
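run.sh is the overall scheduler. With DEFAULT_TIME=day (as configured in common.cfg further down) it accepts one optional date argument, keeps a per-day logs/YYYYMMDD directory, removes the one from ten days earlier, and then runs the ProgramByDay chain: match.sh, report_summary.sh and report.sh, label.sh only when IS_LTBAL=1, then upload.sh and logupload.sh. The hour-mode branches below the day block are reached only when DEFAULT_TIME=hour. A typical day-mode invocation for a specific date, inferred from the argument handling rather than captured in the session:

sh /home/hzadmin/urlAPP/URLAppProgram_sf/run.sh 20160711
# with no argument the data date defaults to one day ago (DELAY_DAY=1 in common.cfg)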
[hzadmin@BDI2 ProgramByDay]$ cat match.sh
#!/bin/bash
# match.sh
######################################################################################################
# function : run the match program and produce the statistics report data
# date: 2014/02/10
# author: SPP
# param: day (the data date)
######################################################################################################
cd `dirname $0`
cd ..
PWDNOW=`pwd`
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg)
eval $(grep URL_MATCH ${PWDNOW}/common.cfg)
eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg)
eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg)
eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg)
eval $(grep HIVE_USER ${PWDNOW}/common.cfg)
eval $(grep LOC_DIR ${PWDNOW}/common.cfg)
eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg)
eval $(grep MR_VERSION ${PWDNOW}/common.cfg)
eval $(grep PARA_JAR ${PWDNOW}/common.cfg)
eval $(grep PARA_HIVE ${PWDNOW}/common.cfg)
eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg)
# check that the required parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hadoop fs -ls ${URL_INPUT_PATH}${day} > exist_test ;
x=`wc -l exist_test | cut -d ' ' -f 1`;
if [ ${x} = 0 ] ; then
echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day} file is not exist !"
rm -f exist_test
exit 1;
fi;
rm -f exist_test
hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt
hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt
hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt
hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt
hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE}
hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE}
hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE}
hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE}
echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar";
hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day} ${URL_MATCH}${day} $PWDNOW/${DPI_CONF_PATH}
#hadoop fs -rm ${url_match}${day}/part-m-*.gz
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
set dpi.encode.license=${HIVE_LICENSE};
${PARA_HIVE}
set mapred.job.name=CMSS-COUNT;
alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_match add partition (receive_day='${day}') location '${day}/match';
alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}') location '${day}/noise';
alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}') location '${day}/unmatch';
"
[hzadmin@BDI2 ProgramByDay]$
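# match.sh above loads one key at a time from the .cfg files with `eval $(grep KEY file)`. A minimal
# standalone sketch of that pattern (the file name demo.cfg and its contents are made up for illustration):
cat > demo.cfg <<'EOF'
LOC_DIR=/home/hzadmin/urlAPP
MR_VERSION=_02_1
EOF
eval $(grep LOC_DIR demo.cfg)        # defines LOC_DIR in the current shell
eval $(grep MR_VERSION demo.cfg)     # defines MR_VERSION
echo "${LOC_DIR}/URLAppProgram_sf  jar suffix: ${MR_VERSION}"
# The pattern only works when the grep matches exactly one line and that line has no spaces around '='.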
[hzadmin@BDI2 URLAppProgram_sf]$ cat match.cfg
RULE_PROV_VERNO=UACDS_20160711_00
[hzadmin@BDI2 URLAppProgram_sf]$
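# Given RULE_PROV_VERNO above and MR_VERSION=_02_1 from common.cfg below, match.sh builds the MapReduce
# jar name as ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar, e.g.:
#   /home/hzadmin/urlAPP/URLAppProgram_sf/UACDS_20160711_00_02_1.jar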
[hzadmin@BDI2 URLAppProgram_sf]$ cat common.cfg
##output path of the matching program; also the HDFS location backing the Hive match / noise / unmatched tables
URL_MATCH=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/
##Hive summary tables
MATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/
UNMATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_unmatch_summary/
##HDFS paths for the Hive report tables and the top-1000 tables
URL_SUMMARY= /share/hzadmin/urlapp/spp/dpi_http_mark_summary/
URL_RULE= /share/hzadmin/urlapp/spp/dpi_http_mark_rule/
URL_TYPECODE= /share/hzadmin/urlapp/spp/dpi_http_mark_type/
URL_UNMATCH_TOP1000= /share/hzadmin/urlapp/spp/dpi_http_mark_top1000/
SITE_RULE= /share/hzadmin/urlapp/spp/dpi_site_mark_rule/
SITE_TYPECODE= /share/hzadmin/urlapp/spp/dpi_site_mark_type/
SITE_UNMATCH_TOP1000=/share/hzadmin/urlapp/spp/dpi_site_mark_top1000/
##HDFS paths of the classification dictionaries
R_URL_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/url_type/
R_APP_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/app_type/
R_NOISE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/noise_type/
R_SITE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/site_type/
##HDFS paths of the intermediate tables
TEMP_DPI_MATCH=/share/hzadmin/urlapp/spp/temp_dpi_match
TEMP_DPI_NOISE=/share/hzadmin/urlapp/spp/temp_dpi_noise
TEMP_DPI_UNMATCH=/share/hzadmin/urlapp/spp/temp_dpi_unmatch
TEMP_DPI_URL=/share/hzadmin/urlapp/spp/temp_dpi_url
TEMP_DPI_APP=/share/hzadmin/urlapp/spp/temp_dpi_app
TEMP_DPI_SITE=/share/hzadmin/urlapp/spp/temp_dpi_site
##path of the customer label table (provinces without customer labels may leave this unconfigured)
LTBAL=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/
LTBAL_tmp=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D_tmp/
##HDFS path of the raw DPI files, i.e. the input path of the matching program
URL_INPUT_PATH=/share/hzadmin/external_table/DMP_SSA/DPI/
#URL_INPUT_PATH="/apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day"="
##path of the DPI field configuration file (a local Linux path)
DPI_CONF_PATH=rule.xml
##province code of this province, e.g. Shanghai is 831
AREA_NO=811
##number of re-upload attempts
REUPLOAD_COUNT=00
##output directories for the generated upload files
UNMATCHTOP1000=/dfs/ftp/hzadmin/urlAPP/UnMatchTop1000
URLAPPREPORT=/dfs/ftp/hzadmin/urlAPP/URLAppReport
#local directory where the program is installed
LOC_DIR=/home/hzadmin/urlAPP
#Hive database/user: default if not specified, otherwise fill in the user name
HIVE_USER=dpi
#MapReduce framework version suffix
MR_VERSION=_02_1
#whether the match output is encrypted: 0 = no, 1 = yes
ISENCODE=0
#encryption key for Hive files
HIVE_LICENSE=DpiBonc
#extra parameters passed when running the JAR
PARA_JAR='-D mapred.job.queue.name=thirdpart1'
#extra parameters passed when running Hive
PARA_HIVE='set hive.auto.convert.join=false;set mapreduce.job.queuename=thirdpart1;'
#granularity of the source data run: hour = hourly, day = daily
DEFAULT_TIME=day
#number of days the DPI data lags behind the run time
DELAY_DAY=1
#in hour mode, number of hours the DPI data lags behind the run time
DELAY_HOUR=0
##whether the customer-label program is configured for this province: 1 = yes, 0 = no
IS_LTBAL=0
[hzadmin@BDI2 URLAppProgram_sf]$
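# Note: a few keys above (URL_SUMMARY, URL_RULE, URL_TYPECODE, URL_UNMATCH_TOP1000, SITE_RULE, SITE_TYPECODE)
# have a space after '='; such lines would not survive the eval/grep loading pattern if a script ever read them.
# A quick check for lines in that shape (sketch):
grep -nE '^[A-Za-z_0-9]+= ' /home/hzadmin/urlAPP/URLAppProgram_sf/common.cfg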
[root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160711
Found 706 items
-rw-r--r-- 2 hzadmin hdfs 852456797 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1025069938 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 816682614 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1030962113 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 922216071 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1018908652 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 873706406 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1025021048 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 876314487 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1007005145 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 886130165 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1017040305 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 870758798 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1075565204 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 853730203 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0.snappy
-rw-r--r-- 2 hzadmin hdfs 990414241 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 836870377 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1003709447 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 849027164 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1002311449 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 919945440 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0.snappy
-rw-r--r-- 2 hzadmin hdfs 985896710 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 853679744 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0.snappy
-rw-r--r-- 2 hzadmin hdfs 978387233 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 857535482 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1013308316 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 839661882 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0.snappy
-rw-r--r-- 2 hzadmin hdfs 963660717 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 853614174 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0.snappy
-rw-r--r-- 2 hzadmin hdfs 923638015 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 833865334 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0.snappy
-rw-r--r-- 2 hzadmin hdfs 993313562 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 863788726 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1055935913 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 938640887 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0.snappy
-rw-r--r-- 2 hzadmin hdfs 992664370 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 836728758 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0.snappy
-rw-r--r-- 2 hzadmin hdfs 958110493 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0_copy_1.snappy
-rw-r--r-- 2 hzadmin hdfs 878931625 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000019_0.snappy
-rw-r--r-- 2 hzadmin hdfs 962405868 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000586_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1051757492 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000587_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1016882930 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000588_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1039885210 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000589_0.snappy
-rw-r--r-- 2 hzadmin hdfs 941194043 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000590_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1008122362 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000591_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1047224089 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000592_0.snappy
-rw-r--r-- 2 hzadmin hdfs 982080269 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000593_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1023890503 2016-07-12 04:33 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000594_0.snappy
-rw-r--r-- 2 hzadmin hdfs 986968252 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000595_0.snappy
-rw-r--r-- 2 hzadmin hdfs 987693087 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000596_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1011458249 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000597_0.snappy
-rw-r--r-- 2 hzadmin hdfs 1009166057 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000598_0.snappy
-rw-r--r-- 2 hzadmin hdfs 985772040 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000599_0.snappy
-rw-r--r-- 2 hzadmin hdfs 965906316 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000600_0.snappy
-rw-r--r-- 2 hzadmin hdfs 955717905 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000601_0.snappy
-rw-r--r-- 2 hzadmin hdfs 968491437 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000602_0.snappy
-rw-r--r-- 2 hzadmin hdfs 981757576 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000603_0.snappy
[root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/
Found 6 items
drwxr-xr-x - hzadmin hdfs 0 2016-07-07 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160706
drwxr-xr-x - hzadmin hdfs 0 2016-07-08 03:59 /share/hzadmin/external_table/DMP_SSA/DPI/20160707
drwxr-xr-x - hzadmin hdfs 0 2016-07-09 04:21 /share/hzadmin/external_table/DMP_SSA/DPI/20160708
drwxr-xr-x - hzadmin hdfs 0 2016-07-10 04:30 /share/hzadmin/external_table/DMP_SSA/DPI/20160709
drwxr-xr-x - hzadmin hdfs 0 2016-07-11 04:26 /share/hzadmin/external_table/DMP_SSA/DPI/20160710
drwxr-xr-x - hzadmin hdfs 0 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711
[root@BDI2 ~]#
[root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/
[root@BDI2 ~]#
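# To confirm a day's DPI input is complete before kicking off the run, the file count and total size can be
# checked first (sketch; the path is the input directory listed above):
hdfs dfs -count /share/hzadmin/external_table/DMP_SSA/DPI/20160711
hdfs dfs -du -s -h /share/hzadmin/external_table/DMP_SSA/DPI/20160711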
#cron job execution log
[hzadmin@BDI2 bj_ggsn]$ cat /home/hzadmin/bj_ggsn/start.log
output to local-file: ftp_con.txt? output to local-file: ftp_con.txt? Begin at: Wed Jul 13 04:39:05 CST 2016 ;
Interactive mode off.
Local directory now /home/hzadmin/urlAPP/URLAppProgram
End at: Wed Jul 13 04:39:06 CST 2016;
DownLoadfilename: UACDS_20160712_00.tar.gz total time=1 s ;
/home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay
match.cfg
R_APP_TYPE_20160712_00.txt
R_NOISE_TYPE_20160712_00.txt
R_SITE_TYPE_20160712_00.txt
R_URL_TYPE_20160712_00.txt
UACDS_20160712_00_01_1.jar
UACDS_20160712_00_01.jar
UACDS_20160712_00_02_1.jar
UACDS_20160712_00_02.jar
===========================================================================
========== The program is running , please keep the network flow ...
========== running model day : 20160712
===========================================================================
========== Step 1 of 2 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/match_20160713_20160712.log
========== Step 2 of 3 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log
========== Step 3 of 3 ...
========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log
not input days so day=today
create_day: 20160713
day: 20160712
/home/hzadmin/urlAPP/ResultMatch/remove_details.sh: line 1: ?#!/bin/bash: No such file or directory
delete file: 20160706
delete file: /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705
16/07/13 09:07:26 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties
OK
Time taken: 2.58 seconds
Added /home/hzadmin/bj_ggsn/jar/Decode.jar to class path
Added resource: /home/hzadmin/bj_ggsn/jar/Decode.jar
OK
Time taken: 1.676 seconds
Query ID = hzadmin_20160713090707_053181f1-eb59-4188-be6a-a983081c9f5f
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 231
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11516, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11516/
Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11516
Hadoop job information for Stage-1: number of mappers: 661; number of reducers: 231
2016-07-13 09:08:01,998 Stage-1 map = 0%, reduce = 0%
2016-07-13 09:08:50,489 Stage-1 map = 1%, reduce = 0%, Cumulative CPU 5197.54 sec
2016-07-13 09:09:03,046 Stage-1 map = 2%, reduce = 0%, Cumulative CPU 7764.25 sec
2016-07-13 09:09:07,341 Stage-1 map = 3%, reduce = 0%, Cumulative CPU 8663.68 sec
2016-07-13 09:09:12,666 Stage-1 map = 4%, reduce = 0%, Cumulative CPU 9852.68 sec
2016-07-13 09:09:21,205 Stage-1 map = 5%, reduce = 0%, Cumulative CPU 11010.68 sec
2016-07-13 09:09:25,392 Stage-1 map = 6%, reduce = 0%, Cumulative CPU 11844.46 sec
2016-07-13 09:09:27,450 Stage-1 map = 7%, reduce = 0%, Cumulative CPU 12290.34 sec
2016-07-13 09:09:29,554 Stage-1 map = 8%, reduce = 0%, Cumulative CPU 12730.05 sec
2016-07-13 09:09:32,076 Stage-1 map = 9%, reduce = 0%, Cumulative CPU 13393.25 sec
2016-07-13 09:09:33,453 Stage-1 map = 10%, reduce = 0%, Cumulative CPU 13535.78 sec
2016-07-13 09:09:35,612 Stage-1 map = 11%, reduce = 0%, Cumulative CPU 13797.18 sec
2016-07-13 09:09:40,782 Stage-1 map = 12%, reduce = 0%, Cumulative CPU 14445.45 sec
2016-07-13 09:09:47,771 Stage-1 map = 13%, reduce = 0%, Cumulative CPU 15535.53 sec
2016-07-13 09:09:52,221 Stage-1 map = 14%, reduce = 0%, Cumulative CPU 16179.99 sec
2016-07-13 09:09:56,108 Stage-1 map = 15%, reduce = 0%, Cumulative CPU 16602.82 sec
2016-07-13 09:10:05,058 Stage-1 map = 16%, reduce = 0%, Cumulative CPU 18181.36 sec
2016-07-13 09:10:08,191 Stage-1 map = 17%, reduce = 0%, Cumulative CPU 18632.7 sec
2016-07-13 09:10:12,436 Stage-1 map = 18%, reduce = 0%, Cumulative CPU 19106.37 sec
2016-07-13 09:10:15,600 Stage-1 map = 19%, reduce = 0%, Cumulative CPU 19617.38 sec
2016-07-13 09:10:18,757 Stage-1 map = 20%, reduce = 0%, Cumulative CPU 19945.72 sec
2016-07-13 09:10:27,207 Stage-1 map = 21%, reduce = 1%, Cumulative CPU 21447.84 sec
2016-07-13 09:10:32,471 Stage-1 map = 22%, reduce = 1%, Cumulative CPU 22085.79 sec
2016-07-13 09:10:36,684 Stage-1 map = 23%, reduce = 1%, Cumulative CPU 22729.29 sec
2016-07-13 09:10:41,143 Stage-1 map = 24%, reduce = 1%, Cumulative CPU 23253.89 sec
2016-07-13 09:10:43,254 Stage-1 map = 25%, reduce = 1%, Cumulative CPU 23457.85 sec
2016-07-13 09:10:45,362 Stage-1 map = 26%, reduce = 1%, Cumulative CPU 23678.34 sec
2016-07-13 09:10:46,404 Stage-1 map = 27%, reduce = 1%, Cumulative CPU 23774.85 sec
2016-07-13 09:10:50,629 Stage-1 map = 28%, reduce = 1%, Cumulative CPU 24180.11 sec
2016-07-13 09:10:57,034 Stage-1 map = 29%, reduce = 1%, Cumulative CPU 24839.33 sec
2016-07-13 09:11:01,255 Stage-1 map = 30%, reduce = 1%, Cumulative CPU 25307.08 sec
2016-07-13 09:11:03,958 Stage-1 map = 31%, reduce = 1%, Cumulative CPU 25763.26 sec
2016-07-13 09:11:08,528 Stage-1 map = 32%, reduce = 2%, Cumulative CPU 26300.39 sec
2016-07-13 09:11:14,875 Stage-1 map = 33%, reduce = 2%, Cumulative CPU 27264.86 sec
2016-07-13 09:11:24,386 Stage-1 map = 34%, reduce = 2%, Cumulative CPU 28419.61 sec
2016-07-13 09:11:31,847 Stage-1 map = 35%, reduce = 2%, Cumulative CPU 29402.95 sec
2016-07-13 09:11:36,334 Stage-1 map = 36%, reduce = 2%, Cumulative CPU 29849.6 sec
2016-07-13 09:11:43,729 Stage-1 map = 37%, reduce = 2%, Cumulative CPU 30704.56 sec
2016-07-13 09:11:47,933 Stage-1 map = 38%, reduce = 2%, Cumulative CPU 31181.2 sec
2016-07-13 09:11:50,068 Stage-1 map = 39%, reduce = 2%, Cumulative CPU 31369.04 sec
2016-07-13 09:11:55,367 Stage-1 map = 40%, reduce = 2%, Cumulative CPU 31936.9 sec
2016-07-13 09:11:57,473 Stage-1 map = 40%, reduce = 3%, Cumulative CPU 32240.58 sec
2016-07-13 09:11:59,545 Stage-1 map = 41%, reduce = 3%, Cumulative CPU 32428.94 sec
2016-07-13 09:12:01,649 Stage-1 map = 42%, reduce = 3%, Cumulative CPU 32744.61 sec
2016-07-13 09:12:07,942 Stage-1 map = 43%, reduce = 3%, Cumulative CPU 33410.43 sec
2016-07-13 09:12:12,113 Stage-1 map = 44%, reduce = 3%, Cumulative CPU 33952.16 sec
2016-07-13 09:12:15,242 Stage-1 map = 45%, reduce = 3%, Cumulative CPU 34206.1 sec
2016-07-13 09:12:21,495 Stage-1 map = 46%, reduce = 3%, Cumulative CPU 34844.34 sec
2016-07-13 09:12:30,872 Stage-1 map = 46%, reduce = 4%, Cumulative CPU 35728.26 sec
2016-07-13 09:12:34,036 Stage-1 map = 47%, reduce = 4%, Cumulative CPU 36057.98 sec
2016-07-13 09:12:40,293 Stage-1 map = 48%, reduce = 4%, Cumulative CPU 36777.14 sec
2016-07-13 09:12:50,802 Stage-1 map = 49%, reduce = 4%, Cumulative CPU 38085.11 sec
2016-07-13 09:12:58,132 Stage-1 map = 50%, reduce = 4%, Cumulative CPU 38899.54 sec
2016-07-13 09:13:00,323 Stage-1 map = 51%, reduce = 4%, Cumulative CPU 39107.81 sec
2016-07-13 09:13:04,948 Stage-1 map = 52%, reduce = 4%, Cumulative CPU 39535.16 sec
2016-07-13 09:13:09,327 Stage-1 map = 53%, reduce = 4%, Cumulative CPU 39903.93 sec
2016-07-13 09:13:13,553 Stage-1 map = 53%, reduce = 5%, Cumulative CPU 40394.49 sec
2016-07-13 09:13:15,702 Stage-1 map = 54%, reduce = 5%, Cumulative CPU 40552.41 sec
2016-07-13 09:13:24,136 Stage-1 map = 55%, reduce = 5%, Cumulative CPU 41410.8 sec
2016-07-13 09:13:28,720 Stage-1 map = 56%, reduce = 5%, Cumulative CPU 41846.2 sec
2016-07-13 09:13:33,006 Stage-1 map = 57%, reduce = 5%, Cumulative CPU 42176.72 sec
2016-07-13 09:13:43,675 Stage-1 map = 58%, reduce = 5%, Cumulative CPU 43068.84 sec
2016-07-13 09:13:52,139 Stage-1 map = 59%, reduce = 5%, Cumulative CPU 43853.31 sec
2016-07-13 09:13:59,475 Stage-1 map = 60%, reduce = 5%, Cumulative CPU 44538.15 sec
2016-07-13 09:14:03,638 Stage-1 map = 61%, reduce = 5%, Cumulative CPU 44855.9 sec
2016-07-13 09:14:18,548 Stage-1 map = 62%, reduce = 5%, Cumulative CPU 46195.74 sec
2016-07-13 09:14:24,862 Stage-1 map = 63%, reduce = 5%, Cumulative CPU 46731.81 sec
2016-07-13 09:14:33,309 Stage-1 map = 64%, reduce = 5%, Cumulative CPU 47588.96 sec
2016-07-13 09:14:36,458 Stage-1 map = 64%, reduce = 6%, Cumulative CPU 48099.49 sec
2016-07-13 09:14:42,805 Stage-1 map = 65%, reduce = 6%, Cumulative CPU 48679.78 sec
2016-07-13 09:14:49,239 Stage-1 map = 66%, reduce = 6%, Cumulative CPU 49262.33 sec
2016-07-13 09:14:55,665 Stage-1 map = 67%, reduce = 6%, Cumulative CPU 49899.25 sec
2016-07-13 09:15:03,040 Stage-1 map = 68%, reduce = 6%, Cumulative CPU 50497.22 sec
2016-07-13 09:15:12,510 Stage-1 map = 69%, reduce = 6%, Cumulative CPU 51435.08 sec
2016-07-13 09:15:24,174 Stage-1 map = 70%, reduce = 6%, Cumulative CPU 52788.92 sec
2016-07-13 09:15:32,711 Stage-1 map = 71%, reduce = 6%, Cumulative CPU 53665.99 sec
2016-07-13 09:15:42,103 Stage-1 map = 72%, reduce = 6%, Cumulative CPU 54553.65 sec
2016-07-13 09:15:47,333 Stage-1 map = 73%, reduce = 6%, Cumulative CPU 54961.75 sec
2016-07-13 09:15:54,771 Stage-1 map = 74%, reduce = 6%, Cumulative CPU 55748.89 sec
2016-07-13 09:16:02,233 Stage-1 map = 75%, reduce = 6%, Cumulative CPU 56317.34 sec
2016-07-13 09:16:06,637 Stage-1 map = 76%, reduce = 6%, Cumulative CPU 56664.01 sec
2016-07-13 09:16:07,687 Stage-1 map = 76%, reduce = 7%, Cumulative CPU 56726.21 sec
2016-07-13 09:16:15,012 Stage-1 map = 77%, reduce = 7%, Cumulative CPU 57229.48 sec
2016-07-13 09:16:28,460 Stage-1 map = 78%, reduce = 7%, Cumulative CPU 58587.28 sec
2016-07-13 09:16:32,644 Stage-1 map = 79%, reduce = 7%, Cumulative CPU 58893.37 sec
2016-07-13 09:16:43,193 Stage-1 map = 80%, reduce = 7%, Cumulative CPU 59747.56 sec
2016-07-13 09:16:51,545 Stage-1 map = 81%, reduce = 7%, Cumulative CPU 60503.95 sec
2016-07-13 09:16:56,784 Stage-1 map = 82%, reduce = 7%, Cumulative CPU 60890.77 sec
2016-07-13 09:17:08,285 Stage-1 map = 83%, reduce = 7%, Cumulative CPU 61747.42 sec
2016-07-13 09:17:14,555 Stage-1 map = 84%, reduce = 7%, Cumulative CPU 62244.9 sec
2016-07-13 09:17:19,774 Stage-1 map = 85%, reduce = 7%, Cumulative CPU 62596.97 sec
2016-07-13 09:17:31,248 Stage-1 map = 86%, reduce = 7%, Cumulative CPU 63653.0 sec
2016-07-13 09:17:38,563 Stage-1 map = 87%, reduce = 7%, Cumulative CPU 64143.74 sec
2016-07-13 09:17:41,682 Stage-1 map = 87%, reduce = 8%, Cumulative CPU 64299.0 sec
2016-07-13 09:17:46,882 Stage-1 map = 88%, reduce = 8%, Cumulative CPU 64714.76 sec
2016-07-13 09:17:58,323 Stage-1 map = 89%, reduce = 8%, Cumulative CPU 65398.87 sec
2016-07-13 09:18:11,927 Stage-1 map = 90%, reduce = 8%, Cumulative CPU 66519.12 sec
2016-07-13 09:18:17,138 Stage-1 map = 91%, reduce = 8%, Cumulative CPU 66854.71 sec
2016-07-13 09:18:25,471 Stage-1 map = 92%, reduce = 8%, Cumulative CPU 67504.99 sec
2016-07-13 09:18:32,823 Stage-1 map = 93%, reduce = 8%, Cumulative CPU 68050.78 sec
2016-07-13 09:18:35,954 Stage-1 map = 93%, reduce = 9%, Cumulative CPU 68204.15 sec
2016-07-13 09:18:40,180 Stage-1 map = 94%, reduce = 9%, Cumulative CPU 68751.23 sec
2016-07-13 09:18:44,347 Stage-1 map = 94%, reduce = 10%, Cumulative CPU 69048.97 sec
2016-07-13 09:18:46,472 Stage-1 map = 95%, reduce = 10%, Cumulative CPU 69248.88 sec
2016-07-13 09:18:52,740 Stage-1 map = 96%, reduce = 11%, Cumulative CPU 69662.81 sec
2016-07-13 09:18:55,923 Stage-1 map = 97%, reduce = 11%, Cumulative CPU 69937.75 sec
2016-07-13 09:18:57,023 Stage-1 map = 97%, reduce = 12%, Cumulative CPU 69987.32 sec
2016-07-13 09:19:01,192 Stage-1 map = 98%, reduce = 12%, Cumulative CPU 70135.07 sec
2016-07-13 09:19:02,250 Stage-1 map = 98%, reduce = 13%, Cumulative CPU 70162.43 sec
2016-07-13 09:19:07,490 Stage-1 map = 98%, reduce = 14%, Cumulative CPU 70372.58 sec
2016-07-13 09:19:12,776 Stage-1 map = 99%, reduce = 15%, Cumulative CPU 70731.6 sec
2016-07-13 09:19:18,031 Stage-1 map = 99%, reduce = 16%, Cumulative CPU 70899.25 sec
2016-07-13 09:19:31,485 Stage-1 map = 99%, reduce = 17%, Cumulative CPU 71470.97 sec
2016-07-13 09:19:46,107 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 71714.27 sec
2016-07-13 09:19:47,161 Stage-1 map = 100%, reduce = 19%, Cumulative CPU 71731.02 sec
2016-07-13 09:19:48,219 Stage-1 map = 100%, reduce = 24%, Cumulative CPU 71810.86 sec
2016-07-13 09:19:49,261 Stage-1 map = 100%, reduce = 29%, Cumulative CPU 72135.09 sec
2016-07-13 09:19:50,287 Stage-1 map = 100%, reduce = 33%, Cumulative CPU 72396.02 sec
2016-07-13 09:19:51,337 Stage-1 map = 100%, reduce = 35%, Cumulative CPU 72698.52 sec
2016-07-13 09:19:52,393 Stage-1 map = 100%, reduce = 38%, Cumulative CPU 72918.64 sec
2016-07-13 09:19:53,444 Stage-1 map = 100%, reduce = 44%, Cumulative CPU 73111.93 sec
2016-07-13 09:19:54,470 Stage-1 map = 100%, reduce = 46%, Cumulative CPU 73228.55 sec
2016-07-13 09:19:55,495 Stage-1 map = 100%, reduce = 47%, Cumulative CPU 73314.57 sec
2016-07-13 09:19:56,538 Stage-1 map = 100%, reduce = 48%, Cumulative CPU 73403.39 sec
2016-07-13 09:19:57,698 Stage-1 map = 100%, reduce = 49%, Cumulative CPU 73452.14 sec
2016-07-13 09:20:00,788 Stage-1 map = 100%, reduce = 51%, Cumulative CPU 73859.22 sec
2016-07-13 09:20:01,821 Stage-1 map = 100%, reduce = 57%, Cumulative CPU 74159.29 sec
2016-07-13 09:20:03,490 Stage-1 map = 100%, reduce = 66%, Cumulative CPU 74342.07 sec
2016-07-13 09:20:04,531 Stage-1 map = 100%, reduce = 70%, Cumulative CPU 74864.62 sec
2016-07-13 09:20:05,558 Stage-1 map = 100%, reduce = 75%, Cumulative CPU 75034.61 sec
2016-07-13 09:20:06,584 Stage-1 map = 100%, reduce = 77%, Cumulative CPU 75119.84 sec
2016-07-13 09:20:07,613 Stage-1 map = 100%, reduce = 79%, Cumulative CPU 75180.32 sec
2016-07-13 09:20:08,639 Stage-1 map = 100%, reduce = 81%, Cumulative CPU 75280.79 sec
2016-07-13 09:20:09,665 Stage-1 map = 100%, reduce = 83%, Cumulative CPU 75434.05 sec
2016-07-13 09:20:10,713 Stage-1 map = 100%, reduce = 85%, Cumulative CPU 75521.87 sec
2016-07-13 09:20:11,775 Stage-1 map = 100%, reduce = 87%, Cumulative CPU 75682.74 sec
2016-07-13 09:20:13,574 Stage-1 map = 100%, reduce = 91%, Cumulative CPU 75881.12 sec
2016-07-13 09:20:15,641 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 76062.67 sec
2016-07-13 09:20:17,742 Stage-1 map = 100%, reduce = 93%, Cumulative CPU 76159.41 sec
2016-07-13 09:20:19,865 Stage-1 map = 100%, reduce = 94%, Cumulative CPU 76248.27 sec
2016-07-13 09:20:20,964 Stage-1 map = 100%, reduce = 95%, Cumulative CPU 76323.18 sec
2016-07-13 09:20:24,140 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 76662.94 sec
2016-07-13 09:20:27,289 Stage-1 map = 100%, reduce = 97%, Cumulative CPU 76664.86 sec
2016-07-13 09:20:31,500 Stage-1 map = 100%, reduce = 98%, Cumulative CPU 77118.12 sec
2016-07-13 09:20:32,531 Stage-1 map = 100%, reduce = 99%, Cumulative CPU 77257.73 sec
2016-07-13 09:20:38,761 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 77452.2 sec
MapReduce Total cumulative CPU time: 0 days 21 hours 30 minutes 52 seconds 200 msec
Ended Job = job_1464150086810_11516
MapReduce Jobs Launched:
Job 0: Map: 661 Reduce: 231 Cumulative CPU: 77452.2 sec HDFS Read: 230040091537 HDFS Write: 2692705050 SUCCESS
Total MapReduce CPU Time Spent: 0 days 21 hours 30 minutes 52 seconds 200 msec
OK
Time taken: 774.963 seconds, Fetched: 66214908 row(s)
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ag': No such file or directory
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ah': No such file or directory
mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ai': No such file or directory
16/07/13 09:33:08 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
16/07/13 09:33:11 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
16/07/13 09:33:13 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SSA/DPI/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current
Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties
OK
Time taken: 5.626 seconds
Dropped the partition receive_day=20160706
OK
Time taken: 5.321 seconds
[hzadmin@BDI2 bj_ggsn]$
#check whether the output files were generated
[hzadmin@BDI2 bj_data]$ ls -ltr 20160712*
-rw-rw-r-- 1 hzadmin hzadmin 1254354742 Jul 13 09:32 20160712001.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254303891 Jul 13 09:32 20160712002.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254247185 Jul 13 09:32 20160712003.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254298641 Jul 13 09:33 20160712004.txt
-rw-rw-r-- 1 hzadmin hzadmin 1254315336 Jul 13 09:33 20160712005.txt
-rw-rw-r-- 1 hzadmin hzadmin 649592869 Jul 13 09:33 20160712006.txt
[hzadmin@BDI2 bj_data]$ pwd
/dfs/ftp/hzadmin/bj_data
[hzadmin@BDI2 bj_data]$
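# The "mv: cannot stat .../20160712ag" errors in start.log above most likely come from renaming
# split-generated chunks (aa, ab, ...) to the numbered .txt files; when a day yields fewer chunks, the
# later mv sources simply do not exist. A sketch of that rename pattern (assumption: the actual
# select/start scripts are not listed here):
i=1
for f in /dfs/ftp/hzadmin/test/20160712a?; do
  [ -e "$f" ] || continue
  mv "$f" "/dfs/ftp/hzadmin/bj_data/20160712$(printf '%03d' $i).txt"
  i=$((i+1))
done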
#check the Hadoop job list
[hzadmin@BDI2 bj_ggsn]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Total jobs:0
JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info
[hzadmin@BDI2 bj_ggsn]$ jobs -l
[hzadmin@BDI2 bj_ggsn]$
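# Non-deprecated equivalents of `hadoop job -list` above (as suggested by the DEPRECATED warning it prints):
mapred job -list
yarn application -list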
[hzadmin@BDI2 bj_ggsn]$ ll
total 136
-rwxr-xr-x 1 hzadmin hzadmin 433 Feb 10 20:39 delete.sh
-rw-r--r-- 1 hzadmin hzadmin 71 Apr 30 2015 hours.txt
drwxr-xr-x 2 root root 4096 Aug 5 2015 jar
drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 13 03:18 log
-rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out
-rwxr-xr-x 1 hzadmin hzadmin 1845 Sep 23 2015 select1.sh
-rwxr-xr-x 1 hzadmin hzadmin 454 Oct 12 2015 select2bak.sh
-rwxr-xr-x 1 hzadmin hzadmin 1367 Oct 12 2015 select2.sh
-rwxr-xr-x 1 hzadmin hzadmin 1344 Jun 18 2015 select.sh
-rwxr-xr-x 1 hzadmin hzadmin 1337 May 4 2015 select.shbak
-rwxr-xr-x 1 hzadmin hzadmin 628 Oct 28 2015 start1.sh
-rwxr-xr-x 1 hzadmin hzadmin 692 Jun 26 19:31 start2.sh
-rwxr-xr-x 1 hzadmin hzadmin 636 May 10 14:22 start3.sh
-rwxr-xr-x 1 hzadmin hzadmin 631 Mar 5 13:27 startbak1.sh
-rw-r--r-- 1 hzadmin hzadmin 16926 Jul 13 09:33 start.log
[hzadmin@BDI2 bj_ggsn]$ cd /home/hzadmin/urlAPP/
[hzadmin@BDI2 urlAPP]$ ll
total 55164
-rwxr-xr-x 1 hzadmin hzadmin 1331 Sep 22 2015 BoncRun1.sh
-rwxr-xr-x 1 hzadmin hzadmin 1306 Oct 28 2015 BoncRun.sh
-rwxr-xr-x 1 hzadmin hzadmin 1636 May 31 11:33 hive.sh
drwxrwxr-x 17 hzadmin hzadmin 4096 Jul 13 04:39 logs
drwxrwxr-x 3 hzadmin hzadmin 4096 Jul 13 09:07 ResultMatch
-rwxr-xr-x 1 hzadmin hzadmin 56426871 Jun 16 2015 ResultMatch.zip
drwxr-xr-x 17 hzadmin hzadmin 4096 Jul 15 2015 UnMatchTop1000
drwxr-xr-x 3 hzadmin hzadmin 20480 Jul 13 04:39 URLAppProgram
drwxr-xr-x 5 hzadmin hzadmin 4096 Jul 13 04:39 URLAppProgram_sf
drwxr-xr-x 17 hzadmin hzadmin 4096 Jul 15 2015 URLAppReport
[hzadmin@BDI2 urlAPP]$ cd URLAppProgram_sf/
[hzadmin@BDI2 URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 bin
-rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin 33 Jul 12 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 13 04:39 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin 721993 Jul 12 21:01 R_APP_TYPE_20160712_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 12 21:01 R_NOISE_TYPE_20160712_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 12 21:01 R_SITE_TYPE_20160712_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 12 21:01 R_URL_TYPE_20160712_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290563 Jul 12 21:01 UACDS_20160712_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233766 Jul 12 21:00 UACDS_20160712_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339712 Jul 12 21:01 UACDS_20160712_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282922 Jul 12 21:01 UACDS_20160712_00_02.jar
[hzadmin@BDI2 URLAppProgram_sf]$ pwd
/home/hzadmin/urlAPP/URLAppProgram_sf
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$ cat ProgramByDay/match.sh
(output identical to the ProgramByDay/match.sh listing shown earlier)
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 ProgramByDay]$ cat report_summary.sh
#!/bin/bash
# report_summary.sh
#*=================================================
#*
#* FileName : report_summary.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# check that the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};
set mapred.job.name=CMSS-COUNT;
alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_match_summary add partition (receive_day=${day}) location '${day}';
alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day=${day}) location '${day}';
insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day})
select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host,
count(id) as count,sum(download_bytes) ,sum(upload_bytes)
from dpi_http_dtl_mark_match
where receive_day=${day}
group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host;
insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day})
select host,url,count(id),sum(download_bytes),sum(upload_bytes)
from dpi_http_dtl_mark_unmatch
where receive_day=${day}
group by host,url;
"
[hzadmin@BDI2 ProgramByDay]$
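# Sketch: after report_summary.sh finishes, the new partition and its row count can be spot-checked
# (20160712 is an example day; the table and database names are the ones used in the script above):
hive -e "use dpi; show partitions dpi_http_dtl_mark_match_summary;"
hive -e "use dpi; select count(*) from dpi_http_dtl_mark_match_summary where receive_day='20160712';"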
[hzadmin@BDI2 ProgramByDay]$ cat report.sh
#!/bin/bash
# report.sh
#*=================================================
#*
#* FileName : report.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# check that the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};
alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}');
alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}');
alter table dpi_http_mark_summary add partition (receive_day=${day}) location '${day}';
alter table dpi_http_mark_rule add partition (receive_day=${day}) location '${day}';
alter table dpi_http_mark_type add partition (receive_day=${day}) location '${day}';
alter table dpi_http_mark_top1000 add partition (receive_day=${day}) location '${day}';
alter table dpi_site_mark_rule add partition (receive_day=${day}) location '${day}';
alter table dpi_site_mark_type add partition (receive_day=${day}) location '${day}';
alter table dpi_site_mark_top1000 add partition (receive_day=${day}) location '${day}';
insert overwrite table dpi_http_mark_rule partition(receive_day=${day})
select visit_type,rule_id,typecode,matchs,'${day}' from
(
select visit_type,rule_id,type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='url' group by visit_type,rule_id,type_code
union all
select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ;
insert overwrite table dpi_http_mark_type partition(receive_day=${day})
select visit_type,typecode, rulematchs, urlmatchs,'${day}' from (
select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where receive_day =${day} and visit_type='url' group by visit_type,typecode
union all
select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where receive_day =${day} and visit_type='app' group by visit_type,typecode )sub;
insert overwrite table dpi_site_mark_rule partition(receive_day=${day})
select site_id,site_code,matchs,'${day}' from
(
select site_id,site_code,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and site_id <> '' group by site_id,site_code
) sub ;
insert overwrite table dpi_site_mark_type partition(receive_day=${day})
select site_code, matchs, hostmatchs,'${day}' from (
select site_code, count(distinct site_id) as matchs,sum(matchs)as hostmatchs from dpi_site_mark_rule where receive_day =${day} and site_id <> '' group by site_code
) sub;
insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day})
select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_unmatch_summary
where receive_day =${day} group by host,url order by count desc limit 5000;
insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day})
select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_match_summary
where receive_day =${day} and site_id = '' group by host
order by count desc limit 1000;
insert overwrite table temp_dpi_match partition(receive_day=${day})
SELECT
count(id) as matchcount,
cast(0 as bigint)as noisecount ,
cast(0 as bigint) as unmatchcount,
count(CASE
WHEN visit_type='url'
THEN id
ELSE null
END )AS urlcount,
count(CASE
WHEN visit_type='app'
THEN id
ELSE null
END )AS appcount,
cast(0 as bigint) as sitecount
FROM
dpi_http_dtl_mark_match
WHERE
receive_day=${day}
AND
reserved_19='1';
insert overwrite table temp_dpi_noise partition(receive_day=${day})
select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} ;
insert overwrite table temp_dpi_unmatch partition(receive_day=${day})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} ;
insert overwrite table temp_dpi_site partition(receive_day=${day})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day};
insert overwrite table dpi_http_mark_summary partition(receive_day='${day}')
select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from
(
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_match where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_noise where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_unmatch where receive_day=${day}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_site where receive_day=${day}
) sub ;
"[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
#check the Hadoop job list (nothing found)
[hzadmin@BDI2 ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Total jobs:0
JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info
#start a Hadoop job
hive (dpi)>
>
>
>
> select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'20160713'
> from dpi_http_dtl_mark_unmatch_summary
> where receive_day ='20160713' group by host,url order by count desc limit 5;
Query ID = hzadmin_20160713143333_11c325a3-d541-4fd2-b5e8-f8ee0d1f25d3
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11524, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11524
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-07-13 14:33:16,955 Stage-1 map = 0%, reduce = 0%
2016-07-13 14:33:28,419 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 18.02 sec
2016-07-13 14:33:38,849 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 20.2 sec
MapReduce Total cumulative CPU time: 20 seconds 200 msec
Ended Job = job_1464150086810_11524
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11525, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11525/
Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11525 (this command kills the Hadoop job)
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2016-07-13 14:33:48,364 Stage-2 map = 0%, reduce = 0%
2016-07-13 14:33:59,031 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 10.59 sec
2016-07-13 14:34:07,565 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 12.53 sec
MapReduce Total cumulative CPU time: 12 seconds 530 msec
Ended Job = job_1464150086810_11525
MapReduce Jobs Launched:
Job 0: Map: 1 Reduce: 1 Cumulative CPU: 20.2 sec HDFS Read: 256 HDFS Write: 96 SUCCESS
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 12.53 sec HDFS Read: 453 HDFS Write: 4 SUCCESS
Total MapReduce CPU Time Spent: 32 seconds 730 msec
OK
Time taken: 61.137 seconds
hive (dpi)>
#check the Hadoop job list (job found)
[hzadmin@BDI2 ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Total jobs:1
JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info
job_1464150086810_11524 PREP 1468391589369 hzadmin default NORMAL 1 0 4096M 0M 4096M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
[hzadmin@BDI2 ProgramByDay]$ hadoop job -lis
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Usage: CLI <command> <args>
[-submit <job-file>]
[-status <job-id>]
[-counter <job-id> <group-name> <counter-name>]
[-kill <job-id>]
[-set-priority <job-id> <priority>]. Valid values for priorities are: VERY_HIGH HIGH NORMAL LOW VERY_LOW
[-events <job-id> <from-event-#> <#-of-events>]
[-history <jobHistoryFile>]
[-list [all]]
[-list-active-trackers]
[-list-blacklisted-trackers]
[-list-attempt-ids <job-id> <task-type> <task-state>]. Valid values for <task-type> are REDUCE MAP. Valid values for <task-state> are running, completed
[-kill-task <task-attempt-id>]
[-fail-task <task-attempt-id>]
[-logs <job-id> <task-attempt-id>]
Generic options supported are
-conf <configuration file> specify an application configuration file
-D <property=value> use value for given property
-fs <local|namenode:port> specify a namenode
-jt <local|jobtracker:port> specify a job tracker
-files <comma separated list of files> specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars> specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives> specify comma separated archives to be unarchived on the compute machines.
The general command line syntax is
bin/hadoop command [genericOptions] [commandOptions]
[hzadmin@BDI2 ProgramByDay]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Total jobs:1
JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info
job_1464150086810_11524 RUNNING 1468391589369 hzadmin default NORMAL 2 0 8192M 0M 8192M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/
[hzadmin@BDI2 ProgramByDay]$
hive (dpi)>
> select site_id,site_code,sum(count) as matchs
> from dpi_http_dtl_mark_match_summary where receive_day='20160713' and site_id <> '' group by site_id,site_code
> ;
Query ID = hzadmin_20160713145555_149dda0c-d7c0-4841-91aa-57f3ce1f454d
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1464150086810_11526, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/
Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11526
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-07-13 14:55:29,837 Stage-1 map = 0%, reduce = 0%
2016-07-13 14:56:03,280 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 25.72 sec
2016-07-13 14:56:11,859 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 39.58 sec
MapReduce Total cumulative CPU time: 39 seconds 580 msec
Ended Job = job_1464150086810_11526
MapReduce Jobs Launched:
Job 0: Map: 1 Reduce: 1 Cumulative CPU: 39.58 sec HDFS Read: 256 HDFS Write: 4 SUCCESS
Total MapReduce CPU Time Spent: 39 seconds 580 msec
OK
Time taken: 50.996 seconds
hive (dpi)>
[hzadmin@BDI2 bj_ggsn]$ hadoop job -list
DEPRECATED: Use of this script to execute mapred command is deprecated.
Instead use the mapred command for it.
Total jobs:1
JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info
job_1464150086810_11526 RUNNING 1468392923303 hzadmin default NORMAL 2 0 8192M 0M 8192M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/
[hzadmin@BDI2 bj_ggsn]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$ cat label.sh
#!/bin/bash
# label.sh
#*=================================================
#*
#* FileName : label.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# check that the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};
INSERT
OVERWRITE TABLE CDPI_USER_BEH_PREFER_D_tmp
SELECT
CASE
WHEN T.STARTTIME LIKE '%%-%%'
AND T.STARTTIME LIKE '%%.%%'
THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
WHEN T.STARTTIME LIKE '%-%'
AND T.STARTTIME LIKE '%%:%%'
THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
WHEN T.STARTTIME LIKE '%%.%%'
THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd')
WHEN LENGTH(T.STARTTIME) = 13
THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd')
ELSE SUBSTR(T.STARTTIME,1,8)
END AS VISIT_DAY,
T.PHONE_ID,
T1.LABEL_CODE,
COUNT(1) AS TIMES,
SUM(T.DOWNLOAD_BYTES + T.UPLOAD_BYTES) AS FLOWS,
1 AS DAYS
FROM
(
SELECT
STARTTIME,
PHONE_ID,
TYPE_CODE,
SUM(DOWNLOAD_BYTES) AS DOWNLOAD_BYTES,
SUM(UPLOAD_BYTES) AS UPLOAD_BYTES
FROM
dpi_http_dtl_mark_match_summary
WHERE
RECEIVE_DAY = '${day}'
AND LENGTH(PHONE_ID)=11
AND SUBSTR(PHONE_ID,1,1)='1'
AND PHONE_ID NOT LIKE '%.%'
AND TYPE_CODE IS NOT NULL
AND TYPE_CODE <> ''
GROUP BY
STARTTIME,
PHONE_ID,
TYPE_CODE ) T
LEFT OUTER JOIN
R_LABEL_BEHAVIOR T1
ON
T.TYPE_CODE = T1.TYPE_CODE
WHERE T1.LABEL_CODE IS NOT NULL
GROUP BY
CASE
WHEN T.STARTTIME LIKE '%%-%%'
AND T.STARTTIME LIKE '%%.%%'
THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
WHEN T.STARTTIME LIKE '%-%'
AND T.STARTTIME LIKE '%%:%%'
THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd')
WHEN T.STARTTIME LIKE '%%.%%'
THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd')
WHEN LENGTH(T.STARTTIME) = 13
THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd')
ELSE SUBSTR(T.STARTTIME,1,8)
END,
T.PHONE_ID,
T1.LABEL_CODE;
ALTER TABLE
CDPI_USER_BEH_PREFER_D DROP IF EXISTS PARTITION(DATA_DAY='${day}');
ALTER TABLE
CDPI_USER_BEH_PREFER_D ADD PARTITION (DATA_DAY='${day}') LOCATION '${day}';
INSERT
OVERWRITE TABLE CDPI_USER_BEH_PREFER_D PARTITION
(
DATA_DAY='${day}'
)
SELECT
*
FROM
(
SELECT
VISIT_DAY,
PHONE_ID,
CONCAT(SUBSTR(LABEL_CODE,1,10),'000') AS LABEL_CODE,
SUM(times),
SUM(flows),
count(distinct days)
FROM
CDPI_USER_BEH_PREFER_D_tmp
where
SUBSTR(LABEL_CODE,8,3) <> '000'
GROUP BY
VISIT_DAY,
PHONE_ID,
CONCAT(SUBSTR(LABEL_CODE,1,10),'000')
UNION ALL
SELECT
VISIT_DAY,
PHONE_ID,
CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') AS LABEL_CODE,
SUM(times),
SUM(flows),
count(distinct days)
FROM
CDPI_USER_BEH_PREFER_D_tmp
where SUBSTR(LABEL_CODE,5,3) <> '000'
GROUP BY
VISIT_DAY,
PHONE_ID,
CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') )A;
"[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$ cat upload.sh
#!/bin/bash
# upload.sh
######################################################################################################
# function : extract the files, compress them and upload them to the provincial front-end server
# date: 2014/02/10
# author: YyDou
# param: day (data date)
######################################################################################################
##
cd `dirname $0`
eval $(grep AREA_NO ../common.cfg)
eval $(grep REUPLOAD_COUNT ../common.cfg)
eval $(grep UNMATCHTOP1000 ../common.cfg)
eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# check that the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
#day=`date -d "yesterday" +%Y%m%d`
#day=$1
curr_date=`date +%Y%m%d`
curr_time=`date +%H`
if [ ${curr_time} -gt '06' ]; then
curr_date=`date -d +1days +%Y%m%d`
fi
if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then
mkdir -p "$UNMATCHTOP1000/${curr_date}"
fi;
if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then
mkdir -p "$URLAPPREPORT/${curr_date}"
fi;
cd $UNMATCHTOP1000/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',url,count,download_bytes,upload_bytes,day_date,host from dpi_http_mark_top1000 where receive_day=${day}" >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',host,count,download_bytes,upload_bytes,day_date from dpi_site_mark_top1000 where receive_day=${day}" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
##compress
tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
cd $URLAPPREPORT/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',totalcount,noisecount,unmatchcount,matchcount,urlcount,appcount,day_date,sitecount from dpi_http_mark_summary where receive_day=${day}" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,ruleid,typecode,matchs,day_date from dpi_http_mark_rule where receive_day=${day}" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,type_code,matchs,urlmatchs,day_date from dpi_http_mark_type where receive_day=${day}" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
# (each hive -e above runs a Hive SQL query and redirects the result into a .DAT file)
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_id,site_code,matchs,day_date from dpi_site_mark_rule where receive_day=${day}" >> RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_code,matchs,hostmatchs,day_date from dpi_site_mark_type where receive_day=${day}" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
## Compress (the original .DAT files are removed afterwards)
tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$
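upload.sh repeats the same three steps for every report file: run a Hive query into a .DAT file, tar it, drop the original. A minimal helper sketch, assuming the same common.cfg variables are in scope; the function name export_and_pack is hypothetical and not part of the deployed script:

export_and_pack() {
  # $1 = SQL text, $2 = target .DAT file name (sketch only)
  local sql=$1 out=$2
  hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
${sql}" >> "${out}"
  tar -czf "${out}.tar.gz" "${out}" && rm -f "${out}"
}
# Example call, mirroring the first export in upload.sh:
export_and_pack "select '${AREA_NO}',url,count,download_bytes,upload_bytes,day_date,host from dpi_http_mark_top1000 where receive_day=${day}" "UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT"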
[hzadmin@BDI2 ProgramByDay]$ cat logupload.sh
#!/bin/bash
#logupload.sh
#*=================================================
#*
#* FileName : logupload.sh
#* CreateDate: 2014-06-25
#* Abstract :
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
PWDNOW=`pwd`
eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep AREA_NO ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
if [ $# -ne 1 ] ; then
echo "Input parameter error, there should be 1 parameters ";
exit 1;
fi;
LOGDAY=$1
curr_time=`date +%H`
DAY=`date +%Y%m%d`
if [ ${curr_time} -gt '06' ]; then
DAY=`date -d +1days +%Y%m%d`
fi
if [ ! -d "${URLAPPREPORT}/${DAY}" ] ; then
mkdir -p "${URLAPPREPORT}/${DAY}"
fi;
cd ${LOC_DIR}/logs/${LOGDAY}/
tar -zcf LogInfo.${LOGDAY}.${AREA_NO}.tar.gz *.log
mv -f LogInfo.${LOGDAY}.${AREA_NO}.tar.gz ${URLAPPREPORT}/${DAY}/
# For provinces where the front-end server and the program server are separate machines
#cd ${PWDNOW}
#sh ./ftp_putalldata.sh ${URLAPPREPORT}/${DAY}/LogInfo.${LOGDAY}.${AREA_NO}.tar.gz
cd ${PWDNOW}
[hzadmin@BDI2 ProgramByDay]$
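For reference, a typical invocation of logupload.sh (the date value is only illustrative): it packs that day's *.log files from ${LOC_DIR}/logs/<day>/ into LogInfo.<day>.${AREA_NO}.tar.gz under ${URLAPPREPORT}/<report date>.

sh ./logupload.sh 20160710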
[hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByDay/ftp_getfilelist.sh
cat: ../ProgramByDay/ftp_getfilelist.sh: No such file or directory
[hzadmin@BDI2 ProgramByDay]$ cat ftp_getfilelist.sh
cat: ftp_getfilelist.sh: No such file or directory
[hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/match_H.sh
#!/bin/bash
# match_H.sh
######################################################################################################
# function : run the matching job and produce the statistics report data
# date: 2014/02/10
# author: YyDou
# param: day (data date)
######################################################################################################
##
cd `dirname $0`
PWDRE=`pwd`
cd ..
PWDNOW=`pwd`
cd $PWDRE
eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg)
eval $(grep URL_MATCH ${PWDNOW}/common.cfg)
eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg)
eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg)
eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg)
eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg)
eval $(grep HIVE_USER ${PWDNOW}/common.cfg)
eval $(grep LOC_DIR ${PWDNOW}/common.cfg)
eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg)
eval $(grep MR_VERSION ${PWDNOW}/common.cfg)
eval $(grep PARA_JAR ${PWDNOW}/common.cfg)
eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg)
# Check whether the parameters were supplied
if [ $# -ne 2 ] ; then
echo "Input parameter error : there should be 2 parameters";
exit 1;
fi;
day=$1
hour=$2
hadoop fs -ls ${URL_INPUT_PATH}${day}/${hour} > exist_test ;
x=`wc -l exist_test | cut -d ' ' -f 1`;
if [ ${x} = 0 ] ; then
echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day}/${hour} file is not exist !"
rm -f exist_test
exit 1;
fi;
rm -f exist_test
hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt
hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt
hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt
hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt
hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE}
hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE}
hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE}
hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE}
echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar";
hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day}/${hour} ${URL_MATCH}${day}/${hour} $PWDNOW/${DPI_CONF_PATH}
#hadoop fs -rm ${url_match}${day}/part-m-*.gz
hive -e "
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
set dpi.encode.license=${HIVE_LICENSE};
alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_match add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/match';
alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/noise';
alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/unmatch';
"
cd $PWDNOW
[hzadmin@BDI2 ProgramByDay]$
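match_H.sh verifies the HDFS input directory by counting lines of hadoop fs -ls output written to exist_test. An equivalent, simpler check (a sketch using the same variables, relying on the exit code of hadoop fs -test) would be:

if ! hadoop fs -test -d "${URL_INPUT_PATH}${day}/${hour}"; then
  echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day}/${hour} does not exist !"
  exit 1
fi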
[hzadmin@BDI2 ProgramByDay]$ cat report_summary_H.sh
cat: report_summary_H.sh: No such file or directory
[hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/report_summary_H.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# Check whether the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};
set mapred.job.name=CMSS-COUNT;
alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_match_summary add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}';
alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}';
insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day},receive_hour=${hour})
select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host,
count(id) as count,sum(download_bytes) ,sum(upload_bytes)
from dpi_http_dtl_mark_match
where receive_day=${day} and receive_hour=${hour}
group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host;
insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day},receive_hour=${hour})
select host,url,count(id),sum(download_bytes),sum(upload_bytes)
from dpi_http_dtl_mark_unmatch
where receive_day=${day} and receive_hour=${hour}
group by host,url;
"
[hzadmin@BDI2 ProgramByDay]$
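report_summary_H.sh references ${hour} in every partition clause but only reads day=$1, so ${hour} expands to an empty string unless the caller exports it. If an hourly run is intended, a small adjustment (a sketch, not the deployed code) would pass the hour explicitly:

if [ $# -ne 2 ] ; then
  echo "Input parameter error : there should be 2 parameters (day hour)";
  exit 1;
fi;
day=$1
hour=$2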
[hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/report_H.sh
#!/bin/bash
# report_H.sh
#*=================================================
#*
#* FileName : report_H.sh
#* CreateDate: 2014-04-03
#* Abstract : Statistical analysis of the results of matches
#* Author : SPP
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep RULE_PROV_VERNO ../match.cfg)
eval $(grep URL_MATCH ../common.cfg)
eval $(grep URL_INPUT_PATH ../common.cfg)
eval $(grep DPI_CONF_PATH ../common.cfg)
eval $(grep R_URL_TYPE ../common.cfg)
eval $(grep R_APP_TYPE ../common.cfg)
eval $(grep R_NOISE_TYPE ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep LOC_DIR ../common.cfg)
eval $(grep HIVE_LICENSE ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# Check whether the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
hive -e"
add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar;
use ${HIVE_USER};
${PARA_HIVE}
set dpi.encode.license=${HIVE_LICENSE};
alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}');
alter table dpi_http_mark_summary add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}';
alter table dpi_http_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}';
alter table dpi_http_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}';
alter table dpi_site_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}';
alter table dpi_site_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}';
insert overwrite table dpi_http_mark_rule partition(receive_day=${day},receive_hour=${hour})
select visit_type,rule_id,typecode,matchs,'${day}' from
(
select visit_type,rule_id,type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by visit_type,rule_id,type_code
union all
select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ;
insert overwrite table dpi_site_mark_rule partition(receive_day=${day},receive_hour=${hour})
select site_id,site_code,matchs,'${day}' from
(
select site_id,site_code,sum(count) as matchs
from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and site_id <> '' group by site_id,site_code
) sub ;
insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day},receive_hour=${hour})
select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_unmatch_summary
where receive_day =${day} and receive_hour=${hour} group by host,url order by count desc limit 5000;
insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day},receive_hour=${hour})
select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}'
from dpi_http_dtl_mark_match_summary
where receive_day =${day} and site_id = '' group by host
order by count desc limit 1000;
insert overwrite table temp_dpi_match partition(receive_day=${day},receive_hour=${hour})
select count(t.id) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} group by id ) t ;
insert overwrite table temp_dpi_url partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,count(url.id) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by id) url ;
insert overwrite table temp_dpi_app partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,count(app.id) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by id) app ;
insert overwrite table temp_dpi_noise partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} and receive_hour=${hour} ;
insert overwrite table temp_dpi_unmatch partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} and receive_hour=${hour} ;
insert overwrite table temp_dpi_site partition(receive_day=${day},receive_hour=${hour})
select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day} and receive_hour=${hour} ;
insert overwrite table dpi_http_mark_summary partition(receive_day='${day}',receive_hour=${hour})
select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from
(
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_match where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_url where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_app where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_noise where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_unmatch where receive_day=${day} and receive_hour=${hour}
union all
select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_site where receive_day=${day} and receive_hour=${hour}
) sub ;
"[hzadmin@BDI2 ProgramByDay]$
[hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/upload_H.sh
#!/bin/bash
# upload_H.sh
#*=================================================
#*
#* FileName : upload_H.sh
#* CreateDate: 2014-04-03
#* Abstract : State statistics package
#* Author : LiBin
#*
#* BONC All rights reserved.
#*==================================================
##
cd `dirname $0`
eval $(grep AREA_NO ../common.cfg)
eval $(grep REUPLOAD_COUNT ../common.cfg)
eval $(grep UNMATCHTOP1000 ../common.cfg)
eval $(grep URLAPPREPORT ../common.cfg)
eval $(grep HIVE_USER ../common.cfg)
eval $(grep PARA_HIVE ../common.cfg)
# Check whether the parameter was supplied
if [ $# -ne 1 ] ; then
echo "Input parameter error : there should be 1 parameters";
exit 1;
fi;
day=$1
#day=`date -d "yesterday" +%Y%m%d`
#day=$1
curr_date=`date +%Y%m%d`
curr_time=`date +%H`
if [ ${curr_time} -gt '06' ]; then
curr_date=`date -d +1days +%Y%m%d`
fi
if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then
mkdir -p "$UNMATCHTOP1000/${curr_date}"
fi;
if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then
mkdir -p "$URLAPPREPORT/${curr_date}"
fi;
cd $UNMATCHTOP1000/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',url,sum(count) count,sum(download_bytes),sum(upload_bytes),'${day}',host
from dpi_http_mark_top1000
where receive_day=${day} group by url,host,day_date order by count desc limit 5000; " >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',host,sum(count),sum(download_bytes),sum(upload_bytes),'${day}' from dpi_site_mark_top1000 where receive_day=${day} group by host,day_date order by count desc limit 1000;" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
## Compress
tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
cd $URLAPPREPORT/${curr_date}
#rm -f *.*
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',sum(totalcount),sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount)
from dpi_http_mark_summary
where receive_day=${day};" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,ruleid,typecode,sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,ruleid,typecode;" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',visit_type,typecode,count(distinct ruleid) as matchs,sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,typecode;" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00');
alter table dpi_http_mark_type add partition (receive_day=${day},receive_hour='00') location '${day}/00';
insert overwrite table dpi_http_mark_type partition(receive_day=${day},receive_hour=00)
select visit_type,typecode,count(distinct ruleid),sum(matchs),'${day}'
from dpi_http_mark_rule
where receive_day=${day} group by visit_type,typecode;"
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_id,site_code,sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_id,site_code" >> RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
select '${AREA_NO}',site_code,count(distinct site_id),sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_code" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
hive -e "
use ${HIVE_USER};
${PARA_HIVE}
set mapred.job.name=CMSS-UPLOAD;
alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00');
alter table dpi_site_mark_type add partition (receive_day=${day},receive_hour='00') location '${day}/00';
insert overwrite table dpi_site_mark_type partition(receive_day=${day},receive_hour=00)
select site_code,count(distinct site_id),sum(matchs),'${day}'
from dpi_site_mark_rule
where receive_day=${day} group by site_code;"
## Compress
tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT
[hzadmin@BDI2 ProgramByDay]$
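upload_H.sh appends each query result to a .DAT file and packages it unconditionally; a small guard that could be added before the tar step (a sketch, assuming the same naming convention) is:

for f in *.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT; do
  [ -e "$f" ] || continue          # nothing matched the pattern
  [ -s "$f" ] || echo "WARN: $f is empty" >&2
done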
[hzadmin@BDI2 ProgramByDay]$ cd ..
[hzadmin@BDI2 URLAppProgram_sf]$ cat remove_files.sh
#!/bin/bash
#*=================================================
#*
#* FileName : remove_files.sh
#* CreateDate: 2014-02-25
#* Abstract : Delete 'UACDS_YYYYMMDD_**.tar.gz' files
#* on a regular basis, retaining at most ten files.
#* Author : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
eval $(grep DATA_HOME public.cfg)
eval $(grep ZIP_LIMIT public.cfg)
eval $(grep REPORT_LIMIT public.cfg)
eval $(grep UNMATCHTOP1000 public.cfg)
eval $(grep URLAPPREPORT public.cfg)
eval $(grep SUMMARY_DAY public.cfg)
eval $(grep DELETE_DAY public.cfg)
eval $(grep URL_MATCH common.cfg)
eval $(grep TEMP_DPI_MATCH common.cfg)
eval $(grep TEMP_DPI_NOISE common.cfg)
eval $(grep TEMP_DPI_UNMATCH common.cfg)
eval $(grep TEMP_DPI_URL common.cfg)
eval $(grep TEMP_DPI_APP common.cfg)
eval $(grep TEMP_DPI_SITE common.cfg)
eval $(grep MATCH_SUMMARY common.cfg)
eval $(grep UNMATCH_SUMMARY common.cfg)
upl=$((ZIP_LIMIT+1))
cd $DATA_HOME
a=` ls UA* | wc -l`
ls UA* | sort -r > list
if [ $a -gt $ZIP_LIMIT ]; then
sed -n ${upl},${a}p list > dellist
c=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=c;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
rm $greps
echo 'delete file:' $greps
done
rm dellist
else
echo ' Data has not reached the deletion upper limit!'
fi
rm list
#*=================================================
#*
#*
#* CreateDate: 2014-02-25
#* Abstract : Delete UnMatchTop1000 folders on a regular basis.
#* Author : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
upl=$((REPORT_LIMIT+1))
ls -lt $UNMATCHTOP1000 |awk '/^d/ {print $9}'| sort -r > list
q=`wc -l list | cut -d ' ' -f 1`
if [ $q -gt $REPORT_LIMIT ]; then
sed -n ${upl},${q}p list > dellist
x=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=x;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
rm -rf ${UNMATCHTOP1000}/${greps}
echo 'delete file:' $greps
done
rm dellist
else
echo ' UnMatchTop1000 data has not reached the deletion threshold!'
fi
rm list
#*=================================================
#*
#*
#* CreateDate: 2014-02-25
#* Abstract : Delete URLAppReport folders on a regular basis.
#* Author : LiangWei
#*
#* BONC All rights reserved.
#*==================================================
upl=$((REPORT_LIMIT+1))
ls -lt $URLAPPREPORT |awk '/^d/ {print $9}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $REPORT_LIMIT ]; then
sed -n ${upl},${w}p list > dellist
v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
rm -rf ${URLAPPREPORT}/${greps}
echo 'delete file:' $greps
done
rm dellist
else
echo ' URLAppReport data has not reached the deletion threshold!'
fi
rm list
# Delete match output files on HDFS
upl=$((DELETE_DAY+1))
hadoop fs -ls $URL_MATCH |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $DELETE_DAY ]; then
sed -n ${upl},${w}p list > dellist
v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
hadoop fs -rmr ${URL_MATCH}/${greps}
echo 'delete file:' ${URL_MATCH}/${greps}
done
rm dellist
else
echo ' URL_MATCH data has not reached the deletion threshold!'
fi
rm list
# Delete intermediate table data on HDFS
DAY=`date -d -"$DELETE_DAY"day +%Y%m%d`
hadoop fs -rmr ${TEMP_DPI_MATCH}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_NOISE}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_UNMATCH}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_URL}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_APP}/receive_day=${DAY}
hadoop fs -rmr ${TEMP_DPI_SITE}/receive_day=${DAY}
#hadoop fs -rm -r ${url_match}/sitekey/${DAY}
# Delete match summary table data
upl=$((SUMMARY_DAY+1))
hadoop fs -ls $MATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $SUMMARY_DAY ]; then
sed -n ${upl},${w}p list > dellist
v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
hadoop fs -rmr ${MATCH_SUMMARY}/${greps}
echo 'delete file:' ${MATCH_SUMMARY}/${greps}
done
rm dellist
else
echo ' MATCH_SUMMARY data has not reached the deletion threshold!'
fi
rm list
# Delete unmatch summary table data
upl=$((SUMMARY_DAY+1))
hadoop fs -ls $UNMATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list
w=`wc -l list | cut -d ' ' -f 1`
if [ $w -gt $SUMMARY_DAY ]; then
sed -n ${upl},${w}p list > dellist
v=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=v;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
hadoop fs -rmr ${UNMATCH_SUMMARY}/${greps}
echo 'delete file:' ${UNMATCH_SUMMARY}/${greps}
done
rm dellist
else
echo ' UNMATCH_SUMMARY data has not reached the deletion threshold!'
fi
rm list
[hzadmin@BDI2 URLAppProgram_sf]$
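Each deletion block in remove_files.sh walks dellist with a counted sed -n <m>p loop; the same effect (a sketch, reusing the script's own variables) can be had with a single while read, as remove_details.sh shown below does for its HDFS cleanup:

if [ "$a" -gt "$ZIP_LIMIT" ]; then
  sed -n "${upl},${a}p" list | while read -r greps; do
    rm -f "$greps"
    echo "delete file: $greps"
  done
fi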
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/match_detail.sh
#!/bin/bash
#match_details.sh
#*=================================================
#*
#* FileName : match_details.sh
#* CreateDate: 2015-04-22
#* Abstract : CMSS Interface
#* Author : SPP
#* Main program
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
PWDNOW=`pwd`
eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_INPUT ${PWDNOW}/details_conf.cfg)
eval $(grep FTP_DIR ${PWDNOW}/details_conf.cfg)
eval $(grep DELAY_DAY ${PWDNOW}/details_conf.cfg)
eval $(grep DETAILS ${PWDNOW}/details_conf.cfg)
unset details day steps
args=`getopt r:d:s: $*`
if test $? != 0
then
echo " Usage is ./match_details.sh [-r details -d day -s steps ] "
echo " Use database administrator account for user name "
exit 1
fi
set -- $args
for i
do
case "$i" in
-r) shift;details=$1;shift;;
-d) shift;day=$1;shift;;
-s) shift;steps=$1;shift;;
esac
done
# If no day argument was passed in, fall back to the system date
if [ ! -n "$day" ] ; then
echo "not input days so day=today"
day=`date -d -${DELAY_DAY}days +%Y%m%d`
create_day=`date +%Y%m%d`
else
create_day=$day
fi
if [ ! -n "$details" ] ; then
LIST=$DETAILS
DETAILS_LIST=`echo $LIST | sed 's/,/ /g'`
else
DETAILS_LIST=$details
fi
echo "create_day:" $create_day
echo "day:" $day
# Create the output directories if they do not exist yet
for DETAILS in $DETAILS_LIST
do
if [ ! -d "$MATCH_DETAILS/${create_day}/$DETAILS" ] ; then
mkdir -p "$MATCH_DETAILS/${create_day}/$DETAILS"
fi
if [ ! -d "$MATCH_DETAILS/${create_day}/logs" ] ; then
mkdir -p "$MATCH_DETAILS/${create_day}/logs"
fi
rm -f $MATCH_DETAILS/${create_day}/logs/run_"$DETAILS"_"$create_day".log
# Check whether a step was specified
if [ ! -n "$steps" ] ; then
./merge_file.sh $day $DETAILS> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
else
if [ $steps -eq 2 ] ; then
./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
else
echo " please make your second var is 2.........">> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1
fi
fi
done[hzadmin@BDI2 URLAppProgram_sf]$
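For reference, typical invocations of match_detail.sh (all argument values are illustrative; the detail type normally comes from the DETAILS list in details_conf.cfg):

# default run: day = today - DELAY_DAY, all detail types from DETAILS
sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh
# re-run only step 2 for one detail type on a specific day
sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh -r http -d 20160710 -s 2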
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/remove_details.sh
#!/bin/bash
#remove_details.sh
#*=================================================
#*
#* FileName : remove_details.sh
#* CreateDate: 2014-10-22
#* Abstract : delete MATCH_DETAILS files
#* Author : WangNing
#*
#* BONC All rights reserved.
#*==================================================
cd `dirname $0`
PWDNOW=`pwd`
eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg)
eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg)
eval $(grep REPORT_LIMIT ${PWDNOW}/details_conf.cfg)
eval $(grep HDFS_LIMIT ${PWDNOW}/details_conf.cfg)
upl=$((REPORT_LIMIT+1))
ls -lt $MATCH_DETAILS |awk '/^d/ {print $9}'| sort -r > list
q=`wc -l list | cut -d ' ' -f 1`
if [ $q -gt $REPORT_LIMIT ]; then
sed -n ${upl},${q}p list > dellist
x=`wc -l dellist | cut -d ' ' -f 1`
for ((m=1;m<=x;m++))
do
grepstr='sed -n '$m'p dellist'
greps=`$grepstr`
rm -rf ${MATCH_DETAILS}/${greps}
echo 'delete file:' $greps
done
rm -f dellist
else
echo ' MATCH_DETAILS data has not reached the deletion threshold!'
fi
rm -f list
# Delete the output files on HDFS
up_l=$((HDFS_LIMIT+1))
hadoop fs -ls ${MATCH_PRINT} |awk '/^d/ {print $8}'| sort -r>files.txt
s=`cat files.txt |wc -l`
if [ $s -gt $HDFS_LIMIT ]; then
sed -n ${up_l},${s}p files.txt | while read line
do
echo 'delete file:' $line
hadoop fs -rm -r $line
done
else
echo "hdfs 数据没有达到删除上限!"
fi
rm -f files.txt
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/hive.sh
#!/bin/bash
. $HOME/.bash_profile
#dateday=`date -d -1day +%Y%m%d`
dateday=$1
hive -e "
set mapreduce.job.queuename=thirdpart1;use dpi;
add jar /home/hzadmin/bj_ggsn/jar/Decode.jar;
create temporary function decode as 'Decode';
select PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,sum(DOWNLOAD_BYTES),sum(UPLOAD_BYTES),keyword,count(id),Decode(gen_flag,'','3G','4G','4G',gen_flag) from dpi_http_dtl_mark_match where receive_day='${dateday}' and phone_id is not null group by PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,keyword,gen_flag" > /dfs/ftp/hzadmin/test/${dateday}.txt
split -l 12000000 /dfs/ftp/hzadmin/test/${dateday}.txt /dfs/ftp/hzadmin/test/${dateday}
mv /dfs/ftp/hzadmin/test/${dateday}aa /dfs/ftp/hzadmin/bj_data/${dateday}001.txt
mv /dfs/ftp/hzadmin/test/${dateday}ab /dfs/ftp/hzadmin/bj_data/${dateday}002.txt
mv /dfs/ftp/hzadmin/test/${dateday}ac /dfs/ftp/hzadmin/bj_data/${dateday}003.txt
mv /dfs/ftp/hzadmin/test/${dateday}ad /dfs/ftp/hzadmin/bj_data/${dateday}004.txt
mv /dfs/ftp/hzadmin/test/${dateday}ae /dfs/ftp/hzadmin/bj_data/${dateday}005.txt
mv /dfs/ftp/hzadmin/test/${dateday}af /dfs/ftp/hzadmin/bj_data/${dateday}006.txt
mv /dfs/ftp/hzadmin/test/${dateday}ag /dfs/ftp/hzadmin/bj_data/${dateday}007.txt
mv /dfs/ftp/hzadmin/test/${dateday}ah /dfs/ftp/hzadmin/bj_data/${dateday}008.txt
mv /dfs/ftp/hzadmin/test/${dateday}ai /dfs/ftp/hzadmin/bj_data/${dateday}009.txt
[hzadmin@BDI2 URLAppProgram_sf]$
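hive.sh moves the split output chunks aa through ai by name; if split produces fewer chunks the extra mv calls just error out, and any chunk past the ninth would be left behind. A loop over whatever split actually produced (a sketch, assuming the same paths as hive.sh) avoids the hard-coded list:

n=1
for f in /dfs/ftp/hzadmin/test/${dateday}[a-z][a-z]; do
  [ -e "$f" ] || continue                      # no chunks produced
  mv "$f" /dfs/ftp/hzadmin/bj_data/${dateday}$(printf '%03d' "$n").txt
  n=$((n+1))
done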
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start1.sh
#!/bin/sh
source ~/.bash_profile
datetime=$(date --date "1 days ago" +%Y%m%d)
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime >> log/${datetime}_1.log 2>&1
sh /home/hzadmin/bj_ggsn/select2.sh $datetime >> log/${datetime}_2.log 2>&1
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun.sh
sh /home/hzadmin/urlAPP/hive.sh $datetime
sh /home/hzadmin/bj_ggsn/delete.sh
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/delete.sh
#!/bin/bash
source ~/.bash_profile
dataday=$(date --date "7 days ago" +%Y%m%d)
hadoop fs -rm -r /share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=$dataday
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/$dataday
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$dataday
hive -e "use dpi;alter table dpi_http_dtl_mark_match_summary drop partition(receive_day='$dataday')"
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start2.sh
#!/bin/sh
source ~/.bash_profile
#datetime=$(date --date "1 days ago" +%Y%m%d)
datetime=$1
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime
sh /home/hzadmin/bj_ggsn/select2.sh $datetime
hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
#sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun1.sh $datetime
sh /home/hzadmin/urlAPP/ResultMatch/match_detail.sh -d $datetime
sh /home/hzadmin/urlAPP/hive.sh $datetime
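start2.sh is the manual re-run variant of start1.sh: it takes the data date as an argument and clears the target HDFS directory before reloading it. Example re-run (date value illustrative):

sh /home/hzadmin/bj_ggsn/start2.sh 20160710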
[hzadmin@BDI2 URLAppProgram_sf]$ crontab -l
00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log
00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$
[hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/select2.sh
hours=/home/hzadmin/bj_ggsn/hours.txt
datetime=$1
while read LINE
do
hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}
if [ $? -eq 0 ]; then
hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
else
echo 'not exist'
fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn_4g t
on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
insert into table bj_ggsn_mobile
partition (receive_day = '${datetime}')
select regexp_extract(t.MDN,'(1[0-9]{10})') MDN,
t.LAC,
t.CI,
t.IMEI,
t.BUSITYPE,
t.CAPTURETIME,
t.ENDTIME,
t.DURATION,
t.FLOWUP,
t.FLOWDOWN,
t.FLOWALL,
t.RATTYPE,
t.TERMIANL_IP,
t.DESTIP,
t.STATUSCODE,
t.USERAGENT,
t.APN,
t.IMSI,
t.SGSNIP,
t.GGSNIP,
t.CONTENTTYPE,
t.SOURCEPORT,
t.DESTPORT,
t.LOGOCODE,
t.URL,
t.RESULT,
t.HOST,
'4G',
t.YULIU2,
t.YULIU3;
"
[hzadmin@BDI2 URLAppProgram_sf]$
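select2.sh adds one bj_ggsn_4g partition per hour listed in /home/hzadmin/bj_ggsn/hours.txt; presumably that file simply lists the hour directories to load, e.g. (contents are an assumption, not taken from the server):

00
01
...
23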
Original post: http://blog.csdn.net/hzdxw/article/details/51992982