-rw-rw-r-- 1 hzadmin hzadmin 1258939838 Jul 10 08:53 20160709001.txt -rw-rw-r-- 1 hzadmin hzadmin 1259011023 Jul 10 08:53 20160709002.txt -rw-rw-r-- 1 hzadmin hzadmin 1258893627 Jul 10 08:53 20160709003.txt -rw-rw-r-- 1 hzadmin hzadmin 1258825284 Jul 10 08:53 20160709004.txt -rw-rw-r-- 1 hzadmin hzadmin 1258902330 Jul 10 08:53 20160709005.txt -rw-rw-r-- 1 hzadmin hzadmin 12662944 Jul 10 08:53 20160709006.txt -rw-rw-r-- 1 hzadmin hzadmin 1258267725 Jul 11 08:48 20160710001.txt -rw-rw-r-- 1 hzadmin hzadmin 1258199674 Jul 11 08:48 20160710002.txt -rw-rw-r-- 1 hzadmin hzadmin 1258244177 Jul 11 08:48 20160710003.txt -rw-rw-r-- 1 hzadmin hzadmin 1258312191 Jul 11 08:48 20160710004.txt -rw-rw-r-- 1 hzadmin hzadmin 1131040166 Jul 11 08:48 20160710005.txt -rw-rw-r-- 1 hzadmin hzadmin 1257713262 Jul 12 09:20 20160711001.txt -rw-rw-r-- 1 hzadmin hzadmin 1257638353 Jul 12 09:20 20160711002.txt -rw-rw-r-- 1 hzadmin hzadmin 1257634023 Jul 12 09:20 20160711003.txt -rw-rw-r-- 1 hzadmin hzadmin 1257516561 Jul 12 09:20 20160711004.txt -rw-rw-r-- 1 hzadmin hzadmin 1257627299 Jul 12 09:21 20160711005.txt -rw-rw-r-- 1 hzadmin hzadmin 553070932 Jul 12 09:21 20160711006.txt [hzadmin@BDI2 bj_data]$ [hzadmin@BDI2 bj_data]$ [hzadmin@BDI2 bj_data]$ pwd /dfs/ftp/hzadmin/bj_data [hzadmin@BDI2 bj_data]$ history 23 hdfs dfs -ls /home/hzadmin/bj_ggsn/start.log 24 hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/ 25 hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160509 26 cd .. 27 ll 28 crontab -l 29 sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log & 30 tail -f /home/hzadmin/bj_ggsn/start.log 31 sh /home/hzadmin/bj_ggsn/start3.sh > /home/hzadmin/bj_ggsn/start.log & 32 ll 33 cd /home/hzadmin 34 ll 35 pwd 36 cd bj_ggsn/ 37 ll 38 pwd 39 cd ../ 40 ll 41 cd urlAPP/ 42 ll 43 pwd 44 crobtab -l 45 crontab -l 46 cd ../bj_ggsn/ 47 ll 48 cd 49 ll 50 cd test 51 ll 52 cd ../bj_data/ 53 ll 54 crontab -l 55 ps -ef|grep start1 56 tail -f /home/hzadmin/bj_ggsn/start.log 57 more /home/hzadmin/bj_ggsn/start.log 58 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/20160509 59 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ 60 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/20160509 61 hdfs dfs -ls /share/external_table/ssa/DPI_MBL_4G/ALL/20160509 62 crontab -l 63 cd .. 64 ll 65 cd /home/hzadmin 66 ll 67 cd bj_ggsn/ 68 ll 69 cd ../urlAPP/ 70 ll 71 cd URLAppProgram_sf 72 ll 73 more public.cfg 74 ftp 132.63.10.7 75 ll 76 crontab -l 77 cd trydemo 78 ll 79 pwd 80 cd bj_ggsn 81 ll 82 cd .. 83 ll 84 cd /home/hzadmin/bj_ggsn/ 85 ll 86 pwd 87 vi /home/hzadmin/bj_ggsn/hours.txt 88 cd .. 89 ll 90 cd urlAPP 91 ll 92 cd .. 93 ;ll 94 ll 95 pwd 96 cd urlAPP 97 ll 98 cd /home/hzadmin/urlAPP/URLAppProgram_sf 99 ll 100 cd .. 101 ll 102 cd .. 103 ll 104 cd urlAPP 105 ll 106 cd URLAppProgram_sf 107 ll 108 pwd 109 ll 110 pwd 111 ll 112 vi public.cfg 113 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 114 ll 115 vi get_file_list.sh 116 cd /home/hzadmin/urlAPP/URLAppProgram_sf 117 ll 118 vi get_uacds.sh 119 cd ProgramByDay 120 ll 121 vi get_uacds.sh 122 ll 123 cd /home/hzadmin/urlAPP/URLAppProgram_sf 124 ll 125 vi public.cfg 126 ll 127 cd bj_data/ 128 ll 129 cd .. 
130 ll 131 cd /home/hzadmin 132 ll 133 cd bj_ggsn/ 134 ll 135 more start 136 more start.log 137 ps -ef|grep start1.sh 138 ps -ef|grep start3.sh 139 kill -9 178805 140 kill -9 221082 141 ps -ef|grep start1.sh 142 ll 143 cd bj_data/ 144 ll 145 cd /home/hzadmin 146 ll 147 cd bj_ggsn/ 148 ll 149 crontab -l 150 ps -ef|grep start1.sh 151 sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log & 152 tail -f /home/hzadmin/bj_ggsn/start.log 153 kill -9 14886 154 ll 155 sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log 156 ps -ef|grep start2 157 sh /home/hzadmin/bj_ggsn/start2.sh 20160509 > /home/hzadmin/bj_ggsn/start.log & 158 ps -ef|grep start2 159 ps -ef|grep start1 160 ps -ef|grep start2 161 ps -ef|grep start3 162 cd /home/hzadmin 163 ll 164 cd bj_ggsn/ 165 ll 166 more select1.sh 167 more start1.sh 168 ll 169 cd bj_data/ 170 ll 171 cd /home/hzadmin 172 ll 173 cd bj_ggsn/ 174 ll 175 tail -f start 176 tail -f start.log 177 ll 178 cd log 179 ll 180 tail -f 20160509_1.log 181 hive 182 hdfs dfs -ls /usr/local 183 hdfs dfs -ls / 184 hdfs dfs -chown /usr/local 185 hdfs dfs -chown hdfs /usr/local 186 ll 187 cd bj_data 188 ll 189 cd .. 190 ll 191 cd test 192 ll 193 cd /home/hzadmin 194 ll 195 cd bj_ggsn/ 196 ll 197 crontab -l 198 sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log & 199 hive 200 ll 201 cd bj_data/ 202 ll 203 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G 204 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/all/ 205 hdfs dfs -du -h /share/external_table/ssa/DPI_MBL_4G/ALL/ 206 ll 207 ll 208 cd urlAPP 209 ll 210 crontab -l 211 cd /dfs/ftp/hzadmin 212 ll 213 cd bj_data 214 ll 215 vi 20160509007.txt 216 cd .. 217 ll 218 vi log.txt 219 cd t_user 220 ll 221 vi phone_number.dat 222 cd /home/hzadmin/bj_ggsn 223 ll 224 vi select2.sh 225 vi /home/hzadmin/urlAPP/BoncRun.sh 226 cd /home/hzadmin/urlAPP 227 ll 228 cd URLAppProgram_sf 229 ll 230 vi common.cfg 231 df 232 cd /home/hzadmin/urlAPP/URLAppProgram_sf 233 ll 234 vi run.sh 235 ll 236 cd ProgramByDay 237 ll 238 vi report_summary.sh 239 ll 240 cd .. 241 ll 242 vi match.cfg 243 cd ProgramByHour 244 ll 245 cd .. 246 ll 247 cd ProgramByDay 248 ll 249 pwd 250 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 251 ll 252 sh ftp_getfilelist.sh 253 cd .. 254 ll 255 cd ProgramByDay 256 ll 257 cd .. 258 ll 259 cd ProgramByHour 260 ll 261 pwd 262 cd .. 263 ll 264 vi match.cfg 265 cd ProgramByHour 266 ll 267 cd .. 268 ll 269 cd .. 270 ll 271 cd ResultMatch 272 ll 273 crontab -l 274 exit 275 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 276 ll 277 vi get_uacds.sh 278 cd /home/hzadmin/urlAPP/URLAppProgram_sf 279 ll 280 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 281 ll 282 vi get_file_list.sh 283 get_uacds.sh 284 cd /data3/ftp000/URLAppProgram 285 cd .. 286 ll 287 cd ProgramByDay 288 ll 289 cd .. 290 ll 291 cd .. 292 ll 293 cd logs 294 ll 295 vi hive__20160320.log 296 ll 297 cd .. 298 ll 299 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 300 ll 301 cd .. 
302 ll 303 vi R_URL_TYPE_20160510_00.txt 304 df 305 cd ProgramByDay; 306 ll 307 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 308 ll 309 cd /home/hzadmin/urlAPP/URLAppProgram_sf 310 ll 311 ping 132.63.10.7 312 ls -lt /dfs/ftp/hzadmin/urlAPP/ResultMatch/data 313 df 314 cd /home/hzadmin/urlAPP/ 315 ll 316 vi hive.sh 317 cd /home/hzadmin/bj_ggsn/ 318 ll 319 vi delete.sh 320 pwd 321 cd /home/hzadmin/urlAPP/URLAppProgram_sf 322 ll 323 cd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay 324 ll 325 vi match.sh 326 pwd 327 ll 328 cd /dfs/data/ugftp/ccg/ 329 ll 330 cd /dfs/ftp/hzadmin 331 ll 332 cd bj_data 333 ll 334 pwd 335 cd .. 336 ll 337 cd urlAPP 338 ll 339 cd ResultMatch 340 ll 341 cd data 342 ll 343 cd .. 344 ll 345 cd .. 346 ll 347 cd .. 348 ll 349 cd bj_data 350 ll 351 cd .. 352 ll 353 du -sh bj_data 354 df 355 df -h 356 cd .. 357 ll 358 cd .. 359 ll 360 df 361 ll 362 cd /dfs/ftp/hzadmin 363 ll 364 cd t_user/ 365 ll 366 cd .. 367 ll 368 cd /dfs/ftp/hzadmin/ 369 ll 370 cd /home/hzadmin/ 371 ll 372 cd bj_ggsn/ 373 ll 374 more start1.sh 375 more select1.sh 376 cd /home/hzadmin 377 ll 378 cd 379 ll 380 cd bj_data/ 381 ll 382 pwd 383 cd .. 384 ll 385 cd t_user/ 386 ll 387 cd .. 388 ll 389 cd urlAPP/ 390 ll 391 cd ResultMatch/ 392 ll 393 cd data 394 ll 395 cd 201605 396 ll 397 cd 20160530 398 ll 399 cd .. 400 ll 401 cd 402 ll 403 hdfs 404 hadoop 405 hadoop version 406 ll 407 cd bj_data/ 408 ll 409 cd .. 410 cd /home/hzadmin/ 411 ll 412 cd bj_ggsn/ 413 ll 414 vim start1.sh 415 vim select1.sh 416 vim delete.sh 417 more start1.sh 418 vim /home/hzadmin/urlAPP/hive.sh 419 cd 420 cd bj_data/ 421 ll 422 exit 423 ll 424 pwd 425 cd home 426 cd bj_data 427 ll 428 cd ../ 429 ll 430 pwd 431 cd /home 432 ll 433 cd hzadmin 434 ll 435 cd urlAPP 436 ll 437 cd .. 438 ll 439 cd bj_data 440 ll 441 cd .. 442 ll 443 cd /home/hzadmin/bj_ggsn 444 ll 445 cd .. 446 ll 447 pwd 448 cd bj_ggsn 449 ll 450 cd jar 451 ll 452 cd .. 453 ll 454 cd .. 455 ll 456 cd urlAPP 457 ll 458 vi ResultMatch 459 cd URLAppProgram 460 ll 461 cd .. 462 ll 463 cd URLAppProgram_sf 464 ll 465 vi public.cfg 466 vi run.sh 467 ll 468 cd urlAPP 469 ll 470 cd .. 471 cd bj_data 472 ll 473 vi 20160607006.txt 474 cat 20160607006.txt 475 ll 476 cd /home 477 ll 478 cd /home/hzadmin/bj_ggsn/start.log 479 cd /home/hzadmin/bj_ggsn 480 ll 481 cat start.log 482 cd ../ 483 ll 484 cat /dfs/ftp/hzadmin/trydemo/log.txt 485 crontab -l 486 ll 487 cd /dfs/ftp/hzadmin/bj_data 488 ll 489 cat 20160607006.txt 490 ll 491 cd bj_data 492 ll 493 crontab -l 494 cd ~ 495 ls 496 cd /home/hzadmin/ 497 ls 498 pwd 499 cd ~ 500 pwd 501 cd /home/hzadmin/ 502 cd bj_ggsn/ 503 ls 504 vim start1.sh 505 cd .. 506 ls 507 cd urlAPP/ 508 ls 509 vim hive.sh 510 ls 511 cd .. 512 ls 513 cd urlAPP/ 514 ls 515 cd logs 516 ls 517 ll 518 cd 20160615 519 ls 520 ll 521 more match_20160615_20160614.log 522 ls 523 more report_20160615_20160614.log 524 cd .. 525 ls 526 cd .. 527 ls 528 cd URLAppProgram_sf/ 529 ls 530 vim run.sh 531 ls 532 cd ProgramByDay/ 533 ls 534 vim alter_table.sh 535 ls 536 vim create_table.sh 537 ls 538 vim match1.sh 539 ls 540 vim match.sh 541 hive 542 ll 543 cd bj_data/ 544 ll 545 rm -f ./201605*.txt 546 ll 547 ll|grep 201604 548 rm -f ./201604*.txt 549 ll 550 ls -lt 551 rm -f ./2015*.txt 552 ll 553 ls -lt 554 rm -f ./2015*.tx 555 ll 556 ls -lrt 557 ls -lt 558 debugfs 559 exit 560 ll 561 cd urlAPP/ 562 ll 563 cd /dfs/ftp 564 ll 565 cd /dfs/ftp/hzadmin 566 ll 567 cd urlAPP/ 568 ll 569 cd URLAppReport/ 570 LL 571 ll 572 cd .. 
573 ll 574 cd UnMatchTop1000/ 575 ll 576 cd .. 577 ll 578 cd ResultMatch/ 579 ll 580 cd data/ 581 ll 582 cd ../.. 583 l 584 cd .. 585 ll 586 cd 587 ll 588 cd /dfs/ftp/hzadmin/ 589 ll 590 cd /home/hzadmin/ 591 ll 592 cd bj_ggsn/ 593 ll 594 cd .. 595 ll 596 cd urlAPP/ 597 ll 598 cd URLAppProgram 599 cd URLAppProgram_sf 600 cd ../URLAppProgram_sf 601 ll 602 cd bin 603 ll 604 cd .. 605 ll 606 pwd 607 find .. -name "*match*" 608 find .. -name "*match.sh" 609 cd ../URLAppProgram_sf/ProgramByDay/match.sh 610 cd ../URLAppProgram_sf/ProgramByDay/ 611 ll 612 pwd 613 ll 614 cd bj_data/ 615 ll 616 exit 617 ll 618 exit 619 ll 620 cd bj_data/ 621 ll 622 cd /home/hzadmin 623 ll 624 cd bj_ggsn/ 625 ll 626 sh start2.sh 20160625 627 sh start2.sh 20160625 > start.log 2>&1 & 628 tail -f start.log 629 cd 630 ll 631 cd bj_data/ 632 ll 633 cd /ap 634 cd /app 635 ll 636 cd hadoop/con 637 cd hadoop/etc/hadoop/ 638 ll 639 more core-site.xml 640 ll 641 ll 642 cd /home/hzadmin 643 ll 644 cd bj_ggsn/ 645 ll 646 more start2.sh 647 sh start2.sh 20160625 648 ll 649 sh start2.sh 20160625 > start.log 2>&1 & 650 tail -f start.log 651 ll 652 more start1.sh 653 more start2.sh 654 ll 655 more start.log 656 cd /dfs/ftp/hzadmin/test/ 657 tail start.log 658 cd - 659 tail -n 200 start.log 660 ll 661 more start3.sh 662 sh ./start2.sh 20160625 > start.log 2>&1 & 663 tail -f start.log 664 cd 665 cd test/ 666 ll 667 cd .. 668 ll 669 cd bj_data/ 670 ll 671 cd 672 cd /home/hzadmin/ 673 ll 674 cd bj_ggsn/ 675 ll 676 vim start2.sh 677 sh ./start2.sh 20160625 > start.log 2>&1 & 678 df -h 679 tail -f start.log 680 ll 681 cd bj_data/ 682 ll 683 cd .. 684 ll 685 cd /home/hzadmin 686 ll 687 cd bj_ggsn/ 688 ll 689 sh start2.sh 20160624 > start.log 2>&1 & 690 ll /dfs/ftp/hzadmin/bj_data/ 691 cd 692 ll 693 cd bj_data/ 694 ll 695 cd - 696 ll 697 cd - 698 ll 699 cd - 700 cd /home/hzadmin/ 701 ll 702 cd bj_ggsn/ 703 ll 704 tail -f start.log 705 ll /dfs/ftp/hzadmin/bj_data/ 706 sh start2.sh 20160625 > start.log 2>&1 & 707 ftp 10.62.242.124 708 ll /dfs/ftp/hzadmin/bj_data/ 709 tail -f start.log 710 ll /dfs/ftp/hzadmin/bj_data/ 711 tail -f start.log 712 ll 713 ps -ef |grep start2.sh 714 ll 715 ll /dfs/ftp/hzadmin/bj_data/ 716 tail -f -n 100 start.log 717 ll 718 cd bj_data/ 719 ll 720 cd /home/hzadmin 721 ll 722 cd bj_ggsn/ 723 ll 724 sh start2.sh 20160626 > start.log 2>&1 & 725 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/match 726 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160627/ 727 hadoop fs -ls / 728 hadoop fs -ls /user 729 hadoop fs -ls /user/hzadmin 730 hadoop fs -mkdir /user/hzadmin/extract 731 hadoop fs -ls /user/hzadmin/ 732 exit 733 cd ~ 734 ls 735 cd /home/hzadmin 736 ls 737 spark-submit 738 exit 739 ls 740 exit 741 hadoop fs -ls /home/hzadmin 742 hadoop fs -ls /user/hzadmin 743 hadoop fs -rm -r /user/hzadmin/extract 744 hadoop fs -ls /user/hzadmin 745 exit 746 ll 747 cd bj_data/ 748 ll 749 /home/spark/spark-1.2.2-bin-hadoop2.4/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 750 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 751 ll /home/spark/spark-1.2.2-bin-hadoop2.4/bin 752 exit 753 ll 
/home/spark/spark-1.2.2-bin-hadoop2.4/bin 754 ll /home/spark/spark-1.2.2-bin-hadoop2.4/ 755 ll /home/spark/ 756 exit 757 ll /home/spark/ 758 ll /home/spark/spark-1.2.2-bin-hadoop2.4/ 759 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit 760 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 761 exit 762 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 763 yarn application -list 764 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 765 yarn application -list 766 yarn application -kill application_1464150086810_7363 767 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 768 hadoop fs -ls /user/hzadmin 769 hadoop fs -rm -r /user/hzadmin/extract 770 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160628/match /user/hzadmin/extract 771 hadoop fs -ls /user/hzadmin/extract 772 hadoop fs -du -h /user/hzadmin/extract 773 hadoop fs -du -h /user/hzadmin/ 774 ls 775 exit 776 hadoop fs -ls /user/hzadmin 777 hadoop fs -ls /user/hzadmin/.sparkStaging 778 hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_7363 779 ls 780 mkdir extract 781 ls 782 hadoop fs -get /user/hzadmin/extract/* /home/hzadmin/extract/ 783 ls 784 ll -h 785 ll extract/ 786 ls 787 tar -zcvf extract.tar.gz extract 788 ls 789 ll -h 790 exit 791 ll 792 cd bj_data/ 793 ll 794 ll -h 795 cd .. 796 ll 797 mkdir 6y 798 ll 799 cd bj_data/ 800 ll 801 cp 201606* ../6y/ 802 ll 803 cd .. 804 ll 805 rm -rf 6y 806 ll 807 cd 6y/ 808 ll 809 df -h 810 ll 811 cd .. 
812 ll 813 cd bj_data/ 814 ll 815 ls |grep 201606 816 ls |grep 201606|xargs du -h 817 ls |grep 201606|xargs du -cb 818 ls |grep 201606|xargs du -h 819 ls |grep 201606|xargs du -cb 820 ls |grep 201606|xargs du -cbh 821 ls |grep 201603|xargs du -cbh 822 hadoop fs -ls /user/hzadmin 823 hadoop fs -ls /user/hzadmin/extract 824 hadoop fs -rm -r /user/hzadmin/extract 825 hadoop fs -ls /user/hzadmin 826 hadoop fs -ls /user/hzadmin/.sparkStaging 827 hadoop fs -ls /user/hzadmin/.sparkStaging/application_1464150086810_9663 828 hadoop fs -ls /user/hzadmin/.sparkStaging/.staging 829 hadoop fs -ls /user/hzadmin/.staging 830 hadoop fs -ls / 831 hadoop fs -ls /app-logs 832 hadoop fs -ls /app-logs/hzadmin 833 hadoop fs -ls /app-logs/hzadmin/logs 834 hadoop fs -ls /app-logs/hzadmin/logs/application_1464150086810_9663 835 cd ~ 836 ls 837 cd /home/hzadmin/ 838 ls 839 hadoop fs -get /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454 840 ls 841 more BD18.bd.bjtel_45454 842 hadoop fs -tail /app-logs/hzadmin/logs/application_1464150086810_9663/BD18.bd.bjtel_45454 843 exit 844 hadoop fs -ls /user/hzadmin 845 hadoop fs -ls /user/hzadmin/extract 846 hadoop fs -rm -r /user/hzadmin/extract 847 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/ 848 hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/ 849 hadoop fs -ls /user/hzadmin/extract 850 hadoop fs -du -h /user/hzadmin/extract 851 hadoop fs -du -h /user/hzadmin/ 852 hadoop fs -du -h /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/ 853 hadoop fs -du -h /user/hzadmin/ 854 hadoop fs -du -h /user/hzadmin/extract2 855 cd /home/hzadmin 856 ls 857 hadoop fs -get /user/hzadmin/extract 858 ls 859 ls extract/ 860 hadoop fs -get /user/hzadmin/extract2 861 ls 862 man gz 863 man tar 864 ls 865 tar -cf extract 866 tar zcvf extract.tar.gz extract 867 ls 868 tar zcvf extract2.tar.gz extract2 869 ls 870 exit 871 hadoop fs -ls /user/hzadmin 872 hadoop fs -ls /user/hzadmin/extract 873 hadoop fs -rm -r /user/hzadmin/extract 874 hadoop fs -ls /user/hzadmin/ 875 ls 876 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 8g --executor-cores 4 --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract 877 yarn application -list 878 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract 879 hadoop fs -ls /user/hzadmin 880 hadoop fs -ls /user/hzadmin/extract 881 hadoop fs -rm -r /user/hzadmin/extract 882 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160705/match /user/hzadmin/extract 883 yarn application -list 884 yarn application -kill application_1464150086810_9170 885 yarn application -list 886 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4g --num-executors 20 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 887 yarn application -list 888 yarn 
application -kill application_1464150086810_9256 889 history 890 hadoop fs -ls /user/hzadmin 891 hadoop fs -ls /user/hzadmin 892 hadoop fs -ls /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match 893 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 894 yarn application -list 895 yarn application -kill application_1464150086810_9293 896 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 897 ls /home/spark/ 898 find /home/spark/ -name *example*jar 899 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i pi 900 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i Pi 901 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i SparkPi 902 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 40 10 903 find /home/spark/ -name *example*jar 904 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 905 yarn 906 yarn application 907 yarn application -list 908 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 4G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 909 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --deploy-mode cluster --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 910 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apacke.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 911 hdfs dfs -ls /user/hzadmin/ 912 hdfs dfs -ls /user/hzadmin/extract 913 hdfs dfs -rmr /user/hzadmin/extract 914 hdfs dfs -ls /user/hzadmin/extract 915 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 916 yarn application -list 917 yarn application -kill application_1464150086810_9459 918 yarn application -list 919 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 920 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather 
/home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 921 yarn application -list 922 yarn application -kill application_1464150086810_9476 923 hadoop fs -ls /user/hzadmin 924 hadoop fs -rm -r /user/hzadmin/extract 925 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode client --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 926 yarn application -list 927 yarn application -kill application_1464150086810_9481 928 pwd 929 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.sparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 930 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep sparkPi 931 jar -tvf /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar | grep -i sparkpi 932 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client --executor-memory 1G --num-executors 4 /home/spark/spark-1.2.2-bin-hadoop2.4/lib/spark-examples-1.2.2-hadoop2.4.0.jar 10 933 diagnostics: Application application_1464150086810_9496 failed 2 times due to AM Container for appattempt_1464150086810_9496_000002 exited with exitCode: 10 due to: Exception from container-launch: org.apache.hadoop.util.Shell$ExitCodeException: 934 org.apache.hadoop.util.Shell$ExitCodeException: 935 at org.apache.hadoop.util.Shell.runCommand(Shell.java:505) 936 at org.apache.hadoop.util.Shell.run(Shell.java:418) 937 at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650) 938 at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195) 939 at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300) 940 at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81) 941 at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) 942 at java.util.concurrent.FutureTask.run(FutureTask.java:138) 943 at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) 944 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) 945 at java.lang.Thread.run(Thread.java:662) 946 Container exited with a non-zero exit code 10 947 .Failing this attempt.. Failing the application. 948 ApplicationMaster host: N/A 949 ApplicationMaster RPC port: -1 950 queue: default 951 start time: 1467966689710 952 final status: FAILED 953 tracking URL: BD01.bd.bjtel:8088/cluster/app/application_1464150086810_9496 954 user: hzadmin 955 Exception in thread "main" org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master. 
956 at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:118) 957 at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:59) 958 at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:140) 959 at org.apache.spark.SparkContext.<init>(SparkContext.scala:348) 960 at org.apache.spark.examples.SparkPi$.main(SparkPi.scala:28) 961 at org.apache.spark.examples.SparkPi.main(SparkPi.scala) 962 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 963 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 964 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 965 at java.lang.reflect.Method.invoke(Method.java:606) 966 at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358) 967 at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75) 968 at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 969 [hzadmin@BDI2 hzadmin]$ 970 yarn application -list 971 hadoop fs -ls /user/hzadmin 972 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 4G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 973 yarn application -list 974 yarn application -kill application_1464150086810_9663 975 hadoop fs -ls /user/hzadmin 976 hadoop fs -ls /user/hzadmin/extract 977 hadoop fs -rm -r /user/hzadmin/extract 978 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 979 yarn application -lsit 980 yarn application -lis 981 yarn application -kill application_1464150086810_9732 982 hadoop fs -rm -r /user/hzadmin/extract 983 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 16G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 984 yarn application -kill application_1464150086810_9733 985 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 20G --executor-cores 4 --num-executors 10 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706/match /user/hzadmin/extract 986 /home/spark/spark-1.2.2-bin-hadoop2.4/bin/spark-submit --class Extract --master yarn --deploy-mode cluster --executor-memory 10G --num-executors 40 --queue datagather /home/hzadmin/process_2.10-1.0.jar /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160707/match /user/hzadmin/extract2 987 ls 988 ls extract/ 989 rm -rf extract 990 ls 991 top 992 ls 993 top 994 ll 995 [wd 996 pwd 997 cd /home/hzadmin 998 ll 999 cd bj_ggsn/ 1000 ll 1001 crontab -l 1002 more start1.sh 1003 more start2.sh 1004 ~/bj_data/ 1005 cd ~/bj_data/ 1006 ll 1007 cd - 1008 ll 1009 more start2.sh 1010 more start1.sh 1011 ll 1012 cd .. 
1013 cd urlAPP/
1014 ll
1015 cd
1016 ll
1017 cd /dfs/ftp/hzadmin/
1018 ll
1019 cd bj_data/
1020 ll
1021 pwd
1022 history
[hzadmin@BDI2 bj_data]$
--------------------------------------------------------------------------------------
[hzadmin@BDI2 home]$ cd hzadmin/
[hzadmin@BDI2 hzadmin]$
[hzadmin@BDI2 hzadmin]$ ll
total 28
drwxrwxr-x 3 hzadmin hzadmin 4096 Aug 5 2015 bak
drwxr-xr-x 4 hzadmin hzadmin 4096 Jun 26 19:31 bj_ggsn
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:48 extract
drwxrwxr-x 2 hzadmin hzadmin 4096 Jul 11 14:55 extract2
-rw-r--r-- 1 root root 5485 Jun 29 10:46 process_2.10-1.0.jar
drwxrwxr-x 8 hzadmin hzadmin 4096 Jun 17 11:09 urlAPP
[hzadmin@BDI2 hzadmin]$ pwd
/home/hzadmin
[hzadmin@BDI2 hzadmin]$ cd bj_ggsn/
[hzadmin@BDI2 bj_ggsn]$ ll
total 136
-rwxr-xr-x 1 hzadmin hzadmin 433 Feb 10 20:39 delete.sh
-rw-r--r-- 1 hzadmin hzadmin 71 Apr 30 2015 hours.txt
drwxr-xr-x 2 root root 4096 Aug 5 2015 jar
drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 12 03:19 log
-rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out
-rwxr-xr-x 1 hzadmin hzadmin 1845 Sep 23 2015 select1.sh
-rwxr-xr-x 1 hzadmin hzadmin 454 Oct 12 2015 select2bak.sh
-rwxr-xr-x 1 hzadmin hzadmin 1367 Oct 12 2015 select2.sh
-rwxr-xr-x 1 hzadmin hzadmin 1344 Jun 18 2015 select.sh
-rwxr-xr-x 1 hzadmin hzadmin 1337 May 4 2015 select.shbak
-rwxr-xr-x 1 hzadmin hzadmin 628 Oct 28 2015 start1.sh
-rwxr-xr-x 1 hzadmin hzadmin 692 Jun 26 19:31 start2.sh
-rwxr-xr-x 1 hzadmin hzadmin 636 May 10 14:22 start3.sh
-rwxr-xr-x 1 hzadmin hzadmin 631 Mar 5 13:27 startbak1.sh
-rw-r--r-- 1 hzadmin hzadmin 16658 Jul 12 09:21 start.log
[hzadmin@BDI2 bj_ggsn]$
[hzadmin@BDI2 bj_ggsn]$ crontab -l
00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log
00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt
[hzadmin@BDI2 bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@BDI2 bj_ggsn]$
[hzadmin@BDI2 bj_ggsn]$ pwd
/home/hzadmin/bj_ggsn
[hzadmin@BDI2 bj_ggsn]$ cat start1.sh
#!/bin/sh
source ~/.bash_profile
datetime=$(date --date "1 days ago" +%Y%m%d)
cd /home/hzadmin/bj_ggsn/
sh /home/hzadmin/bj_ggsn/select1.sh $datetime >> log/${datetime}_1.log 2>&1
sh /home/hzadmin/bj_ggsn/select2.sh $datetime >> log/${datetime}_2.log 2>&1
hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/
sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
sh /home/hzadmin/urlAPP/BoncRun.sh
sh /home/hzadmin/urlAPP/hive.sh $datetime
sh /home/hzadmin/bj_ggsn/delete.sh
[hzadmin@BDI2 bj_ggsn]$ cat select1.sh
#!/bin/bash
datetime=$1
hours=/home/hzadmin/bj_ggsn/hours.txt
s=`du -k /dfs/ftp/hzadmin/t_user/phone_number.dat|awk '{print $1}'`
datetime2=`date -d -2day +%Y%m%d`
hive -e"use dpi;alter table t_user add IF NOT EXISTS partition(receive_day='$datetime');"
if [ $s -ge 4000 ]; then
hadoop fs -put /dfs/ftp/hzadmin/t_user/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
else
hadoop fs -mv /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime2/*.dat /apps/hive/warehouse/dpi.db/t_user/receive_day=$datetime/
fi
while read LINE
do
hadoop fs -test -e /share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}
if [ $? -eq 0 ]; then
hive -e "use dpi;alter table bj_ggsn add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
else
echo 'not exist'
fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn t on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
insert overwrite table bj_ggsn_mobile partition (receive_day = '${datetime}')
select regexp_extract(t.MDN,'(1[0-9]{10})') MDN, t.LAC, t.CI, t.IMEI, t.BUSITYPE, t.CAPTURETIME, t.ENDTIME, t.DURATION, t.FLOWUP, t.FLOWDOWN, t.FLOWALL, t.RATTYPE, t.TERMIANL_IP, t.DESTIP, t.STATUSCODE, t.USERAGENT, t.APN, t.IMSI, t.SGSNIP, t.GGSNIP, t.CONTENTTYPE, t.SOURCEPORT, t.DESTPORT, t.LOGOCODE, t.URL, t.RESULT, t.HOST, '3G', t.YULIU2, t.YULIU3;
"
[hzadmin@BDI2 bj_ggsn]$ cat select2.sh
hours=/home/hzadmin/bj_ggsn/hours.txt
datetime=$1
while read LINE
do
hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}
if [ $? -eq 0 ]; then
hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log
else
echo 'not exist'
fi
done < $hours
hive -e"
use dpi;
set hive.auto.convert.join=false;
set mapreduce.job.queuename=thirdpart1;
from t_user m join bj_ggsn_4g t on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}')
insert into table bj_ggsn_mobile partition (receive_day = '${datetime}')
select regexp_extract(t.MDN,'(1[0-9]{10})') MDN, t.LAC, t.CI, t.IMEI, t.BUSITYPE, t.CAPTURETIME, t.ENDTIME, t.DURATION, t.FLOWUP, t.FLOWDOWN, t.FLOWALL, t.RATTYPE, t.TERMIANL_IP, t.DESTIP, t.STATUSCODE, t.USERAGENT, t.APN, t.IMSI, t.SGSNIP, t.GGSNIP, t.CONTENTTYPE, t.SOURCEPORT, t.DESTPORT, t.LOGOCODE, t.URL, t.RESULT, t.HOST, '4G', t.YULIU2, t.YULIU3;
"
[hzadmin@BDI2 bj_ggsn]$ cat /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh
#!/bin/bash
cd `dirname $0`
cd ProgramByDay/
./get_file_list.sh
./get_uacds.sh
[hzadmin@BDI2 bj_ggsn]$ cd /home/hzadmin/urlAPP/URLAppProgram_sf/
[hzadmin@BDI2 URLAppProgram_sf]$ ll
total 129348
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 bin
-rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg
-rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh
-rw-rw-r-- 1 hzadmin hzadmin 33 Jul 11 21:01 match.cfg
drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 12 04:35 ProgramByDay
drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour
-rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg
-rw-rw-r-- 1 hzadmin hzadmin 721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh
-rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh
-rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt
-rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml
-rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh
-rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt
-rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar
-rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar
-rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar
[hzadmin@BDI2 URLAppProgram_sf]$ cd ProgramByDay/
[hzadmin@BDI2 ProgramByDay]$ ll
total 132
-rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak
-rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh
-rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh
-rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh
-rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh
-rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh
-rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh
-rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh
-rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh
-rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh
-rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh
-rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz
-rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz
-rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh
-rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz
[hzadmin@BDI2 ProgramByDay]$ cat get_file_list.sh
#!/bin/bash
cd `dirname $0`
eval $(grep FTP_DATA_PATH ../public.cfg)
eval $(grep FTP_IP ../public.cfg)
eval $(grep FTP_USERNAME ../public.cfg)
eval $(grep FTP_PWD ../public.cfg)
ftp -n<<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
cd $FTP_DATA_PATH
mdir $FTP_DATA_PATH ftp_con.txt
bye
!
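
For reference, a minimal standalone sketch (not from the captured session) of the pattern get_file_list.sh uses: eval'ing the semicolon-terminated KEY=value lines out of public.cfg into shell variables, then driving a non-interactive ftp session through a here-document so the remote listing of $FTP_DATA_PATH lands in a local ftp_con.txt. The file demo.cfg and its values below are hypothetical stand-ins for public.cfg:

#!/bin/bash
# Sketch only: demo.cfg is a hypothetical stand-in for ../public.cfg and would
# contain semicolon-terminated lines such as:
#   FTP_IP=192.0.2.10;
#   FTP_USERNAME=demo;
#   FTP_PWD=secret;
#   FTP_DATA_PATH=/incoming;
cfg=./demo.cfg
# Each eval executes one "KEY=value;" line from the config, so the key becomes a
# shell variable; the trailing ";" merely terminates the assignment.
eval $(grep FTP_IP "$cfg")
eval $(grep FTP_USERNAME "$cfg")
eval $(grep FTP_PWD "$cfg")
eval $(grep FTP_DATA_PATH "$cfg")
# "ftp -n" suppresses auto-login so user/password can be scripted; every line of the
# here-document up to the closing "!" is fed to the ftp client, and mdir writes the
# long listing of the remote directory into the local file ftp_con.txt.
ftp -n <<!
open $FTP_IP
user $FTP_USERNAME $FTP_PWD
cd $FTP_DATA_PATH
mdir $FTP_DATA_PATH ftp_con.txt
bye
!

get_uacds.sh (shown later in this session) then extracts the ninth field of ftp_con.txt with awk to get the file names and decides which UACDS_*.tar.gz packages still need to be downloaded with mget.
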
[hzadmin@BDI2 ProgramByDay]$ cat ../public.cfg #前置机存放分类匹配程序压缩包路径 FTP_DATA_PATH=/data3/ftp000/URLAppProgram; #存放分类匹配程序压缩包路径 DATA_HOME=/home/hzadmin/urlAPP/URLAppProgram; #集团分类匹配程序压缩包前置机IP FTP_IP=132.63.10.7; #集团前置机FTP用户名 FTP_USERNAME=ftp811; #集团前置机FTP用户密码 FTP_PWD=ftp811!123; #存放Top1000未匹配记录文件路径 UnMatchTop1000=/home/hzadmin/urlAPP/UnMatchTop1000 #存放urlApp统计报表文件路径 URLAppReport=/home/hzadmin/urlAPP/URLAppReport #存放匹配程序压缩包上限 ZIP_LIMIT=10; #存放Top1000、urlApp统计报表存储上限 REPORT_LIMIT=10; #存放hadoop文件存储上限 DELETE_DAY=4; #存放汇总match文件上限 SUMMARY_DAY=7 [hzadmin@BDI2 ProgramByDay]$ cat ftp_con.txt cat: ftp_con.txt: No such file or directory [hzadmin@BDI2 ProgramByDay]$ ll total 132 -rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh -rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh -rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak -rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh -rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh -rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh -rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh -rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh -rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh -rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh -rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh -rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh -rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh -rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz -rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh -rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz [hzadmin@BDI2 ProgramByDay]$ pwd /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay [hzadmin@BDI2 ProgramByDay]$ grep FTP_PWD ../public.cfg FTP_PWD=ftp811!123; [hzadmin@BDI2 ProgramByDay]$ id uid=526(hzadmin) gid=526(hzadmin) groups=526(hzadmin),547(spark) [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 URLAppProgram_sf]$ pwd /home/hzadmin/urlAPP/URLAppProgram_sf [hzadmin@BDI2 URLAppProgram_sf]$ cd ProgramByDay/ [hzadmin@BDI2 ProgramByDay]$ ll total 132 -rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh -rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh -rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak -rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh -rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh -rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh -rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh -rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh -rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh -rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh -rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh -rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh -rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh -rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 
RuleDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz -rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh -rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz [hzadmin@BDI2 ProgramByDay]$ cat get_uacds.sh #!/bin/bash #=========================================== #* #* Copyright BONC #* All rights reserved. #* Abstract: Download file from the group to local #* FileName: get_uacds.sh #* Author: LiangWei #* Create Time: 2014-02-22 #* Mender: #* Mender Time: #* Modify content: #* #============================================ cd `dirname $0` eval $(grep FTP_DATA_PATH ../public.cfg) eval $(grep DATA_HOME ../public.cfg) eval $(grep FTP_IP ../public.cfg) eval $(grep FTP_USERNAME ../public.cfg) eval $(grep FTP_PWD ../public.cfg) LOCAL=`pwd` $LOCAL/get_file_list.sh YY=`date +%Y` MM=`date +%m` DD=`date +%d` DATE=$YY$MM$DD cd $LOCAL awk '{print $9}' ftp_con.txt > grep.txt e=`wc -l grep.txt | cut -d ' ' -f 1` for ((m=1;m<=e;m++)) do grepstr='sed -n '$m'p grep.txt' greps=`$grepstr` greps2=`expr substr $greps 1 8` greps3=`expr substr $greps 18 7` if [[ "$greps2" = 'UACDS_20' && "$greps3" = '.tar.gz' ]]; then echo $greps >> grep2 fi done cut -d '_' -f 2 grep2 | uniq | sort -r > day a=`wc -l day | cut -d ' ' -f 1` sort -r grep2 > mu strd='sed -n 1p day' str=`$strd` b=`wc -l mu | cut -d ' ' -f 1` for ((j=1;j<=b;j++)) do str1='sed -n '$j'p mu ' echo `$str1` > str2 str2='str2' str3=`cut -d '_' -f 2 str2` if [ "$str"x = "$str3"x ]; then cat $str2 >> files #del break fi done c=`wc -l files | cut -d ' ' -f 1` for ((k=1;k<=c;k++)) do mystr='sed -n '$k'p files' myFile=`$mystr` cd $DATA_HOME if [ -f $myFile ]; then echo $myFile else cd $LOCAL echo $myFile > files2.txt #del fi done logDir=${DATA_HOME}/logs logFile=${DATA_HOME}/'logs'/${DATE}.log mkdir -p $logDir d=`wc -l files2.txt | cut -d ' ' -f 1` for ((l=1;l<=d;l++)) do echo "Begin at: `date` ;" | tee -a $logFile time_begin=$(date +%s ) str4='sed -n '$l'p files2.txt' DATAFILES=`$str4` ftp -n<<! open $FTP_IP user $FTP_USERNAME $FTP_PWD bin prompt cd $FTP_DATA_PATH lcd $DATA_HOME mget $DATAFILES bye ! 
time_end=$(date +%s ) time_total=`expr ${time_end} - ${time_begin}` echo "End at: `date`;" | tee -a $logFile echo "DownLoadfilename: $DATAFILES total time=${time_total} s ;" | tee -a $logFile done cd $LOCAL rm day rm files rm ftp_con.txt rm grep.txt rm grep2 rm mu rm str2 echo "$LOCAL"; sh $LOCAL/load_data.sh [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ ll total 132 -rwxr-xr-x 1 hzadmin hzadmin 1846 May 11 2015 alter_table.sh -rwxr-xr-x 1 hzadmin hzadmin 17407 Jul 20 2015 create_table.sh -rwxr-xr-x 1 hzadmin hzadmin 18168 Jun 8 2015 create_table.sh.bak -rwxr-xr-x 1 hzadmin hzadmin 1280 Jun 16 2015 drop_table.sh -rwxr-xr-x 1 hzadmin hzadmin 291 Jul 14 2015 get_file_list.sh -rwxr-xr-x 1 hzadmin hzadmin 2279 Jul 14 2015 get_uacds.sh -rwxr-xr-x 1 hzadmin hzadmin 4389 May 7 2015 label.sh -rwxr-xr-x 1 hzadmin hzadmin 604 Nov 7 2014 load_data.sh -rwxr-xr-x 1 hzadmin hzadmin 1011 Nov 7 2014 logupload.sh -rwxr-xr-x 1 hzadmin hzadmin 2829 Aug 4 2015 match1.sh -rwxr-xr-x 1 hzadmin hzadmin 2908 Sep 28 2015 matchbak.sh -rwxr-xr-x 1 hzadmin hzadmin 2820 May 6 2015 match.sh -rwxr-xr-x 1 hzadmin hzadmin 6788 Jun 8 2015 report.sh -rwxr-xr-x 1 hzadmin hzadmin 2060 May 6 2015 report_summary.sh -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 RuleDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 RuleSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 144 Jul 16 2015 TypeDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 146 Jul 16 2015 TypeSiteDetails.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 151 Jul 16 2015 UnMatchSiteTop1000.20150717.20150715.00.811.DAT.tar.gz -rw-rw-r-- 1 hzadmin hzadmin 147 Jul 16 2015 UnMatchTop1000.20150717.20150715.00.811.DAT.tar.gz -rwxr-xr-x 1 hzadmin hzadmin 4691 Nov 7 2014 upload.sh -rw-rw-r-- 1 hzadmin hzadmin 166 Jul 16 2015 URLStatInfo.20150717.20150715.00.811.DAT.tar.gz [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat load_data.sh #!/bin/bash #*================================================= #* #* FileName : load_data.sh #* CreateDate: 2014-02-25 #* Abstract : Unzip the file 'UACDS_YYYYMMDD_**.tar.gz' #* Author : LiangWei #* #* BONC All rights reserved. #*================================================== cd `dirname $0` PWDNOW=`pwd` eval $(grep DATA_HOME ../public.cfg) cd .. 
LOC=`pwd` cd ${PWDNOW} a=`wc -l files2.txt | cut -d ' ' -f 1` if [ "$a" == 1 ]; then str=`sed -n 1p files2.txt` rm -f ${LOC}/match.cfg rm -f ${LOC}/UACDS*.jar rm -f ${LOC}/R_*_TYPE*.txt tar xzvf $DATA_HOME/$str -C ${LOC} fi rm files2.txt [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 URLAppProgram_sf]$ pwd /home/hzadmin/urlAPP/URLAppProgram_sf [hzadmin@BDI2 URLAppProgram_sf]$ ll total 129348 drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 bin -rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg -rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh -rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh -rw-rw-r-- 1 hzadmin hzadmin 33 Jul 11 21:01 match.cfg drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 12 04:35 ProgramByDay drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour -rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg -rw-rw-r-- 1 hzadmin hzadmin 721256 Jul 11 21:01 R_APP_TYPE_20160711_00.txt -rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh -rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh -rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 11 21:01 R_NOISE_TYPE_20160711_00.txt -rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 11 21:01 R_SITE_TYPE_20160711_00.txt -rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml -rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh -rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh -rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 11 21:01 R_URL_TYPE_20160711_00.txt -rw-rw-r-- 1 hzadmin hzadmin 32290292 Jul 11 21:01 UACDS_20160711_00_01_1.jar -rw-rw-r-- 1 hzadmin hzadmin 32233495 Jul 11 21:00 UACDS_20160711_00_01.jar -rw-rw-r-- 1 hzadmin hzadmin 32339441 Jul 11 21:01 UACDS_20160711_00_02_1.jar -rw-rw-r-- 1 hzadmin hzadmin 32282651 Jul 11 21:00 UACDS_20160711_00_02.jar [hzadmin@BDI2 URLAppProgram_sf]$ cat run.sh #!/bin/bash #run.sh #*================================================= #* #* FileName : run.sh #* CreateDate: 2014-04-03 #* Abstract : Overall deployment schedule #* Author : LiBin #* #* BONC All rights reserved. #*================================================== cd `dirname $0` eval $(grep DEFAULT_TIME common.cfg) eval $(grep LOC_DIR common.cfg) eval $(grep DELAY_DAY common.cfg) eval $(grep DELAY_HOUR common.cfg) eval $(grep IS_LTBAL common.cfg) dataday=`date -d -${DELAY_DAY}days-${DELAY_HOUR}hours +%Y%m%d` datahour=`date -d -${DELAY_HOUR}hours +%H` if [ $# -eq 2 ] ; then if [ ${DEFAULT_TIME} = 'day' ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; else dataday=$1; datahour=$2; fi; elif [ $# -eq 1 ] ; then if [ ${DEFAULT_TIME} = 'hour' ] ; then echo "Input parameter error : there should be 2 parameters"; exit 1; else dataday=$1; fi; fi; DAY=`date +%Y%m%d` if [ ! 
-d "${LOC_DIR}/logs/${DAY}" ] ; then mkdir -p "${LOC_DIR}/logs/${DAY}" fi; DEL_DAY=`date -d -10days +%Y%m%d` if [ -d "${LOC_DIR}/logs/${DEL_DAY}" ] ; then rm -rf "${LOC_DIR}/logs/${DEL_DAY}" fi; if [ ${DEFAULT_TIME} = 'day' ] ; then echo "==========================================================================="; echo "========== The program is running , please keep the network flow ..."; echo "========== running model ${DEFAULT_TIME}" : ${dataday}; echo "==========================================================================="; cd `dirname $0` echo "========== Step 1 of 2 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log "; ./ProgramByDay/match.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday".log 2>&1 echo "========== Step 2 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log "; ./ProgramByDay/report_summary.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday".log 2>&1 ./ProgramByDay/report.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday".log 2>&1 if [ ${IS_LTBAL} = '1' ] ; then ./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1 fi echo "========== Step 3 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log "; ./ProgramByDay/upload.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday".log 2>&1 ./ProgramByDay/logupload.sh ${DAY} exit 0; fi; if [[ ${datahour} = '00' ]] ; then echo "==========================================================================="; echo "========== The program is running , please keep the network flow ..."; echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour}; echo "==========================================================================="; echo "========== Step 1 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByDay/ftp_getfilelist.sh > ${LOC_DIR}/logs/${DAY}/get_"$DAY"_"$dataday"_"$datahour".log 2>&1 echo "========== Step 2 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1 echo "========== Step 3 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1 ./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1 if [[ ${datahour} = '23' ]] ; then echo "==========================================================================="; echo "========== The program is running , please keep the network flow ..."; echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour}; echo "==========================================================================="; echo "========== Step 1 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1 echo "========== Step 2 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/report_summary_H.sh ${dataday} 
${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1 ./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1 echo "========== Step 3 of 3 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log "; if [ ${IS_LTBAL} = '1' ] ; then ./ProgramByDay/label.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/label_"$DAY"_"$dataday".log 2>&1 fi ./ProgramByHour/upload_H.sh ${dataday} > ${LOC_DIR}/logs/${DAY}/upload_"$DAY"_"$dataday"_"$datahour".log 2>&1 else echo "==========================================================================="; echo "========== The program is running , please keep the network flow ..."; echo "========== running model ${DEFAULT_TIME}" : ${dataday} ${datahour}; echo "==========================================================================="; echo "========== Step 1 of 2 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/match_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/match_"$DAY"_"$dataday"_"$datahour".log 2>&1 echo "========== Step 2 of 2 ..."; echo "========== loging : tail -100f ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log "; ./ProgramByHour/report_summary_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_summary_"$DAY"_"$dataday"_"$datahour".log 2>&1 ./ProgramByHour/report_H.sh ${dataday} ${datahour} > ${LOC_DIR}/logs/${DAY}/report_"$DAY"_"$dataday"_"$datahour".log 2>&1 fi; ./remove_files.sh > ${LOC_DIR}/logs/${DAY}/remove_"$DAY"_"$dataday"_"$datahour".log 2>&1 ./ProgramByDay/logupload.sh ${DAY}[hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 ProgramByDay]$ cat match.sh #!/bin/bash # match.sh ###################################################################################################### # function : 匹配程序运行、统计报表出数据 # date: 2014/02/10 # author: SPP # param:day(数据日期) ###################################################################################################### cd `dirname $0` cd .. PWDNOW=`pwd` cd `dirname $0` eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg) eval $(grep URL_MATCH ${PWDNOW}/common.cfg) eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg) eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg) eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg) eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg) eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg) eval $(grep HIVE_USER ${PWDNOW}/common.cfg) eval $(grep LOC_DIR ${PWDNOW}/common.cfg) eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg) eval $(grep MR_VERSION ${PWDNOW}/common.cfg) eval $(grep PARA_JAR ${PWDNOW}/common.cfg) eval $(grep PARA_HIVE ${PWDNOW}/common.cfg) eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hadoop fs -ls ${URL_INPUT_PATH}${day} > exist_test ; x=`wc -l exist_test | cut -d ' ' -f 1`; if [ ${x} = 0 ] ; then echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day} file is not exist !" 
rm -f exist_test exit 1; fi; rm -f exist_test hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE} hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE} hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE} hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE} echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar"; hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day} ${URL_MATCH}${day} $PWDNOW/${DPI_CONF_PATH} #hadoop fs -rm ${url_match}${day}/part-m-*.gz hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; set dpi.encode.license=${HIVE_LICENSE}; ${PARA_HIVE} set mapred.job.name=CMSS-COUNT; alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_match add partition (receive_day='${day}') location '${day}/match'; alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}') location '${day}/noise'; alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}') location '${day}/unmatch'; " [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat match.cfg RULE_PROV_VERNO=UACDS_20160711_00 [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat common.cfg ##匹配程序的输出路径,也是hive匹配表、噪音表、未识别表的hive文件路径 URL_MATCH=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/ ##hive统计汇总表 MATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/ UNMATCH_SUMMARY=/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_unmatch_summary/ ##hive统计报表、top1000的hadoop文件路径 URL_SUMMARY= /share/hzadmin/urlapp/spp/dpi_http_mark_summary/ URL_RULE= /share/hzadmin/urlapp/spp/dpi_http_mark_rule/ URL_TYPECODE= /share/hzadmin/urlapp/spp/dpi_http_mark_type/ URL_UNMATCH_TOP1000= /share/hzadmin/urlapp/spp/dpi_http_mark_top1000/ SITE_RULE= /share/hzadmin/urlapp/spp/dpi_site_mark_rule/ SITE_TYPECODE= /share/hzadmin/urlapp/spp/dpi_site_mark_type/ SITE_UNMATCH_TOP1000=/share/hzadmin/urlapp/spp/dpi_site_mark_top1000/ ##分类标准库的hadoop文件路径 R_URL_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/url_type/ R_APP_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/app_type/ R_NOISE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/noise_type/ R_SITE_TYPE=/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/type/site_type/ ##中间表hadoop文件路径 TEMP_DPI_MATCH=/share/hzadmin/urlapp/spp/temp_dpi_match TEMP_DPI_NOISE=/share/hzadmin/urlapp/spp/temp_dpi_noise TEMP_DPI_UNMATCH=/share/hzadmin/urlapp/spp/temp_dpi_unmatch TEMP_DPI_URL=/share/hzadmin/urlapp/spp/temp_dpi_url TEMP_DPI_APP=/share/hzadmin/urlapp/spp/temp_dpi_app TEMP_DPI_SITE=/share/hzadmin/urlapp/spp/temp_dpi_site ##客户标签表路径(省份无客户标签可不进行配置) LTBAL=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/ LTBAL_tmp=/share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D_tmp/ ##dpi原始文件在hadoop上的路径,即匹配程序的输入路径 URL_INPUT_PATH=/share/hzadmin/external_table/DMP_SSA/DPI/ #URL_INPUT_PATH="/apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day"=" ##配置dpi字段配置文件路径,为linux路径 DPI_CONF_PATH=rule.xml ##本省省份编码:比如上海编码为831 AREA_NO=811 ##重传次数 REUPLOAD_COUNT=00 ##生成上传文件路径 
UNMATCHTOP1000=/dfs/ftp/hzadmin/urlAPP/UnMatchTop1000 URLAPPREPORT=/dfs/ftp/hzadmin/urlAPP/URLAppReport #本地程序存放目录 LOC_DIR=/home/hzadmin/urlAPP #HIVE用户规范,未指定:default,指定用户填写用户名 HIVE_USER=dpi #MapReduce框架版本号 MR_VERSION=_02_1 #匹配输出是否加密 0 :否 1 : 是 ISENCODE=0 #HIVE文件加密密钥 HIVE_LICENSE=DpiBonc #执行JAR包时的特定参数 PARA_JAR='-D mapred.job.queue.name=thirdpart1' #执行HIVE时的特定参数 PARA_HIVE='set hive.auto.convert.join=false;set mapreduce.job.queuename=thirdpart1;' #原数据执行类型,按小时跑:hour,按天跑:day DEFAULT_TIME=day #DPI数据比运行时间延迟的天数 DELAY_DAY=1 #hour模式下,DPI数据比运行时间延迟的小时数 DELAY_HOUR=0 ##本省是否配置客户标签程序,是:1,否:0 IS_LTBAL=0 [hzadmin@BDI2 URLAppProgram_sf]$ [root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/20160711 Found 706 items -rw-r--r-- 2 hzadmin hdfs 852456797 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0.snappy -rw-r--r-- 2 hzadmin hdfs 1025069938 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000000_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 816682614 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0.snappy -rw-r--r-- 2 hzadmin hdfs 1030962113 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000001_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 922216071 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0.snappy -rw-r--r-- 2 hzadmin hdfs 1018908652 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000002_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 873706406 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0.snappy -rw-r--r-- 2 hzadmin hdfs 1025021048 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000003_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 876314487 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0.snappy -rw-r--r-- 2 hzadmin hdfs 1007005145 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000004_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 886130165 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0.snappy -rw-r--r-- 2 hzadmin hdfs 1017040305 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000005_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 870758798 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0.snappy -rw-r--r-- 2 hzadmin hdfs 1075565204 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000006_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 853730203 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0.snappy -rw-r--r-- 2 hzadmin hdfs 990414241 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000007_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 836870377 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0.snappy -rw-r--r-- 2 hzadmin hdfs 1003709447 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000008_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 849027164 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0.snappy -rw-r--r-- 2 hzadmin hdfs 1002311449 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000009_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 919945440 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0.snappy -rw-r--r-- 2 hzadmin hdfs 985896710 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000010_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 853679744 2016-07-12 03:17 
/share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0.snappy -rw-r--r-- 2 hzadmin hdfs 978387233 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000011_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 857535482 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0.snappy -rw-r--r-- 2 hzadmin hdfs 1013308316 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000012_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 839661882 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0.snappy -rw-r--r-- 2 hzadmin hdfs 963660717 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000013_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 853614174 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0.snappy -rw-r--r-- 2 hzadmin hdfs 923638015 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000014_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 833865334 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0.snappy -rw-r--r-- 2 hzadmin hdfs 993313562 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000015_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 863788726 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0.snappy -rw-r--r-- 2 hzadmin hdfs 1055935913 2016-07-12 04:09 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000016_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 938640887 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0.snappy -rw-r--r-- 2 hzadmin hdfs 992664370 2016-07-12 04:08 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000017_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 836728758 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0.snappy -rw-r--r-- 2 hzadmin hdfs 958110493 2016-07-12 04:11 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000018_0_copy_1.snappy -rw-r--r-- 2 hzadmin hdfs 878931625 2016-07-12 03:17 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000019_0.snappy -rw-r--r-- 2 hzadmin hdfs 962405868 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000586_0.snappy -rw-r--r-- 2 hzadmin hdfs 1051757492 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000587_0.snappy -rw-r--r-- 2 hzadmin hdfs 1016882930 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000588_0.snappy -rw-r--r-- 2 hzadmin hdfs 1039885210 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000589_0.snappy -rw-r--r-- 2 hzadmin hdfs 941194043 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000590_0.snappy -rw-r--r-- 2 hzadmin hdfs 1008122362 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000591_0.snappy -rw-r--r-- 2 hzadmin hdfs 1047224089 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000592_0.snappy -rw-r--r-- 2 hzadmin hdfs 982080269 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000593_0.snappy -rw-r--r-- 2 hzadmin hdfs 1023890503 2016-07-12 04:33 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000594_0.snappy -rw-r--r-- 2 hzadmin hdfs 986968252 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000595_0.snappy -rw-r--r-- 2 hzadmin hdfs 987693087 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000596_0.snappy -rw-r--r-- 2 hzadmin hdfs 1011458249 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000597_0.snappy -rw-r--r-- 2 hzadmin hdfs 1009166057 2016-07-12 04:32 
/share/hzadmin/external_table/DMP_SSA/DPI/20160711/000598_0.snappy -rw-r--r-- 2 hzadmin hdfs 985772040 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000599_0.snappy -rw-r--r-- 2 hzadmin hdfs 965906316 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000600_0.snappy -rw-r--r-- 2 hzadmin hdfs 955717905 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000601_0.snappy -rw-r--r-- 2 hzadmin hdfs 968491437 2016-07-12 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000602_0.snappy -rw-r--r-- 2 hzadmin hdfs 981757576 2016-07-12 04:32 /share/hzadmin/external_table/DMP_SSA/DPI/20160711/000603_0.snappy [root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/external_table/DMP_SSA/DPI/ Found 6 items drwxr-xr-x - hzadmin hdfs 0 2016-07-07 04:31 /share/hzadmin/external_table/DMP_SSA/DPI/20160706 drwxr-xr-x - hzadmin hdfs 0 2016-07-08 03:59 /share/hzadmin/external_table/DMP_SSA/DPI/20160707 drwxr-xr-x - hzadmin hdfs 0 2016-07-09 04:21 /share/hzadmin/external_table/DMP_SSA/DPI/20160708 drwxr-xr-x - hzadmin hdfs 0 2016-07-10 04:30 /share/hzadmin/external_table/DMP_SSA/DPI/20160709 drwxr-xr-x - hzadmin hdfs 0 2016-07-11 04:26 /share/hzadmin/external_table/DMP_SSA/DPI/20160710 drwxr-xr-x - hzadmin hdfs 0 2016-07-12 04:34 /share/hzadmin/external_table/DMP_SSA/DPI/20160711 [root@BDI2 ~]# [root@BDI2 ~]# hdfs dfs -ls /share/hzadmin/urlapp/spp/CDPI_USER_BEH_PREFER_D/ [root@BDI2 ~]# #定时器 执行日志 [hzadmin@BDI2 bj_ggsn]$ cat /home/hzadmin/bj_ggsn/start.log output to local-file: ftp_con.txt? output to local-file: ftp_con.txt? Begin at: Wed Jul 13 04:39:05 CST 2016 ; Interactive mode off. Local directory now /home/hzadmin/urlAPP/URLAppProgram End at: Wed Jul 13 04:39:06 CST 2016; DownLoadfilename: UACDS_20160712_00.tar.gz total time=1 s ; /home/hzadmin/urlAPP/URLAppProgram_sf/ProgramByDay match.cfg R_APP_TYPE_20160712_00.txt R_NOISE_TYPE_20160712_00.txt R_SITE_TYPE_20160712_00.txt R_URL_TYPE_20160712_00.txt UACDS_20160712_00_01_1.jar UACDS_20160712_00_01.jar UACDS_20160712_00_02_1.jar UACDS_20160712_00_02.jar =========================================================================== ========== The program is running , please keep the network flow ... ========== running model day : 20160712 =========================================================================== ========== Step 1 of 2 ... ========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/match_20160713_20160712.log ========== Step 2 of 3 ... ========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log ========== Step 3 of 3 ... ========== loging : tail -100f /home/hzadmin/urlAPP/logs/20160713/upload_20160713_20160712.log not input days so day=today create_day: 20160713 day: 20160712 /home/hzadmin/urlAPP/ResultMatch/remove_details.sh: line 1: ?#!/bin/bash: No such file or directory delete file: 20160706 delete file: /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705 16/07/13 09:07:26 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes. 
Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/merge/20160705' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties OK Time taken: 2.58 seconds Added /home/hzadmin/bj_ggsn/jar/Decode.jar to class path Added resource: /home/hzadmin/bj_ggsn/jar/Decode.jar OK Time taken: 1.676 seconds Query ID = hzadmin_20160713090707_053181f1-eb59-4188-be6a-a983081c9f5f Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks not specified. Estimated from input data size: 231 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1464150086810_11516, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11516/ Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11516 Hadoop job information for Stage-1: number of mappers: 661; number of reducers: 231 2016-07-13 09:08:01,998 Stage-1 map = 0%, reduce = 0% 2016-07-13 09:08:50,489 Stage-1 map = 1%, reduce = 0%, Cumulative CPU 5197.54 sec 2016-07-13 09:09:03,046 Stage-1 map = 2%, reduce = 0%, Cumulative CPU 7764.25 sec 2016-07-13 09:09:07,341 Stage-1 map = 3%, reduce = 0%, Cumulative CPU 8663.68 sec 2016-07-13 09:09:12,666 Stage-1 map = 4%, reduce = 0%, Cumulative CPU 9852.68 sec 2016-07-13 09:09:21,205 Stage-1 map = 5%, reduce = 0%, Cumulative CPU 11010.68 sec 2016-07-13 09:09:25,392 Stage-1 map = 6%, reduce = 0%, Cumulative CPU 11844.46 sec 2016-07-13 09:09:27,450 Stage-1 map = 7%, reduce = 0%, Cumulative CPU 12290.34 sec 2016-07-13 09:09:29,554 Stage-1 map = 8%, reduce = 0%, Cumulative CPU 12730.05 sec 2016-07-13 09:09:32,076 Stage-1 map = 9%, reduce = 0%, Cumulative CPU 13393.25 sec 2016-07-13 09:09:33,453 Stage-1 map = 10%, reduce = 0%, Cumulative CPU 13535.78 sec 2016-07-13 09:09:35,612 Stage-1 map = 11%, reduce = 0%, Cumulative CPU 13797.18 sec 2016-07-13 09:09:40,782 Stage-1 map = 12%, reduce = 0%, Cumulative CPU 14445.45 sec 2016-07-13 09:09:47,771 Stage-1 map = 13%, reduce = 0%, Cumulative CPU 15535.53 sec 2016-07-13 09:09:52,221 Stage-1 map = 14%, reduce = 0%, Cumulative CPU 16179.99 sec 2016-07-13 09:09:56,108 Stage-1 map = 15%, reduce = 0%, Cumulative CPU 16602.82 sec 2016-07-13 09:10:05,058 Stage-1 map = 16%, reduce = 0%, Cumulative CPU 18181.36 sec 2016-07-13 09:10:08,191 Stage-1 map = 17%, reduce = 0%, Cumulative CPU 18632.7 sec 2016-07-13 09:10:12,436 Stage-1 map = 18%, reduce = 0%, Cumulative CPU 19106.37 sec 2016-07-13 09:10:15,600 Stage-1 map = 19%, reduce = 0%, Cumulative CPU 19617.38 sec 2016-07-13 09:10:18,757 Stage-1 map = 20%, reduce = 0%, Cumulative CPU 19945.72 sec 2016-07-13 09:10:27,207 Stage-1 map = 21%, reduce = 1%, Cumulative CPU 21447.84 sec 2016-07-13 09:10:32,471 Stage-1 map = 22%, reduce = 1%, Cumulative CPU 22085.79 sec 2016-07-13 09:10:36,684 Stage-1 map = 23%, reduce = 1%, Cumulative CPU 22729.29 sec 2016-07-13 09:10:41,143 Stage-1 map = 24%, reduce = 1%, Cumulative CPU 23253.89 sec 2016-07-13 09:10:43,254 Stage-1 map = 25%, reduce = 1%, Cumulative CPU 23457.85 sec 2016-07-13 09:10:45,362 Stage-1 map = 26%, reduce = 1%, Cumulative CPU 23678.34 sec 2016-07-13 09:10:46,404 Stage-1 map = 27%, reduce = 1%, Cumulative CPU 23774.85 sec 2016-07-13 09:10:50,629 Stage-1 map = 28%, reduce = 1%, Cumulative 
CPU 24180.11 sec 2016-07-13 09:10:57,034 Stage-1 map = 29%, reduce = 1%, Cumulative CPU 24839.33 sec 2016-07-13 09:11:01,255 Stage-1 map = 30%, reduce = 1%, Cumulative CPU 25307.08 sec 2016-07-13 09:11:03,958 Stage-1 map = 31%, reduce = 1%, Cumulative CPU 25763.26 sec 2016-07-13 09:11:08,528 Stage-1 map = 32%, reduce = 2%, Cumulative CPU 26300.39 sec 2016-07-13 09:11:14,875 Stage-1 map = 33%, reduce = 2%, Cumulative CPU 27264.86 sec 2016-07-13 09:11:24,386 Stage-1 map = 34%, reduce = 2%, Cumulative CPU 28419.61 sec 2016-07-13 09:11:31,847 Stage-1 map = 35%, reduce = 2%, Cumulative CPU 29402.95 sec 2016-07-13 09:11:36,334 Stage-1 map = 36%, reduce = 2%, Cumulative CPU 29849.6 sec 2016-07-13 09:11:43,729 Stage-1 map = 37%, reduce = 2%, Cumulative CPU 30704.56 sec 2016-07-13 09:11:47,933 Stage-1 map = 38%, reduce = 2%, Cumulative CPU 31181.2 sec 2016-07-13 09:11:50,068 Stage-1 map = 39%, reduce = 2%, Cumulative CPU 31369.04 sec 2016-07-13 09:11:55,367 Stage-1 map = 40%, reduce = 2%, Cumulative CPU 31936.9 sec 2016-07-13 09:11:57,473 Stage-1 map = 40%, reduce = 3%, Cumulative CPU 32240.58 sec 2016-07-13 09:11:59,545 Stage-1 map = 41%, reduce = 3%, Cumulative CPU 32428.94 sec 2016-07-13 09:12:01,649 Stage-1 map = 42%, reduce = 3%, Cumulative CPU 32744.61 sec 2016-07-13 09:12:07,942 Stage-1 map = 43%, reduce = 3%, Cumulative CPU 33410.43 sec 2016-07-13 09:12:12,113 Stage-1 map = 44%, reduce = 3%, Cumulative CPU 33952.16 sec 2016-07-13 09:12:15,242 Stage-1 map = 45%, reduce = 3%, Cumulative CPU 34206.1 sec 2016-07-13 09:12:21,495 Stage-1 map = 46%, reduce = 3%, Cumulative CPU 34844.34 sec 2016-07-13 09:12:30,872 Stage-1 map = 46%, reduce = 4%, Cumulative CPU 35728.26 sec 2016-07-13 09:12:34,036 Stage-1 map = 47%, reduce = 4%, Cumulative CPU 36057.98 sec 2016-07-13 09:12:40,293 Stage-1 map = 48%, reduce = 4%, Cumulative CPU 36777.14 sec 2016-07-13 09:12:50,802 Stage-1 map = 49%, reduce = 4%, Cumulative CPU 38085.11 sec 2016-07-13 09:12:58,132 Stage-1 map = 50%, reduce = 4%, Cumulative CPU 38899.54 sec 2016-07-13 09:13:00,323 Stage-1 map = 51%, reduce = 4%, Cumulative CPU 39107.81 sec 2016-07-13 09:13:04,948 Stage-1 map = 52%, reduce = 4%, Cumulative CPU 39535.16 sec 2016-07-13 09:13:09,327 Stage-1 map = 53%, reduce = 4%, Cumulative CPU 39903.93 sec 2016-07-13 09:13:13,553 Stage-1 map = 53%, reduce = 5%, Cumulative CPU 40394.49 sec 2016-07-13 09:13:15,702 Stage-1 map = 54%, reduce = 5%, Cumulative CPU 40552.41 sec 2016-07-13 09:13:24,136 Stage-1 map = 55%, reduce = 5%, Cumulative CPU 41410.8 sec 2016-07-13 09:13:28,720 Stage-1 map = 56%, reduce = 5%, Cumulative CPU 41846.2 sec 2016-07-13 09:13:33,006 Stage-1 map = 57%, reduce = 5%, Cumulative CPU 42176.72 sec 2016-07-13 09:13:43,675 Stage-1 map = 58%, reduce = 5%, Cumulative CPU 43068.84 sec 2016-07-13 09:13:52,139 Stage-1 map = 59%, reduce = 5%, Cumulative CPU 43853.31 sec 2016-07-13 09:13:59,475 Stage-1 map = 60%, reduce = 5%, Cumulative CPU 44538.15 sec 2016-07-13 09:14:03,638 Stage-1 map = 61%, reduce = 5%, Cumulative CPU 44855.9 sec 2016-07-13 09:14:18,548 Stage-1 map = 62%, reduce = 5%, Cumulative CPU 46195.74 sec 2016-07-13 09:14:24,862 Stage-1 map = 63%, reduce = 5%, Cumulative CPU 46731.81 sec 2016-07-13 09:14:33,309 Stage-1 map = 64%, reduce = 5%, Cumulative CPU 47588.96 sec 2016-07-13 09:14:36,458 Stage-1 map = 64%, reduce = 6%, Cumulative CPU 48099.49 sec 2016-07-13 09:14:42,805 Stage-1 map = 65%, reduce = 6%, Cumulative CPU 48679.78 sec 2016-07-13 09:14:49,239 Stage-1 map = 66%, reduce = 6%, Cumulative CPU 49262.33 sec 2016-07-13 
09:14:55,665 Stage-1 map = 67%, reduce = 6%, Cumulative CPU 49899.25 sec 2016-07-13 09:15:03,040 Stage-1 map = 68%, reduce = 6%, Cumulative CPU 50497.22 sec 2016-07-13 09:15:12,510 Stage-1 map = 69%, reduce = 6%, Cumulative CPU 51435.08 sec 2016-07-13 09:15:24,174 Stage-1 map = 70%, reduce = 6%, Cumulative CPU 52788.92 sec 2016-07-13 09:15:32,711 Stage-1 map = 71%, reduce = 6%, Cumulative CPU 53665.99 sec 2016-07-13 09:15:42,103 Stage-1 map = 72%, reduce = 6%, Cumulative CPU 54553.65 sec 2016-07-13 09:15:47,333 Stage-1 map = 73%, reduce = 6%, Cumulative CPU 54961.75 sec 2016-07-13 09:15:54,771 Stage-1 map = 74%, reduce = 6%, Cumulative CPU 55748.89 sec 2016-07-13 09:16:02,233 Stage-1 map = 75%, reduce = 6%, Cumulative CPU 56317.34 sec 2016-07-13 09:16:06,637 Stage-1 map = 76%, reduce = 6%, Cumulative CPU 56664.01 sec 2016-07-13 09:16:07,687 Stage-1 map = 76%, reduce = 7%, Cumulative CPU 56726.21 sec 2016-07-13 09:16:15,012 Stage-1 map = 77%, reduce = 7%, Cumulative CPU 57229.48 sec 2016-07-13 09:16:28,460 Stage-1 map = 78%, reduce = 7%, Cumulative CPU 58587.28 sec 2016-07-13 09:16:32,644 Stage-1 map = 79%, reduce = 7%, Cumulative CPU 58893.37 sec 2016-07-13 09:16:43,193 Stage-1 map = 80%, reduce = 7%, Cumulative CPU 59747.56 sec 2016-07-13 09:16:51,545 Stage-1 map = 81%, reduce = 7%, Cumulative CPU 60503.95 sec 2016-07-13 09:16:56,784 Stage-1 map = 82%, reduce = 7%, Cumulative CPU 60890.77 sec 2016-07-13 09:17:08,285 Stage-1 map = 83%, reduce = 7%, Cumulative CPU 61747.42 sec 2016-07-13 09:17:14,555 Stage-1 map = 84%, reduce = 7%, Cumulative CPU 62244.9 sec 2016-07-13 09:17:19,774 Stage-1 map = 85%, reduce = 7%, Cumulative CPU 62596.97 sec 2016-07-13 09:17:31,248 Stage-1 map = 86%, reduce = 7%, Cumulative CPU 63653.0 sec 2016-07-13 09:17:38,563 Stage-1 map = 87%, reduce = 7%, Cumulative CPU 64143.74 sec 2016-07-13 09:17:41,682 Stage-1 map = 87%, reduce = 8%, Cumulative CPU 64299.0 sec 2016-07-13 09:17:46,882 Stage-1 map = 88%, reduce = 8%, Cumulative CPU 64714.76 sec 2016-07-13 09:17:58,323 Stage-1 map = 89%, reduce = 8%, Cumulative CPU 65398.87 sec 2016-07-13 09:18:11,927 Stage-1 map = 90%, reduce = 8%, Cumulative CPU 66519.12 sec 2016-07-13 09:18:17,138 Stage-1 map = 91%, reduce = 8%, Cumulative CPU 66854.71 sec 2016-07-13 09:18:25,471 Stage-1 map = 92%, reduce = 8%, Cumulative CPU 67504.99 sec 2016-07-13 09:18:32,823 Stage-1 map = 93%, reduce = 8%, Cumulative CPU 68050.78 sec 2016-07-13 09:18:35,954 Stage-1 map = 93%, reduce = 9%, Cumulative CPU 68204.15 sec 2016-07-13 09:18:40,180 Stage-1 map = 94%, reduce = 9%, Cumulative CPU 68751.23 sec 2016-07-13 09:18:44,347 Stage-1 map = 94%, reduce = 10%, Cumulative CPU 69048.97 sec 2016-07-13 09:18:46,472 Stage-1 map = 95%, reduce = 10%, Cumulative CPU 69248.88 sec 2016-07-13 09:18:52,740 Stage-1 map = 96%, reduce = 11%, Cumulative CPU 69662.81 sec 2016-07-13 09:18:55,923 Stage-1 map = 97%, reduce = 11%, Cumulative CPU 69937.75 sec 2016-07-13 09:18:57,023 Stage-1 map = 97%, reduce = 12%, Cumulative CPU 69987.32 sec 2016-07-13 09:19:01,192 Stage-1 map = 98%, reduce = 12%, Cumulative CPU 70135.07 sec 2016-07-13 09:19:02,250 Stage-1 map = 98%, reduce = 13%, Cumulative CPU 70162.43 sec 2016-07-13 09:19:07,490 Stage-1 map = 98%, reduce = 14%, Cumulative CPU 70372.58 sec 2016-07-13 09:19:12,776 Stage-1 map = 99%, reduce = 15%, Cumulative CPU 70731.6 sec 2016-07-13 09:19:18,031 Stage-1 map = 99%, reduce = 16%, Cumulative CPU 70899.25 sec 2016-07-13 09:19:31,485 Stage-1 map = 99%, reduce = 17%, Cumulative CPU 71470.97 sec 2016-07-13 09:19:46,107 
Stage-1 map = 100%, reduce = 17%, Cumulative CPU 71714.27 sec 2016-07-13 09:19:47,161 Stage-1 map = 100%, reduce = 19%, Cumulative CPU 71731.02 sec 2016-07-13 09:19:48,219 Stage-1 map = 100%, reduce = 24%, Cumulative CPU 71810.86 sec 2016-07-13 09:19:49,261 Stage-1 map = 100%, reduce = 29%, Cumulative CPU 72135.09 sec 2016-07-13 09:19:50,287 Stage-1 map = 100%, reduce = 33%, Cumulative CPU 72396.02 sec 2016-07-13 09:19:51,337 Stage-1 map = 100%, reduce = 35%, Cumulative CPU 72698.52 sec 2016-07-13 09:19:52,393 Stage-1 map = 100%, reduce = 38%, Cumulative CPU 72918.64 sec 2016-07-13 09:19:53,444 Stage-1 map = 100%, reduce = 44%, Cumulative CPU 73111.93 sec 2016-07-13 09:19:54,470 Stage-1 map = 100%, reduce = 46%, Cumulative CPU 73228.55 sec 2016-07-13 09:19:55,495 Stage-1 map = 100%, reduce = 47%, Cumulative CPU 73314.57 sec 2016-07-13 09:19:56,538 Stage-1 map = 100%, reduce = 48%, Cumulative CPU 73403.39 sec 2016-07-13 09:19:57,698 Stage-1 map = 100%, reduce = 49%, Cumulative CPU 73452.14 sec 2016-07-13 09:20:00,788 Stage-1 map = 100%, reduce = 51%, Cumulative CPU 73859.22 sec 2016-07-13 09:20:01,821 Stage-1 map = 100%, reduce = 57%, Cumulative CPU 74159.29 sec 2016-07-13 09:20:03,490 Stage-1 map = 100%, reduce = 66%, Cumulative CPU 74342.07 sec 2016-07-13 09:20:04,531 Stage-1 map = 100%, reduce = 70%, Cumulative CPU 74864.62 sec 2016-07-13 09:20:05,558 Stage-1 map = 100%, reduce = 75%, Cumulative CPU 75034.61 sec 2016-07-13 09:20:06,584 Stage-1 map = 100%, reduce = 77%, Cumulative CPU 75119.84 sec 2016-07-13 09:20:07,613 Stage-1 map = 100%, reduce = 79%, Cumulative CPU 75180.32 sec 2016-07-13 09:20:08,639 Stage-1 map = 100%, reduce = 81%, Cumulative CPU 75280.79 sec 2016-07-13 09:20:09,665 Stage-1 map = 100%, reduce = 83%, Cumulative CPU 75434.05 sec 2016-07-13 09:20:10,713 Stage-1 map = 100%, reduce = 85%, Cumulative CPU 75521.87 sec 2016-07-13 09:20:11,775 Stage-1 map = 100%, reduce = 87%, Cumulative CPU 75682.74 sec 2016-07-13 09:20:13,574 Stage-1 map = 100%, reduce = 91%, Cumulative CPU 75881.12 sec 2016-07-13 09:20:15,641 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 76062.67 sec 2016-07-13 09:20:17,742 Stage-1 map = 100%, reduce = 93%, Cumulative CPU 76159.41 sec 2016-07-13 09:20:19,865 Stage-1 map = 100%, reduce = 94%, Cumulative CPU 76248.27 sec 2016-07-13 09:20:20,964 Stage-1 map = 100%, reduce = 95%, Cumulative CPU 76323.18 sec 2016-07-13 09:20:24,140 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 76662.94 sec 2016-07-13 09:20:27,289 Stage-1 map = 100%, reduce = 97%, Cumulative CPU 76664.86 sec 2016-07-13 09:20:31,500 Stage-1 map = 100%, reduce = 98%, Cumulative CPU 77118.12 sec 2016-07-13 09:20:32,531 Stage-1 map = 100%, reduce = 99%, Cumulative CPU 77257.73 sec 2016-07-13 09:20:38,761 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 77452.2 sec MapReduce Total cumulative CPU time: 0 days 21 hours 30 minutes 52 seconds 200 msec Ended Job = job_1464150086810_11516 MapReduce Jobs Launched: Job 0: Map: 661 Reduce: 231 Cumulative CPU: 77452.2 sec HDFS Read: 230040091537 HDFS Write: 2692705050 SUCCESS Total MapReduce CPU Time Spent: 0 days 21 hours 30 minutes 52 seconds 200 msec OK Time taken: 774.963 seconds, Fetched: 66214908 row(s) mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ag': No such file or directory mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ah': No such file or directory mv: cannot stat `/dfs/ftp/hzadmin/test/20160712ai': No such file or directory 16/07/13 09:33:08 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, 
Emptier interval = 0 minutes. Moved: 'hdfs://BJTEL/share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current 16/07/13 09:33:11 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes. Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current 16/07/13 09:33:13 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 360 minutes, Emptier interval = 0 minutes. Moved: 'hdfs://BJTEL/share/hzadmin/external_table/DMP_SSA/DPI/20160706' to trash at: hdfs://BJTEL/user/hzadmin/.Trash/Current Logging initialized using configuration in jar:file:/app/hive/lib/hive-common-0.13.0.2.1.1.0-385.jar!/hive-log4j.properties OK Time taken: 5.626 seconds Dropped the partition receive_day=20160706 OK Time taken: 5.321 seconds [hzadmin@BDI2 bj_ggsn]$ #查看文件是否生成 [hzadmin@BDI2 bj_data]$ ls -ltr 20160712* -rw-rw-r-- 1 hzadmin hzadmin 1254354742 Jul 13 09:32 20160712001.txt -rw-rw-r-- 1 hzadmin hzadmin 1254303891 Jul 13 09:32 20160712002.txt -rw-rw-r-- 1 hzadmin hzadmin 1254247185 Jul 13 09:32 20160712003.txt -rw-rw-r-- 1 hzadmin hzadmin 1254298641 Jul 13 09:33 20160712004.txt -rw-rw-r-- 1 hzadmin hzadmin 1254315336 Jul 13 09:33 20160712005.txt -rw-rw-r-- 1 hzadmin hzadmin 649592869 Jul 13 09:33 20160712006.txt [hzadmin@BDI2 bj_data]$ pwd /dfs/ftp/hzadmin/bj_data [hzadmin@BDI2 bj_data]$ #查看hadoop任务 [hzadmin@BDI2 bj_ggsn]$ hadoop job -list DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. Total jobs:0 JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info [hzadmin@BDI2 bj_ggsn]$ jobs -l [hzadmin@BDI2 bj_ggsn]$ [hzadmin@BDI2 bj_ggsn]$ ll total 136 -rwxr-xr-x 1 hzadmin hzadmin 433 Feb 10 20:39 delete.sh -rw-r--r-- 1 hzadmin hzadmin 71 Apr 30 2015 hours.txt drwxr-xr-x 2 root root 4096 Aug 5 2015 jar drwxrwxr-x 2 hzadmin hzadmin 36864 Jul 13 03:18 log -rw------- 1 hzadmin hzadmin 21554 Apr 12 20:56 nohup.out -rwxr-xr-x 1 hzadmin hzadmin 1845 Sep 23 2015 select1.sh -rwxr-xr-x 1 hzadmin hzadmin 454 Oct 12 2015 select2bak.sh -rwxr-xr-x 1 hzadmin hzadmin 1367 Oct 12 2015 select2.sh -rwxr-xr-x 1 hzadmin hzadmin 1344 Jun 18 2015 select.sh -rwxr-xr-x 1 hzadmin hzadmin 1337 May 4 2015 select.shbak -rwxr-xr-x 1 hzadmin hzadmin 628 Oct 28 2015 start1.sh -rwxr-xr-x 1 hzadmin hzadmin 692 Jun 26 19:31 start2.sh -rwxr-xr-x 1 hzadmin hzadmin 636 May 10 14:22 start3.sh -rwxr-xr-x 1 hzadmin hzadmin 631 Mar 5 13:27 startbak1.sh -rw-r--r-- 1 hzadmin hzadmin 16926 Jul 13 09:33 start.log [hzadmin@BDI2 bj_ggsn]$ cd /home/hzadmin/urlAPP/ [hzadmin@BDI2 urlAPP]$ ll total 55164 -rwxr-xr-x 1 hzadmin hzadmin 1331 Sep 22 2015 BoncRun1.sh -rwxr-xr-x 1 hzadmin hzadmin 1306 Oct 28 2015 BoncRun.sh -rwxr-xr-x 1 hzadmin hzadmin 1636 May 31 11:33 hive.sh drwxrwxr-x 17 hzadmin hzadmin 4096 Jul 13 04:39 logs drwxrwxr-x 3 hzadmin hzadmin 4096 Jul 13 09:07 ResultMatch -rwxr-xr-x 1 hzadmin hzadmin 56426871 Jun 16 2015 ResultMatch.zip drwxr-xr-x 17 hzadmin hzadmin 4096 Jul 15 2015 UnMatchTop1000 drwxr-xr-x 3 hzadmin hzadmin 20480 Jul 13 04:39 URLAppProgram drwxr-xr-x 5 hzadmin hzadmin 4096 Jul 13 04:39 URLAppProgram_sf drwxr-xr-x 17 hzadmin hzadmin 4096 Jul 15 2015 URLAppReport [hzadmin@BDI2 urlAPP]$ cd URLAppProgram_sf/ [hzadmin@BDI2 URLAppProgram_sf]$ ll total 129348 drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 
10 2015 bin -rwxr-xr-x 1 hzadmin hzadmin 3017 Sep 28 2015 common.cfg -rwxr-xr-x 1 hzadmin hzadmin 200 Nov 7 2014 create_table.sh -rwxr-xr-x 1 hzadmin hzadmin 80 May 10 14:21 get_uacds.sh -rw-rw-r-- 1 hzadmin hzadmin 33 Jul 12 21:01 match.cfg drwxr-xr-x 2 hzadmin hzadmin 4096 Jul 13 04:39 ProgramByDay drwxr-xr-x 2 hzadmin hzadmin 4096 Jun 10 2015 ProgramByHour -rwxr-xr-x 1 hzadmin hzadmin 741 Jul 14 2015 public.cfg -rw-rw-r-- 1 hzadmin hzadmin 721993 Jul 12 21:01 R_APP_TYPE_20160712_00.txt -rwxr-xr-x 1 hzadmin hzadmin 728 Nov 7 2014 reload.sh -rwxr-xr-x 1 hzadmin hzadmin 4705 May 6 2015 remove_files.sh -rw-rw-r-- 1 hzadmin hzadmin 4500 Jul 12 21:01 R_NOISE_TYPE_20160712_00.txt -rw-rw-r-- 1 hzadmin hzadmin 1426612 Jul 12 21:01 R_SITE_TYPE_20160712_00.txt -rwxr-xr-x 1 hzadmin hzadmin 6966 Jun 15 2015 rule.xml -rwxr-xr-x 1 hzadmin hzadmin 6301 Sep 28 2015 runbak.sh -rwxr-xr-x 1 hzadmin hzadmin 6291 May 7 2015 run.sh -rw-rw-r-- 1 hzadmin hzadmin 1060990 Jul 12 21:01 R_URL_TYPE_20160712_00.txt -rw-rw-r-- 1 hzadmin hzadmin 32290563 Jul 12 21:01 UACDS_20160712_00_01_1.jar -rw-rw-r-- 1 hzadmin hzadmin 32233766 Jul 12 21:00 UACDS_20160712_00_01.jar -rw-rw-r-- 1 hzadmin hzadmin 32339712 Jul 12 21:01 UACDS_20160712_00_02_1.jar -rw-rw-r-- 1 hzadmin hzadmin 32282922 Jul 12 21:01 UACDS_20160712_00_02.jar [hzadmin@BDI2 URLAppProgram_sf]$ pwd /home/hzadmin/urlAPP/URLAppProgram_sf [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat ProgramByDay/match.sh #!/bin/bash # match.sh ###################################################################################################### # function : 匹配程序运行、统计报表出数据 # date: 2014/02/10 # author: SPP # param:day(数据日期) ###################################################################################################### cd `dirname $0` cd .. PWDNOW=`pwd` cd `dirname $0` eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg) eval $(grep URL_MATCH ${PWDNOW}/common.cfg) eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg) eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg) eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg) eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg) eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg) eval $(grep HIVE_USER ${PWDNOW}/common.cfg) eval $(grep LOC_DIR ${PWDNOW}/common.cfg) eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg) eval $(grep MR_VERSION ${PWDNOW}/common.cfg) eval $(grep PARA_JAR ${PWDNOW}/common.cfg) eval $(grep PARA_HIVE ${PWDNOW}/common.cfg) eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hadoop fs -ls ${URL_INPUT_PATH}${day} > exist_test ; x=`wc -l exist_test | cut -d ' ' -f 1`; if [ ${x} = 0 ] ; then echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day} file is not exist !" 
rm -f exist_test exit 1; fi; rm -f exist_test hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE} hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE} hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE} hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE} echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar"; hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day} ${URL_MATCH}${day} $PWDNOW/${DPI_CONF_PATH} #hadoop fs -rm ${url_match}${day}/part-m-*.gz hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; set dpi.encode.license=${HIVE_LICENSE}; ${PARA_HIVE} set mapred.job.name=CMSS-COUNT; alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_match add partition (receive_day='${day}') location '${day}/match'; alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}') location '${day}/noise'; alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}') location '${day}/unmatch'; "[hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 ProgramByDay]$ cat report_summary.sh #!/bin/bash # report_H.sh #*================================================= #* #* FileName : report_H.sh #* CreateDate: 2014-04-03 #* Abstract : Statistical analysis of the results of matches #* Author : SPP #* #* BONC All rights reserved. #*================================================== cd `dirname $0` eval $(grep RULE_PROV_VERNO ../match.cfg) eval $(grep URL_MATCH ../common.cfg) eval $(grep URL_INPUT_PATH ../common.cfg) eval $(grep DPI_CONF_PATH ../common.cfg) eval $(grep R_URL_TYPE ../common.cfg) eval $(grep R_APP_TYPE ../common.cfg) eval $(grep R_NOISE_TYPE ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep LOC_DIR ../common.cfg) eval $(grep HIVE_LICENSE ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; ${PARA_HIVE} set dpi.encode.license=${HIVE_LICENSE}; set mapred.job.name=CMSS-COUNT; alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_match_summary add partition (receive_day=${day}) location '${day}'; alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day=${day}) location '${day}'; insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day}) select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host, count(id) as count,sum(download_bytes) ,sum(upload_bytes) from dpi_http_dtl_mark_match where receive_day=${day} group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host; insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day}) select host,url,count(id),sum(download_bytes),sum(upload_bytes) from dpi_http_dtl_mark_unmatch where receive_day=${day} group by host,url; " 
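# report_summary.sh, report.sh and the other ProgramByDay/ProgramByHour scripts above all pull their settings
# out of match.cfg and common.cfg with eval $(grep KEY file) instead of sourcing the whole file.
# A minimal sketch of that pattern (same config path as above; the anchored grep is an addition of this sketch,
# not the scripts' exact code):

#!/bin/bash
CFG=/home/hzadmin/urlAPP/URLAppProgram_sf/common.cfg
eval $(grep '^URL_INPUT_PATH' "${CFG}")   # defines URL_INPUT_PATH in this shell
eval $(grep '^LOC_DIR' "${CFG}")          # defines LOC_DIR
echo "input: ${URL_INPUT_PATH}  local dir: ${LOC_DIR}"
# caution: a cfg line written as "KEY= /path" (space after '=', as in a few of the
# URL_SUMMARY/URL_RULE entries above) is eval'd as a command with an empty KEY,
# so entries need to stay in strict KEY=VALUE form for this scheme to work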
[hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat report.sh #!/bin/bash # report_H.sh #*================================================= #* #* FileName : report_H.sh #* CreateDate: 2014-04-03 #* Abstract : Statistical analysis of the results of matches #* Author : SPP #* #* BONC All rights reserved. #*================================================== cd `dirname $0` eval $(grep RULE_PROV_VERNO ../match.cfg) eval $(grep URL_MATCH ../common.cfg) eval $(grep URL_INPUT_PATH ../common.cfg) eval $(grep DPI_CONF_PATH ../common.cfg) eval $(grep R_URL_TYPE ../common.cfg) eval $(grep R_APP_TYPE ../common.cfg) eval $(grep R_NOISE_TYPE ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep LOC_DIR ../common.cfg) eval $(grep HIVE_LICENSE ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; ${PARA_HIVE} set dpi.encode.license=${HIVE_LICENSE}; alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}'); alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}'); alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}'); alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}'); alter table dpi_http_mark_summary add partition (receive_day=${day}) location '${day}'; alter table dpi_http_mark_rule add partition (receive_day=${day}) location '${day}'; alter table dpi_http_mark_type add partition (receive_day=${day}) location '${day}'; alter table dpi_http_mark_top1000 add partition (receive_day=${day}) location '${day}'; alter table dpi_site_mark_rule add partition (receive_day=${day}) location '${day}'; alter table dpi_site_mark_type add partition (receive_day=${day}) location '${day}'; alter table dpi_site_mark_top1000 add partition (receive_day=${day}) location '${day}'; insert overwrite table dpi_http_mark_rule partition(receive_day=${day}) select visit_type,rule_id,typecode,matchs,'${day}' from ( select visit_type,rule_id,type_code as typecode,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='url' group by visit_type,rule_id,type_code union all select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ; insert overwrite table dpi_http_mark_type partition(receive_day=${day}) select visit_type,typecode, rulematchs, urlmatchs,'${day}' from ( select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where receive_day =${day} and visit_type='url' group by visit_type,typecode union all select visit_type,typecode, count(distinct ruleid) as rulematchs,sum(matchs) as urlmatchs from dpi_http_mark_rule where receive_day =${day} and visit_type='app' group by visit_type,typecode )sub; insert overwrite table dpi_site_mark_rule partition(receive_day=${day}) select site_id,site_code,matchs,'${day}' from ( select site_id,site_code,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and site_id <> '' group by 
site_id,site_code ) sub ; insert overwrite table dpi_site_mark_type partition(receive_day=${day}) select site_code, matchs, hostmatchs,'${day}' from ( select site_code, count(distinct site_id) as matchs,sum(matchs)as hostmatchs from dpi_site_mark_rule where receive_day =${day} and site_id <> '' group by site_code ) sub; insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day}) select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}' from dpi_http_dtl_mark_unmatch_summary where receive_day =${day} group by host,url order by count desc limit 5000; insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day}) select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}' from dpi_http_dtl_mark_match_summary where receive_day =${day} and site_id = '' group by host order by count desc limit 1000; insert overwrite table temp_dpi_match partition(receive_day=${day}) SELECT count(id) as matchcount, cast(0 as bigint)as noisecount , cast(0 as bigint) as unmatchcount, count(CASE WHEN visit_type='url' THEN id ELSE null END )AS urlcount, count(CASE WHEN visit_type='app' THEN id ELSE null END )AS appcount, cast(0 as bigint) as sitecount FROM dpi_http_dtl_mark_match WHERE receive_day=${day} AND reserved_19='1'; insert overwrite table temp_dpi_noise partition(receive_day=${day}) select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} ; insert overwrite table temp_dpi_unmatch partition(receive_day=${day}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} ; insert overwrite table temp_dpi_site partition(receive_day=${day}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day}; insert overwrite table dpi_http_mark_summary partition(receive_day='${day}') select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from ( select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_match where receive_day=${day} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_noise where receive_day=${day} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_unmatch where receive_day=${day} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_site where receive_day=${day} ) sub ; "[hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ #查看hadoop任务(没查到) [hzadmin@BDI2 ProgramByDay]$ hadoop job -list DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. 
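# As the DEPRECATED notice says, hadoop job is only a wrapper now; the same checks can be done with the
# mapred or yarn clients, which are normally on the PATH alongside hadoop on this kind of node.
# A short sketch of the equivalent commands (the application id is the one from this session):

# list MapReduce jobs without the deprecation warning
mapred job -list

# the YARN view of the same thing, filtered to running applications
yarn application -list -appStates RUNNING

# kill by application id if a stray job needs to be stopped
# (application_... corresponds to job_... with the same numeric suffix)
yarn application -kill application_1464150086810_11524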
Total jobs:0 JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info #启动hadoop任务 hive (dpi)> > > > > select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'20160713' > from dpi_http_dtl_mark_unmatch_summary > where receive_day ='20160713' group by host,url order by count desc limit 5; Query ID = hzadmin_20160713143333_11c325a3-d541-4fd2-b5e8-f8ee0d1f25d3 Total jobs = 2 Launching Job 1 out of 2 Number of reduce tasks not specified. Estimated from input data size: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1464150086810_11524, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/ Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11524 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2016-07-13 14:33:16,955 Stage-1 map = 0%, reduce = 0% 2016-07-13 14:33:28,419 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 18.02 sec 2016-07-13 14:33:38,849 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 20.2 sec MapReduce Total cumulative CPU time: 20 seconds 200 msec Ended Job = job_1464150086810_11524 Launching Job 2 out of 2 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1464150086810_11525, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11525/ Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11525 (杀死hadoop job) Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1 2016-07-13 14:33:48,364 Stage-2 map = 0%, reduce = 0% 2016-07-13 14:33:59,031 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 10.59 sec 2016-07-13 14:34:07,565 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 12.53 sec MapReduce Total cumulative CPU time: 12 seconds 530 msec Ended Job = job_1464150086810_11525 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 20.2 sec HDFS Read: 256 HDFS Write: 96 SUCCESS Job 1: Map: 1 Reduce: 1 Cumulative CPU: 12.53 sec HDFS Read: 453 HDFS Write: 4 SUCCESS Total MapReduce CPU Time Spent: 32 seconds 730 msec OK Time taken: 61.137 seconds hive (dpi)> #查看hadoop任务(查到) [hzadmin@BDI2 ProgramByDay]$ hadoop job -list DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. Total jobs:1 JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info job_1464150086810_11524 PREP 1468391589369 hzadmin default NORMAL 1 0 4096M 0M 4096M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/ [hzadmin@BDI2 ProgramByDay]$ hadoop job -lis DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. Usage: CLI <command> <args> [-submit <job-file>] [-status <job-id>] [-counter <job-id> <group-name> <counter-name>] [-kill <job-id>] [-set-priority <job-id> <priority>]. 
Valid values for priorities are: VERY_HIGH HIGH NORMAL LOW VERY_LOW [-events <job-id> <from-event-#> <#-of-events>] [-history <jobHistoryFile>] [-list [all]] [-list-active-trackers] [-list-blacklisted-trackers] [-list-attempt-ids <job-id> <task-type> <task-state>]. Valid values for <task-type> are REDUCE MAP. Valid values for <task-state> are running, completed [-kill-task <task-attempt-id>] [-fail-task <task-attempt-id>] [-logs <job-id> <task-attempt-id>] Generic options supported are -conf <configuration file> specify an application configuration file -D <property=value> use value for given property -fs <local|namenode:port> specify a namenode -jt <local|jobtracker:port> specify a job tracker -files <comma separated list of files> specify comma separated files to be copied to the map reduce cluster -libjars <comma separated list of jars> specify comma separated jar files to include in the classpath. -archives <comma separated list of archives> specify comma separated archives to be unarchived on the compute machines. The general command line syntax is bin/hadoop command [genericOptions] [commandOptions] [hzadmin@BDI2 ProgramByDay]$ hadoop job -list DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. Total jobs:1 JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info job_1464150086810_11524 RUNNING 1468391589369 hzadmin default NORMAL 2 0 8192M 0M 8192M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11524/ [hzadmin@BDI2 ProgramByDay]$ hive (dpi)> > select site_id,site_code,sum(count) as matchs > from dpi_http_dtl_mark_match_summary where receive_day='20160713' and site_id <> '' group by site_id,site_code > ; Query ID = hzadmin_20160713145555_149dda0c-d7c0-4841-91aa-57f3ce1f454d Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks not specified. Estimated from input data size: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1464150086810_11526, Tracking URL = http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/ Kill Command = /app/hadoop/bin/hadoop job -kill job_1464150086810_11526 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2016-07-13 14:55:29,837 Stage-1 map = 0%, reduce = 0% 2016-07-13 14:56:03,280 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 25.72 sec 2016-07-13 14:56:11,859 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 39.58 sec MapReduce Total cumulative CPU time: 39 seconds 580 msec Ended Job = job_1464150086810_11526 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 39.58 sec HDFS Read: 256 HDFS Write: 4 SUCCESS Total MapReduce CPU Time Spent: 39 seconds 580 msec OK Time taken: 50.996 seconds hive (dpi)> [hzadmin@BDI2 bj_ggsn]$ hadoop job -list DEPRECATED: Use of this script to execute mapred command is deprecated. Instead use the mapred command for it. 
Total jobs:1 JobId State StartTime UserName Queue Priority UsedContainers RsvdContainers UsedMem RsvdMem NeededMem AM info job_1464150086810_11526 RUNNING 1468392923303 hzadmin default NORMAL 2 0 8192M 0M 8192M http://BD01.bd.bjtel:8088/proxy/application_1464150086810_11526/ [hzadmin@BDI2 bj_ggsn]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat label.sh #!/bin/bash # report_H.sh #*================================================= #* #* FileName : report_H.sh #* CreateDate: 2014-04-03 #* Abstract : Statistical analysis of the results of matches #* Author : SPP #* #* BONC All rights reserved. #*================================================== cd `dirname $0` eval $(grep RULE_PROV_VERNO ../match.cfg) eval $(grep URL_MATCH ../common.cfg) eval $(grep URL_INPUT_PATH ../common.cfg) eval $(grep DPI_CONF_PATH ../common.cfg) eval $(grep R_URL_TYPE ../common.cfg) eval $(grep R_APP_TYPE ../common.cfg) eval $(grep R_NOISE_TYPE ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep LOC_DIR ../common.cfg) eval $(grep HIVE_LICENSE ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; ${PARA_HIVE} set dpi.encode.license=${HIVE_LICENSE}; INSERT OVERWRITE TABLE CDPI_USER_BEH_PREFER_D_tmp SELECT CASE WHEN T.STARTTIME LIKE '%%-%%' AND T.STARTTIME LIKE '%%.%%' THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd') WHEN T.STARTTIME LIKE '%-%' AND T.STARTTIME LIKE '%%:%%' THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd') WHEN T.STARTTIME LIKE '%%.%%' THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd') WHEN LENGTH(T.STARTTIME) = 13 THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd') ELSE SUBSTR(T.STARTTIME,1,8) END AS VISIT_DAY, T.PHONE_ID, T1.LABEL_CODE, COUNT(1) AS TIMES, SUM(T.DOWNLOAD_BYTES + T.UPLOAD_BYTES) AS FLOWS, 1 AS DAYS FROM ( SELECT STARTTIME, PHONE_ID, TYPE_CODE, SUM(DOWNLOAD_BYTES) AS DOWNLOAD_BYTES, SUM(UPLOAD_BYTES) AS UPLOAD_BYTES FROM dpi_http_dtl_mark_match_summary WHERE RECEIVE_DAY = '${day}' AND LENGTH(PHONE_ID)=11 AND SUBSTR(PHONE_ID,1,1)='1' AND PHONE_ID NOT LIKE '%.%' AND TYPE_CODE IS NOT NULL AND TYPE_CODE <> '' GROUP BY STARTTIME, PHONE_ID, TYPE_CODE ) T LEFT OUTER JOIN R_LABEL_BEHAVIOR T1 ON T.TYPE_CODE = T1.TYPE_CODE WHERE T1.LABEL_CODE IS NOT NULL GROUP BY CASE WHEN T.STARTTIME LIKE '%%-%%' AND T.STARTTIME LIKE '%%.%%' THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd') WHEN T.STARTTIME LIKE '%-%' AND T.STARTTIME LIKE '%%:%%' THEN FROM_UNIXTIME(CAST(ROUND(UNIX_TIMESTAMP(T.STARTTIME)) AS INT),'yyyyMMdd') WHEN T.STARTTIME LIKE '%%.%%' THEN FROM_UNIXTIME(CAST(ROUND(T.STARTTIME) AS INT),'yyyyMMdd') WHEN LENGTH(T.STARTTIME) = 13 THEN FROM_UNIXTIME(CAST(ROUND(SUBSTR(T.STARTTIME,1,10)) AS INT),'yyyyMMdd') ELSE SUBSTR(T.STARTTIME,1,8) END, T.PHONE_ID, T1.LABEL_CODE; ALTER TABLE CDPI_USER_BEH_PREFER_D DROP IF EXISTS PARTITION(DATA_DAY='${day}'); ALTER TABLE CDPI_USER_BEH_PREFER_D ADD PARTITION (DATA_DAY='${day}') LOCATION '${day}'; INSERT OVERWRITE TABLE CDPI_USER_BEH_PREFER_D PARTITION ( DATA_DAY='${day}' ) SELECT * FROM ( SELECT VISIT_DAY, PHONE_ID, CONCAT(SUBSTR(LABEL_CODE,1,10),'000') AS LABEL_CODE, SUM(times), SUM(flows), count(distinct days) FROM CDPI_USER_BEH_PREFER_D_tmp where SUBSTR(LABEL_CODE,8,3) <> '000' GROUP BY VISIT_DAY, PHONE_ID, 
CONCAT(SUBSTR(LABEL_CODE,1,10),'000') UNION ALL SELECT VISIT_DAY, PHONE_ID, CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') AS LABEL_CODE, SUM(times), SUM(flows), count(distinct days) FROM CDPI_USER_BEH_PREFER_D_tmp where SUBSTR(LABEL_CODE,5,3) <> '000' GROUP BY VISIT_DAY, PHONE_ID, CONCAT(SUBSTR(LABEL_CODE,1,7),'000000') )A; "[hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat upload.sh #!/bin/bash # upload.sh ###################################################################################################### # function : 抽取文件、压缩、上传至省份前置机 # date: 2014/02/10 # author: YyDou # param:day(数据日期) ###################################################################################################### ## cd `dirname $0` eval $(grep AREA_NO ../common.cfg) eval $(grep REUPLOAD_COUNT ../common.cfg) eval $(grep UNMATCHTOP1000 ../common.cfg) eval $(grep URLAPPREPORT ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 #day=`date -d "yesterday" +%Y%m%d` #day=$1 curr_date=`date +%Y%m%d` curr_time=`date +%H` if [ ${curr_time} -gt '06' ]; then curr_date=`date -d +1days +%Y%m%d` fi if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then mkdir -p "$UNMATCHTOP1000/${curr_date}" fi; if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then mkdir -p "$URLAPPREPORT/${curr_date}" fi; cd $UNMATCHTOP1000/${curr_date} #rm -f *.* hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',url,count,download_bytes,upload_bytes,day_date,host from dpi_http_mark_top1000 where receive_day=${day}" >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',host,count,download_bytes,upload_bytes,day_date from dpi_site_mark_top1000 where receive_day=${day}" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT ##压缩 tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT cd $URLAPPREPORT/${curr_date} #rm -f *.* hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',totalcount,noisecount,unmatchcount,matchcount,urlcount,appcount,day_date,sitecount from dpi_http_mark_summary where receive_day=${day}" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',visit_type,ruleid,typecode,matchs,day_date from dpi_http_mark_rule where receive_day=${day}" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',visit_type,type_code,matchs,urlmatchs,day_date from dpi_http_mark_type where receive_day=${day}" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT (执行hiveSQL 定向到 文件) hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',site_id,site_code,matchs,day_date from dpi_site_mark_rule where receive_day=${day}" >> 
RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',site_code,matchs,hostmatchs,day_date from dpi_site_mark_type where receive_day=${day}" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT ##压缩(之后有删除) tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT[hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat logupload.sh #!/bin/bash #logupload.sh #*================================================= #* #* FileName : logupload.sh #* CreateDate: 2014-06-25 #* Abstract : #* Author : LiBin #* #* BONC All rights reserved. #*================================================== cd `dirname $0` PWDNOW=`pwd` eval $(grep URLAPPREPORT ../common.cfg) eval $(grep AREA_NO ../common.cfg) eval $(grep LOC_DIR ../common.cfg) if [ $# -ne 1 ] ; then echo "Input parameter error, there should be 1 parameters "; exit 1; fi; LOGDAY=$1 curr_time=`date +%H` DAY=`date +%Y%m%d` if [ ${curr_time} -gt '06' ]; then DAY=`date -d +1days +%Y%m%d` fi if [ ! -d "${URLAPPREPORT}/${DAY}" ] ; then mkdir -p "${URLAPPREPORT}/${DAY}" fi; cd ${LOC_DIR}/logs/${LOGDAY}/ tar -zcf LogInfo.${LOGDAY}.${AREA_NO}.tar.gz *.log mv -f LogInfo.${LOGDAY}.${AREA_NO}.tar.gz ${URLAPPREPORT}/${DAY}/ #前置机程序机分开省份使用 #cd ${PWDNOW} #sh ./ftp_putalldata.sh ${URLAPPREPORT}/${DAY}/LogInfo.${LOGDAY}.${AREA_NO}.tar.gz cd ${PWDNOW} [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByDay/ftp_getfilelist.sh cat: ../ProgramByDay/ftp_getfilelist.sh: No such file or directory [hzadmin@BDI2 ProgramByDay]$ cat ftp_getfilelist.sh cat: ftp_getfilelist.sh: No such file or directory [hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/match_H.sh #!/bin/bash # match_H.sh ###################################################################################################### # function : 匹配程序运行、统计报表出数据 # date: 2014/02/10 # author: YyDou # param:day(数据日期) ###################################################################################################### ## cd `dirname $0` PWDRE=`pwd` cd .. 
PWDNOW=`pwd` cd $PWDRE eval $(grep RULE_PROV_VERNO ${PWDNOW}/match.cfg) eval $(grep URL_MATCH ${PWDNOW}/common.cfg) eval $(grep URL_INPUT_PATH ${PWDNOW}/common.cfg) eval $(grep DPI_CONF_PATH ${PWDNOW}/common.cfg) eval $(grep R_URL_TYPE ${PWDNOW}/common.cfg) eval $(grep R_APP_TYPE ${PWDNOW}/common.cfg) eval $(grep R_NOISE_TYPE ${PWDNOW}/common.cfg) eval $(grep HIVE_USER ${PWDNOW}/common.cfg) eval $(grep LOC_DIR ${PWDNOW}/common.cfg) eval $(grep HIVE_LICENSE ${PWDNOW}/common.cfg) eval $(grep MR_VERSION ${PWDNOW}/common.cfg) eval $(grep PARA_JAR ${PWDNOW}/common.cfg) eval $(grep R_SITE_TYPE ${PWDNOW}/common.cfg) #判断参数是否输入 if [ $# -ne 2 ] ; then echo "Input parameter error : there should be 2 parameters"; exit 1; fi; day=$1 hour=$2 hadoop fs -ls ${URL_INPUT_PATH}${day}/${hour} > exist_test ; x=`wc -l exist_test | cut -d ' ' -f 1`; if [ ${x} = 0 ] ; then echo " HDFS DIR ERROR : ${URL_INPUT_PATH}${day}/${hour} file is not exist !" rm -f exist_test exit 1; fi; rm -f exist_test hadoop fs -rm ${R_URL_TYPE}R_URL_TYPE*.txt hadoop fs -rm ${R_APP_TYPE}R_APP_TYPE*.txt hadoop fs -rm ${R_NOISE_TYPE}R_NOISE_TYPE*.txt hadoop fs -rm ${R_SITE_TYPE}R_SITE_TYPE*.txt hadoop fs -put ${PWDNOW}/R_URL_TYPE*.txt ${R_URL_TYPE} hadoop fs -put ${PWDNOW}/R_APP_TYPE*.txt ${R_APP_TYPE} hadoop fs -put ${PWDNOW}/R_NOISE_TYPE*.txt ${R_NOISE_TYPE} hadoop fs -put ${PWDNOW}/R_SITE_TYPE*.txt ${R_SITE_TYPE} echo "${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar"; hadoop jar ${PWDNOW}/${RULE_PROV_VERNO}${MR_VERSION}.jar com.bonc.mapred.UserurlAllMain ${PARA_JAR} ${URL_INPUT_PATH}${day}/${hour} ${URL_MATCH}${day}/${hour} $PWDNOW/${DPI_CONF_PATH} #hadoop fs -rm ${url_match}${day}/part-m-*.gz hive -e " add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; set dpi.encode.license=${HIVE_LICENSE}; alter table dpi_http_dtl_mark_match drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_dtl_mark_noise drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_dtl_mark_unmatch drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_dtl_mark_match add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/match'; alter table dpi_http_dtl_mark_noise add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/noise'; alter table dpi_http_dtl_mark_unmatch add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}/unmatch'; " cd $PWDNOW [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat report_summary_H.sh cat: report_summary_H.sh: No such file or directory [hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/report_summary_H.sh #!/bin/bash # report_H.sh #*================================================= #* #* FileName : report_H.sh #* CreateDate: 2014-04-03 #* Abstract : Statistical analysis of the results of matches #* Author : SPP #* #* BONC All rights reserved. 
#*================================================== cd `dirname $0` eval $(grep RULE_PROV_VERNO ../match.cfg) eval $(grep URL_MATCH ../common.cfg) eval $(grep URL_INPUT_PATH ../common.cfg) eval $(grep DPI_CONF_PATH ../common.cfg) eval $(grep R_URL_TYPE ../common.cfg) eval $(grep R_APP_TYPE ../common.cfg) eval $(grep R_NOISE_TYPE ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep LOC_DIR ../common.cfg) eval $(grep HIVE_LICENSE ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; ${PARA_HIVE} set dpi.encode.license=${HIVE_LICENSE}; set mapred.job.name=CMSS-COUNT; alter table dpi_http_dtl_mark_match_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_dtl_mark_match_summary add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}'; alter table dpi_http_dtl_mark_unmatch_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_dtl_mark_unmatch_summary add partition (receive_day='${day}',receive_hour='${hour}') location '${day}/${hour}'; insert overwrite table dpi_http_dtl_mark_match_summary partition(receive_day=${day},receive_hour=${hour}) select phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host, count(id) as count,sum(download_bytes) ,sum(upload_bytes) from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} group by phone_id,visit_type,rule_id,type_code,app_type_code,site_id,site_code,starttime,host; insert overwrite table dpi_http_dtl_mark_unmatch_summary partition(receive_day=${day},receive_hour=${hour}) select host,url,count(id),sum(download_bytes),sum(upload_bytes) from dpi_http_dtl_mark_unmatch where receive_day=${day} and receive_hour=${hour} group by host,url; " [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/report_H.sh #!/bin/bash # report_H.sh #*================================================= #* #* FileName : report_H.sh #* CreateDate: 2014-04-03 #* Abstract : Statistical analysis of the results of matches #* Author : SPP #* #* BONC All rights reserved. 
#*================================================== cd `dirname $0` eval $(grep RULE_PROV_VERNO ../match.cfg) eval $(grep URL_MATCH ../common.cfg) eval $(grep URL_INPUT_PATH ../common.cfg) eval $(grep DPI_CONF_PATH ../common.cfg) eval $(grep R_URL_TYPE ../common.cfg) eval $(grep R_APP_TYPE ../common.cfg) eval $(grep R_NOISE_TYPE ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep LOC_DIR ../common.cfg) eval $(grep HIVE_LICENSE ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 hive -e" add jar ${LOC_DIR}/URLAppProgram_sf/bin/Dpiformat2.0.jar; use ${HIVE_USER}; ${PARA_HIVE} set dpi.encode.license=${HIVE_LICENSE}; alter table dpi_http_mark_summary drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_site_mark_rule drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_site_mark_top1000 drop IF EXISTS partition(receive_day='${day}',receive_hour='${hour}'); alter table dpi_http_mark_summary add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}'; alter table dpi_http_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}'; alter table dpi_http_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}'; alter table dpi_site_mark_rule add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}'; alter table dpi_site_mark_top1000 add partition (receive_day=${day},receive_hour='${hour}') location '${day}/${hour}'; insert overwrite table dpi_http_mark_rule partition(receive_day=${day},receive_hour=${hour}) select visit_type,rule_id,typecode,matchs,'${day}' from ( select visit_type,rule_id,type_code as typecode,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by visit_type,rule_id,type_code union all select visit_type,rule_id,app_type_code as typecode,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by visit_type,rule_id,app_type_code)sub ; insert overwrite table dpi_site_mark_rule partition(receive_day=${day},receive_hour=${hour}) select site_id,site_code,matchs,'${day}' from ( select site_id,site_code,sum(count) as matchs from dpi_http_dtl_mark_match_summary where receive_day=${day} and receive_hour=${hour} and site_id <> '' group by site_id,site_code ) sub ; insert overwrite table dpi_http_mark_top1000 partition(receive_day=${day},receive_hour=${hour}) select host,url,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}' from dpi_http_dtl_mark_unmatch_summary where receive_day =${day} and receive_hour=${hour} group by host,url order by count desc limit 5000; insert overwrite table dpi_site_mark_top1000 partition(receive_day=${day},receive_hour=${hour}) select host,sum(count) as count,sum(download_bytes) as download_bytes,sum(upload_bytes) as upload_bytes,'${day}' from dpi_http_dtl_mark_match_summary where receive_day =${day} and site_id = '' group by host order by count desc limit 1000; insert overwrite table temp_dpi_match
partition(receive_day=${day},receive_hour=${hour}) select count(t.id) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} group by id ) t ; insert overwrite table temp_dpi_url partition(receive_day=${day},receive_hour=${hour}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,count(url.id) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='url' group by id) url ; insert overwrite table temp_dpi_app partition(receive_day=${day},receive_hour=${hour}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,count(app.id) as appcount,cast(0 as bigint) as sitecount from (select id from dpi_http_dtl_mark_match where receive_day=${day} and receive_hour=${hour} and visit_type='app' group by id) app ; insert overwrite table temp_dpi_noise partition(receive_day=${day},receive_hour=${hour}) select cast(0 as bigint) as matchcount,count(id) as noisecount ,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_noise where receive_day=${day} and receive_hour=${hour} ; insert overwrite table temp_dpi_unmatch partition(receive_day=${day},receive_hour=${hour}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,sum(count) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,cast(0 as bigint) as sitecount from dpi_http_dtl_mark_unmatch_summary where receive_day=${day} and receive_hour=${hour} ; insert overwrite table temp_dpi_site partition(receive_day=${day},receive_hour=${hour}) select cast(0 as bigint) as matchcount,cast(0 as bigint) as noisecount,cast(0 as bigint) as unmatchcount,cast(0 as bigint) as urlcount,cast(0 as bigint) as appcount,sum(matchs) as sitecount from dpi_site_mark_type where receive_day=${day} and receive_hour=${hour} ; insert overwrite table dpi_http_mark_summary partition(receive_day='${day}',receive_hour=${hour}) select sum(matchcount+noisecount+unmatchcount) as totalcount,sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from ( select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_match where receive_day=${day} and receive_hour=${hour} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_url where receive_day=${day} and receive_hour=${hour} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_app where receive_day=${day} and receive_hour=${hour} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_noise where receive_day=${day} and receive_hour=${hour} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_unmatch where receive_day=${day} and receive_hour=${hour} union all select matchcount,noisecount,unmatchcount,urlcount,appcount,sitecount from temp_dpi_site where receive_day=${day} and receive_hour=${hour} ) sub ; "[hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cat ../ProgramByHour/upload_H.sh #!/bin/bash # upload_H.sh 
#*================================================= #* #* FileName : upload_H.sh #* CreateDate: 2014-04-03 #* Abstract : State statistics package #* Author : LiBin #* #* BONC All rights reserved. #*================================================== ## cd `dirname $0` eval $(grep AREA_NO ../common.cfg) eval $(grep REUPLOAD_COUNT ../common.cfg) eval $(grep UNMATCHTOP1000 ../common.cfg) eval $(grep URLAPPREPORT ../common.cfg) eval $(grep HIVE_USER ../common.cfg) eval $(grep PARA_HIVE ../common.cfg) #判断参数是否输入 if [ $# -ne 1 ] ; then echo "Input parameter error : there should be 1 parameters"; exit 1; fi; day=$1 #day=`date -d "yesterday" +%Y%m%d` #day=$1 curr_date=`date +%Y%m%d` curr_time=`date +%H` if [ ${curr_time} -gt '06' ]; then curr_date=`date -d +1days +%Y%m%d` fi if [ ! -d "$UNMATCHTOP1000/${curr_date}" ] ; then mkdir -p "$UNMATCHTOP1000/${curr_date}" fi; if [ ! -d "$URLAPPREPORT/${curr_date}" ] ; then mkdir -p "$URLAPPREPORT/${curr_date}" fi; cd $UNMATCHTOP1000/${curr_date} #rm -f *.* hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',url,sum(count) count,sum(download_bytes),sum(upload_bytes),'${day}',host from dpi_http_mark_top1000 where receive_day=${day} group by url,host,day_date order by count desc limit 5000; " >> UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',host,sum(count),sum(download_bytes),sum(upload_bytes),'${day}' from dpi_site_mark_top1000 where receive_day=${day} group by host,day_date order by count desc limit 1000;" >> UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT ##压缩 tar -czf UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f UnMatchSiteTop1000.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT cd $URLAPPREPORT/${curr_date} #rm -f *.* hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',sum(totalcount),sum(noisecount),sum(unmatchcount),sum(matchcount),sum(urlcount),sum(appcount),'${day}',sum(sitecount) from dpi_http_mark_summary where receive_day=${day};" >> URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',visit_type,ruleid,typecode,sum(matchs),'${day}' from dpi_http_mark_rule where receive_day=${day} group by visit_type,ruleid,typecode;" >> RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',visit_type,typecode,count(distinct ruleid) as matchs,sum(matchs),'${day}' from dpi_http_mark_rule where receive_day=${day} group by visit_type,typecode;" >> TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; alter table dpi_http_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00'); alter table dpi_http_mark_type add partition (receive_day=${day},receive_hour='00') location '${day}/00'; insert overwrite table dpi_http_mark_type partition(receive_day=${day},receive_hour=00) select 
visit_type,typecode,count(distinct ruleid),sum(matchs),'${day}' from dpi_http_mark_rule where receive_day=${day} group by visit_type,typecode;" hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',site_id,site_code,sum(matchs),'${day}' from dpi_site_mark_rule where receive_day=${day} group by site_id,site_code" >> RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; select '${AREA_NO}',site_code,count(distinct site_id),sum(matchs),'${day}' from dpi_site_mark_rule where receive_day=${day} group by site_code" >> TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT hive -e " use ${HIVE_USER}; ${PARA_HIVE} set mapred.job.name=CMSS-UPLOAD; alter table dpi_site_mark_type drop IF EXISTS partition(receive_day='${day}',receive_hour='00'); alter table dpi_site_mark_type add partition (receive_day=${day},receive_hour='00') location '${day}/00'; insert overwrite table dpi_site_mark_type partition(receive_day=${day},receive_hour=00) select site_code,count(distinct site_id),sum(matchs),'${day}' from dpi_site_mark_rule where receive_day=${day} group by site_code;" ##压缩 tar -czf URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f URLStatInfo.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f RuleSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT tar -czf TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT.tar.gz TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT && rm -f TypeSiteDetails.${curr_date}.${day}.${REUPLOAD_COUNT}.${AREA_NO}.DAT [hzadmin@BDI2 ProgramByDay]$ [hzadmin@BDI2 ProgramByDay]$ cd ..
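Editor's note: the ProgramByHour scripts above all rebuild their output tables with the same Hive idiom: drop the (receive_day, receive_hour) partition if it already exists, re-add it at a dated location, then INSERT OVERWRITE into it so a rerun for the same day/hour is safe. A minimal stand-alone sketch of that idiom follows, assuming the dpi database used elsewhere in this session; my_mark_table is a hypothetical target table, while the source table and its columns are the real dpi_http_dtl_mark_unmatch_summary ones, and the relative location string simply mirrors what report_H.sh passes:

#!/bin/bash
# reload_partition.sh -- sketch of the drop / add / INSERT OVERWRITE partition idiom
# Usage: sh reload_partition.sh 20160509 13
day=$1
hour=$2
hive -e "
use dpi;
-- drop any stale registration so the same (day, hour) can be reprocessed safely
alter table my_mark_table drop if exists partition (receive_day='${day}', receive_hour='${hour}');
-- register the partition at the dated directory the upstream step writes
alter table my_mark_table add partition (receive_day='${day}', receive_hour='${hour}') location '${day}/${hour}';
-- rebuild the partition contents from the hourly detail summary
insert overwrite table my_mark_table partition (receive_day=${day}, receive_hour=${hour})
select host, sum(count), sum(download_bytes), sum(upload_bytes), '${day}'
from dpi_http_dtl_mark_unmatch_summary
where receive_day=${day} and receive_hour=${hour}
group by host;
"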
[hzadmin@BDI2 URLAppProgram_sf]$ cat remove_files.sh #!/bin/bash #*================================================= #* #* FileName : remove_files.sh #* CreateDate: 2014-02-25 #* Abstract : Delete 'UACDS_YYYYMMDD_**.tar.gz' files #* on a regular , most retain ten files. #* Author : LiangWei #* #* BONC All rights reserved. #*================================================== cd `dirname $0` eval $(grep DATA_HOME public.cfg) eval $(grep ZIP_LIMIT public.cfg) eval $(grep REPORT_LIMIT public.cfg) eval $(grep UNMATCHTOP1000 public.cfg) eval $(grep URLAPPREPORT public.cfg) eval $(grep SUMMARY_DAY public.cfg) eval $(grep DELETE_DAY public.cfg) eval $(grep URL_MATCH common.cfg) eval $(grep TEMP_DPI_MATCH common.cfg) eval $(grep TEMP_DPI_NOISE common.cfg) eval $(grep TEMP_DPI_UNMATCH common.cfg) eval $(grep TEMP_DPI_URL common.cfg) eval $(grep TEMP_DPI_APP common.cfg) eval $(grep TEMP_DPI_SITE common.cfg) eval $(grep MATCH_SUMMARY common.cfg) eval $(grep UNMATCH_SUMMARY common.cfg) upl=$((ZIP_LIMIT+1)) cd $DATA_HOME a=` ls UA* | wc -l` ls UA* | sort -r > list if [ $a -gt $ZIP_LIMIT ]; then sed -n ${upl},${a}p list > dellist c=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=c;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` rm $greps echo 'delete file:' $greps done rm dellist else echo ' Deleting data did not reach the upper limit!' fi rm list #*================================================= #* #* #* CreateDate: 2014-02-25 #* Abstract : Delete UnMatchTop1000 Folder on a regular. #* Author : LiangWei #* #* BONC All rights reserved. #*================================================== upl=$((REPORT_LIMIT+1)) ls -lt $UNMATCHTOP1000 |awk '/^d/ {print $9}'| sort -r > list q=`wc -l list | cut -d ' ' -f 1` if [ $q -gt $REPORT_LIMIT ]; then sed -n ${upl},${q}p list > dellist x=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=x;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` rm -rf ${UNMATCHTOP1000}/${greps} echo 'delete file:' $greps done rm dellist else echo ' UnMatchTop1000 数据没有达到删除上限!' fi rm list #*================================================= #* #* #* CreateDate: 2014-02-25 #* Abstract : Delete URLAppReport Folder on a regular. #* Author : LiangWei #* #* BONC All rights reserved. #*================================================== upl=$((REPORT_LIMIT+1)) ls -lt $URLAPPREPORT |awk '/^d/ {print $9}'| sort -r > list w=`wc -l list | cut -d ' ' -f 1` if [ $w -gt $REPORT_LIMIT ]; then sed -n ${upl},${w}p list > dellist v=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=v;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` rm -rf ${URLAPPREPORT}/${greps} echo 'delete file:' $greps done rm dellist else echo ' URLAppReport 数据没有达到删除上限!' fi rm list #删除hdfs中match文件 upl=$((DELETE_DAY+1)) hadoop fs -ls $URL_MATCH |awk '/^d/ {print $8}'| sort -r > list w=`wc -l list | cut -d ' ' -f 1` if [ $w -gt $DELETE_DAY ]; then sed -n ${upl},${w}p list > dellist v=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=v;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` hadoop fs -rmr ${URL_MATCH}/${greps} echo 'delete file:' ${URL_MATCH}/${greps} done rm dellist else echo ' URL_MATCH 数据没有达到删除上限!' 
fi rm list #删除hdfs中间表数据 DAY=`date -d -"$DELETE_DAY"day +%Y%m%d` hadoop fs -rmr ${TEMP_DPI_MATCH}/receive_day=${DAY} hadoop fs -rmr ${TEMP_DPI_NOISE}/receive_day=${DAY} hadoop fs -rmr ${TEMP_DPI_UNMATCH}/receive_day=${DAY} hadoop fs -rmr ${TEMP_DPI_URL}/receive_day=${DAY} hadoop fs -rmr ${TEMP_DPI_APP}/receive_day=${DAY} hadoop fs -rmr ${TEMP_DPI_SITE}/receive_day=${DAY} #hadoop fs -rm -r ${url_match}/sitekey/${DAY} #删除match汇总表数据 upl=$((SUMMARY_DAY+1)) hadoop fs -ls $MATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list w=`wc -l list | cut -d ' ' -f 1` if [ $w -gt $SUMMARY_DAY ]; then sed -n ${upl},${w}p list > dellist v=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=v;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` hadoop fs -rmr ${MATCH_SUMMARY}/${greps} echo 'delete file:' ${MATCH_SUMMARY}/${greps} done rm dellist else echo ' MATCH_SUMMARY 数据没有达到删除上限!' fi rm list #删除unmatch汇总表数据 upl=$((SUMMARY_DAY+1)) hadoop fs -ls $UNMATCH_SUMMARY |awk '/^d/ {print $8}'| sort -r > list w=`wc -l list | cut -d ' ' -f 1` if [ $w -gt $SUMMARY_DAY ]; then sed -n ${upl},${w}p list > dellist v=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=v;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` hadoop fs -rmr ${UNMATCH_SUMMARY}/${greps} echo 'delete file:' ${UNMATCH_SUMMARY}/${greps} done rm dellist else echo ' UNMATCH_SUMMARY 数据没有达到删除上限!' fi rm list [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/match_detail.sh #!/bin/bash #match_details.sh #*================================================= #* #* FileName : match_details.sh #* CreateDate: 2015-04-22 #* Abstract : CMSS Interface #* Author : SPP #* 主程序 #* BONC All rights reserved. #*================================================== cd `dirname $0` PWDNOW=`pwd` eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg) eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg) eval $(grep MATCH_INPUT ${PWDNOW}/details_conf.cfg) eval $(grep FTP_DIR ${PWDNOW}/details_conf.cfg) eval $(grep DELAY_DAY ${PWDNOW}/details_conf.cfg) eval $(grep DETAILS ${PWDNOW}/details_conf.cfg) unset details day steps args=`getopt r:d:s: $*` if test $? != 0 then echo " Usage is ./match_details.sh [-r details -d day -s steps ] " echo " Use database administrator account for user name " exit 1 fi set -- $args for i do case "$i" in -r) shift;details=$1;shift;; -d) shift;day=$1;shift;; -s) shift;steps=$1;shift;; esac done #判断传入参数日期是否为空,如果为空则获取系统时间 if [ ! -n "$day" ] ; then echo "not input days so day=today" day=`date -d -${DELAY_DAY}days +%Y%m%d` create_day=`date +%Y%m%d` else create_day=$day fi if [ ! -n "$details" ] ; then LIST=$DETAILS DETAILS_LIST=`echo $LIST | sed 's/,/ /g'` else DETAILS_LIST=$details fi echo "create_day:" $create_day echo "day:" $day #判断目录是否存在,如果不存在则重新创建 for DETAILS in $DETAILS_LIST do if [ ! -d "$MATCH_DETAILS/${create_day}/$DETAILS" ] ; then mkdir -p "$MATCH_DETAILS/${create_day}/$DETAILS" fi if [ ! -d "$MATCH_DETAILS/${create_day}/logs" ] ; then mkdir -p "$MATCH_DETAILS/${create_day}/logs" fi rm -f $MATCH_DETAILS/${create_day}/logs/run_"$DETAILS"_"$create_day".log #判断步骤是否为空 if [ ! 
-n "$steps" ] ; then ./merge_file.sh $day $DETAILS> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1 ./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1 else if [ $steps -eq 2 ] ; then ./get_file.sh $day $create_day $DETAILS>> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1 else echo " please make your second var is 2.........">> ${MATCH_DETAILS}/${create_day}/logs/run_"$DETAILS"_"$day".log 2>&1 fi fi done[hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/ResultMatch/remove_details.sh ?#!/bin/bash #remove_details.sh #*================================================= #* #* FileName : remove_details.sh #* CreateDate: 2014-10-22 #* Abstract : delete MATCH_DETAILS files #* Author : WangNing #* #* BONC All rights reserved. #*================================================== cd `dirname $0` PWDNOW=`pwd` eval $(grep MATCH_DETAILS ${PWDNOW}/details_conf.cfg) eval $(grep MATCH_PRINT ${PWDNOW}/details_conf.cfg) eval $(grep REPORT_LIMIT ${PWDNOW}/details_conf.cfg) eval $(grep HDFS_LIMIT ${PWDNOW}/details_conf.cfg) upl=$((REPORT_LIMIT+1)) ls -lt $MATCH_DETAILS |awk '/^d/ {print $9}'| sort -r > list q=`wc -l list | cut -d ' ' -f 1` if [ $q -gt $REPORT_LIMIT ]; then sed -n ${upl},${q}p list > dellist x=`wc -l dellist | cut -d ' ' -f 1` for ((m=1;m<=x;m++)) do grepstr='sed -n '$m'p dellist' greps=`$grepstr` rm -rf ${MATCH_DETAILS}/${greps} echo 'delete file:' $greps done rm -f dellist else echo ' MATCH_DETAILS 数据没有达到删除上限!' fi rm -f list #删除hdfs上的输出文件 up_l=$((HDFS_LIMIT+1)) hadoop fs -ls ${MATCH_PRINT} |awk '/^d/ {print $8}'| sort -r>files.txt s=`cat files.txt |wc -l` if [ $s -gt $HDFS_LIMIT ]; then sed -n ${up_l},${s}p files.txt | while read line do echo 'delete file:' $line hadoop fs -rm -r $line done else echo "hdfs 数据没有达到删除上限!" fi rm -f files.txt [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/urlAPP/hive.sh #!/bin/bash . 
$HOME/.bash_profile #dateday=`date -d -1day +%Y%m%d` dateday=$1 hive -e " set mapreduce.job.queuename=thirdpart1;use dpi; add jar /home/hzadmin/bj_ggsn/jar/Decode.jar; create temporary function decode as 'Decode'; select PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,sum(DOWNLOAD_BYTES),sum(UPLOAD_BYTES),keyword,count(id),Decode(gen_flag,'','3G','4G','4G',gen_flag) from dpi_http_dtl_mark_match where receive_day='${dateday}' and phone_id is not null group by PHONE_ID,VISIT_TYPE,TYPE_CODE,TYPE_NAME1,TYPE_NAME2,TYPE_NAME3,TYPE_NAME4,TYPE_NAME5,TYPE_NAME6,TYPE_LEVEL,APP_TYPE_CODE,APP_TYPE_NAME1,APP_TYPE_NAME2,APP_TYPE_NAME3,keyword,gen_flag" > /dfs/ftp/hzadmin/test/${dateday}.txt split -l 12000000 /dfs/ftp/hzadmin/test/${dateday}.txt /dfs/ftp/hzadmin/test/${dateday} mv /dfs/ftp/hzadmin/test/${dateday}aa /dfs/ftp/hzadmin/bj_data/${dateday}001.txt mv /dfs/ftp/hzadmin/test/${dateday}ab /dfs/ftp/hzadmin/bj_data/${dateday}002.txt mv /dfs/ftp/hzadmin/test/${dateday}ac /dfs/ftp/hzadmin/bj_data/${dateday}003.txt mv /dfs/ftp/hzadmin/test/${dateday}ad /dfs/ftp/hzadmin/bj_data/${dateday}004.txt mv /dfs/ftp/hzadmin/test/${dateday}ae /dfs/ftp/hzadmin/bj_data/${dateday}005.txt mv /dfs/ftp/hzadmin/test/${dateday}af /dfs/ftp/hzadmin/bj_data/${dateday}006.txt mv /dfs/ftp/hzadmin/test/${dateday}ag /dfs/ftp/hzadmin/bj_data/${dateday}007.txt mv /dfs/ftp/hzadmin/test/${dateday}ah /dfs/ftp/hzadmin/bj_data/${dateday}008.txt mv /dfs/ftp/hzadmin/test/${dateday}ai /dfs/ftp/hzadmin/bj_data/${dateday}009.txt [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start1.sh #!/bin/sh source ~/.bash_profile datetime=$(date --date "1 days ago" +%Y%m%d) cd /home/hzadmin/bj_ggsn/ sh /home/hzadmin/bj_ggsn/select1.sh $datetime >> log/${datetime}_1.log 2>&1 sh /home/hzadmin/bj_ggsn/select2.sh $datetime >> log/${datetime}_2.log 2>&1 hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh sh /home/hzadmin/urlAPP/BoncRun.sh sh /home/hzadmin/urlAPP/hive.sh $datetime sh /home/hzadmin/bj_ggsn/delete.sh [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/delete.sh #!/bin/bash source ~/.bash_profile dataday=$(date --date "7 days ago" +%Y%m%d) hadoop fs -rm -r /share/hzadmin/urlapp/spp/dpi_http_dtl_mark_match_summary/receive_day=$dataday hadoop fs -rm -r /share/hzadmin/external_table/DMP_SOR/USERLABEL/BONC/INFO/http/$dataday hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$dataday hive -e "use dpi;alter table dpi_http_dtl_mark_match_summary drop partition(receive_day='$dataday')" [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start2.sh #!/bin/sh source ~/.bash_profile #datetime=$(date --date "1 days ago" +%Y%m%d) datetime=$1 cd /home/hzadmin/bj_ggsn/ sh /home/hzadmin/bj_ggsn/select1.sh $datetime sh /home/hzadmin/bj_ggsn/select2.sh $datetime hadoop fs -rm -r /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ #sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh sh /home/hzadmin/urlAPP/BoncRun1.sh $datetime sh 
/home/hzadmin/urlAPP/ResultMatch/match_detail.sh -d $datetime sh /home/hzadmin/urlAPP/hive.sh $datetime [hzadmin@BDI2 URLAppProgram_sf]$ crontab -l 00 03 * * * sh /home/hzadmin/bj_ggsn/start1.sh &>/home/hzadmin/bj_ggsn/start.log 00 13 * * * sh /dfs/ftp/hzadmin/trydemo/dailycheckdemo.sh >>/dfs/ftp/hzadmin/trydemo/log.txt [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/start1.sh #!/bin/sh source ~/.bash_profile datetime=$(date --date "1 days ago" +%Y%m%d) cd /home/hzadmin/bj_ggsn/ sh /home/hzadmin/bj_ggsn/select1.sh $datetime >> log/${datetime}_1.log 2>&1 sh /home/hzadmin/bj_ggsn/select2.sh $datetime >> log/${datetime}_2.log 2>&1 hadoop fs -mkdir /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ hadoop fs -mv /apps/hive/warehouse/dpi.db/bj_ggsn_mobile/receive_day=$datetime/* /share/hzadmin/external_table/DMP_SSA/DPI/$datetime/ sh /home/hzadmin/urlAPP/URLAppProgram_sf/get_uacds.sh sh /home/hzadmin/urlAPP/BoncRun.sh sh /home/hzadmin/urlAPP/hive.sh $datetime sh /home/hzadmin/bj_ggsn/delete.sh [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ [hzadmin@BDI2 URLAppProgram_sf]$ cat /home/hzadmin/bj_ggsn/select2.sh hours=/home/hzadmin/bj_ggsn/hours.txt datetime=$1 while read LINE do hadoop fs -test -e /share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE} if [ $? -eq 0 ]; then hive -e "use dpi;alter table bj_ggsn_4g add partition (receive_day='${datetime}',hours='${LINE}') location '/share/external_table/ssa/DPI_MBL_4G/ALL/${datetime}/${LINE}'" >>log/${datetime}.log 2>>log/${datetime}.log else echo 'not exist' fi done < $hours hive -e" use dpi; set hive.auto.convert.join=false; set mapreduce.job.queuename=thirdpart1; from t_user m join bj_ggsn_4g t on(m.usernum = t.MDN and m.receive_day = '${datetime}' and t.receive_day = '${datetime}') insert into table bj_ggsn_mobile partition (receive_day = '${datetime}') select regexp_extract(t.MDN,'(1[0-9]{10})') MDN, t.LAC, t.CI, t.IMEI, t.BUSITYPE, t.CAPTURETIME, t.ENDTIME, t.DURATION, t.FLOWUP, t.FLOWDOWN, t.FLOWALL, t.RATTYPE, t.TERMIANL_IP, t.DESTIP, t.STATUSCODE, t.USERAGENT, t.APN, t.IMSI, t.SGSNIP, t.GGSNIP, t.CONTENTTYPE, t.SOURCEPORT, t.DESTPORT, t.LOGOCODE, t.URL, t.RESULT, t.HOST, '4G', t.YULIU2, t.YULIU3; " [hzadmin@BDI2 URLAppProgram_sf]$
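Editor's note: per the crontab above, only start1.sh runs automatically (always for the previous day), while start2.sh takes an explicit YYYYMMDD argument, so it is the natural entry point for reprocessing days that were missed. A small backfill wrapper is sketched below under that assumption; backfill.sh and its per-day log names are hypothetical and not part of the deployment shown in this session:

#!/bin/bash
# backfill.sh -- rerun the dated pipeline (start2.sh) for an inclusive date range
# Usage: sh backfill.sh 20160701 20160705
source ~/.bash_profile
start_day=$1
end_day=$2
d=$start_day
while [ "$d" -le "$end_day" ]; do
    echo "reprocessing $d"
    # one log per day so individual reruns stay easy to inspect
    sh /home/hzadmin/bj_ggsn/start2.sh "$d" > /home/hzadmin/bj_ggsn/log/backfill_${d}.log 2>&1
    # GNU date advances the YYYYMMDD string by one calendar day
    d=$(date -d "$d +1 day" +%Y%m%d)
done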
Original article: http://blog.csdn.net/hzdxw/article/details/51992982