***************************************************************************************************
##build-input.sh
#!/bin/bash
# build-input.sh — driver script: runs scripts/input/input.sh for today's date.
set -eu

# Project root is one level above this script's directory.
current_home=$(readlink -f "$(dirname "$0")")/..
source "$current_home/conf/app.conf"
source "$current_home/scripts/app.rc"

# Print usage to stderr and exit with the given status ($1).
function usage() {
  echo "Usage: $0" >&2
  exit "$1"
}

cur_date=$(date +%Y%m%d)

while getopts "h" opt; do
  case $opt in
    h) usage 0;;  # fixed: called a non-existent "uage", which crashed under set -eu
    \?) echo "Invalid option: -$OPTARG" >&2; usage 1;;
  esac
done
shift $((OPTIND-1))

#pre_date=$(date -d "$cur_date -1 day" +%Y%m%d)
input_dir=$current_home/scripts/input
input=$input_dir/input.sh

# fixed: dropped the duplicated BEGIN/"STAR" and FINISH/END log pairs ("STAR"
# was a typo for START) and the no-op "2>&1" redirections on echo.
echo "$(getdate) > BEGIN Build Input"
sh "$input" "$cur_date"
echo "$(getdate) > END Build Input"
***************************************************************************************************
##input/input.sh
#!/bin/bash
# input/input.sh — runs the input-building Hadoop job for one day of access logs.
# Usage: input.sh YYYYMMDD
# NOTE(review): "set -e" is left disabled, apparently on purpose — the
# busy/done guard below relies on hexist returning non-zero without
# aborting the script.
#set -eu
current_home=$(readlink -f "$(dirname "$0")")/../..
source "$current_home/conf/app.conf"
source "$current_home/scripts/app.rc"

# Print usage to stderr and exit with the given status ($1).
function usage() {
  echo "Usage: $0 YYYYMMDD" >&2
  exit "$1"
}

class=/ClassPath/
job_name=$(basename "${0%.sh}")
job_conf=$(get_job_conf "$0")
libjars=$exec_jar
force=true

# The date argument is mandatory; fail with usage instead of a silent empty var.
[ $# -ge 1 ] || usage 1
cur_date=$1
#pre_date=$(date -d "$cur_date -1 day" +%Y%m%d)

# fixed: the input path had a hard-coded test date (20140618); use $cur_date.
inputdata=/group/laiwang/laiwang/accesslog/$cur_date/*/*
outputdata=$app_prefix/out
busy_file=$outputdata.busy
done_file=$outputdata.done

# With force on, clear stale markers so the job can re-run.
$force && hrm "$done_file" && hrm "$busy_file"
# Skip when another run is in flight or the output is already complete.
hexist "$busy_file" || hexist "$done_file" && exit

htouch "$busy_file"
hrmr "$outputdata"
# $inputdata is left unquoted deliberately: the */* glob must expand here.
hjar "$exec_jar" "$class" $inputdata "$outputdata"
htouch "$done_file"
************************************************************************************************
##app.conf — defines shared variables; each script sources this file so the variables are shared between them
# NOTE: this file is meant to be sourced; it relies on $current_home being
# set by the sourcing script before the "source" line.
# JDK used by the Hadoop client commands.
export JAVA_HOME="/home/yunti/java-current"
# hadoop conf: client install location and optional --config directory
# (consumed by the hadoop() wrapper in the sourced rc file).
hadoop_home=/home/yunti/hadoop-current
hadoop_exec=$hadoop_home/bin/hadoop
hadoop_exec_conf=$HOME/hadoop/conf
# libjar conf: job jar refreshed by build.sh (cp target/*.jar jar/snapshot.jar).
exec_jar=$current_home/jar/snapshot.jar
# io conf: HDFS path prefix for job output (input.sh writes $app_prefix/out).
app_prefix=/HDFSHome/
#local path conf: directory where hjar writes per-job .tag/.log/.cnt files
log_dir=$current_home/log
************************************************************************************************
##hadoop.rc
# Hadoop resource file
# Run the hadoop client binary, adding --config when a conf dir is configured.
# Globals: hadoop_exec (binary path), hadoop_exec_conf (optional conf dir).
# All arguments are forwarded verbatim.
function hadoop() {
  # fixed: quoted all expansions; ${var:-} keeps the test safe under set -u.
  if [ -n "${hadoop_exec_conf:-}" ]; then
    "$hadoop_exec" --config "$hadoop_exec_conf" "$@"
  else
    "$hadoop_exec" "$@"
  fi
}
# Thin wrappers over "hadoop fs" subcommands; all arguments are forwarded.
hput()      { hadoop fs -put "$@"; }
hget()      { hadoop fs -get "$@"; }
hcat()      { hadoop fs -cat "$@"; }
hls()       { hadoop fs -ls "$@"; }
htext()     { hadoop fs -text "$@"; }
hgetmerge() { hadoop fs -getmerge "$@"; }

# Create an empty marker file; always reports success (output discarded).
htouch() {
  hadoop fs -touchz "$@" >/dev/null 2>&1
  return 0
}

# Succeeds iff the path exists on HDFS.
hexist() { hadoop fs -test -e "$@"; }

# Recursively remove a path, but only if it exists; output discarded.
hrmr() {
  if hexist "$@"; then
    hadoop fs -rmr "$@" >/dev/null 2>&1
  fi
}

# Remove a file, but only if it exists; output discarded.
hrm() {
  if hexist "$@"; then
    hadoop fs -rm "$@" >/dev/null 2>&1
  fi
}

hmv()     { hadoop fs -mv "$@"; }
hmkdir()  { hadoop fs -mkdir "$@"; }
hcp()     { hadoop fs -cp "$@" >/dev/null 2>&1; }
hsetrep() { hadoop fs -setrep "$@" >/dev/null 2>&1; }
# Print the number of part- files under an HDFS path (no output if missing).
function hdfs_part_num() {
  if hexist "$@"; then
    # fixed: the grep pattern used typographic quotes (‘part-‘), so grep
    # searched for the literal quote characters instead of "part-".
    hadoop fs -ls "$@" | grep 'part-' | wc -l
  fi
}
# Print the byte size of an HDFS path, taken from "fs -dus" output
# (no output if the path does not exist).
function hdfs_size() {
  if hexist "$@"; then
    # fixed: the awk program was wrapped in typographic quotes (‘…‘), which
    # awk received as literal characters; use plain single quotes.
    hadoop fs -dus "$@" | grep "$@" | awk '{print $2;}'
  fi
}
# Print "date,time" (ls fields 6 and 7) for an HDFS path
# (no output if the path does not exist).
function hdfs_time() {
  if hexist "$@"; then
    # fixed: the awk program used typographic quotes (‘…‘); use plain
    # single quotes so awk receives the program, not literal quote chars.
    hadoop fs -ls "$@" | grep "$@" | awk '{print $6","$7}'
  fi
}
# Check that an HDFS output looks complete.
# Arguments: $1 path, $2 expected number of part- files, $3 minimum size.
# Returns 0 when the part count matches and size >= min, or when min is 0.
function hdfs_check() {
  local path=$1
  local num_parts=$2
  local min_size=$3
  local parts size
  # fixed: typographic quotes around 'part-' and the awk program made both
  # match/execute the literal quote characters.
  parts=$(hadoop fs -ls "$path" | grep -c 'part-')
  size=$(hadoop fs -dus "$path" | awk '{print $2}')
  # fixed: "min_size == 0" compared the literal string "min_size" to "0"
  # (always false) — the $ was missing. Numeric operators (-eq/-ge) also
  # tolerate any stray whitespace from the pipelines above.
  if [[ $parts -eq $num_parts && $size -ge $min_size || $min_size -eq 0 ]]; then
    return 0
  else
    return 1
  fi
}
# Extract Hadoop counter lines from a job log file ($1) into a counter
# file ($2), stripping the "INFO mapred.JobClient: " prefix.
function get_counter() {
  # fixed: grep pattern and sed program used typographic quotes (‘…‘),
  # matching the literal quote characters; also dropped the useless cat.
  grep 'INFO mapred.JobClient: ' "$1" | \
    sed 's/.*INFO mapred.JobClient: //' > "$2"
}
# Run "hadoop jar", teeing output to a per-job log file and extracting the
# job counters afterwards.
# Globals: log_dir, job_name, cur_date — set by app.conf / the calling script.
# Returns the exit status of the hadoop jar command itself (not tee's).
function hjar() {
  # ${job_name/: /.} turns "name: suffix" into "name.suffix" for file names;
  # hoisted the common prefix instead of repeating the substitution 3 times.
  local base=$log_dir/${job_name/: /.}.$cur_date
  local tag_file=$base.tag
  local log_file=$base.log
  local counter_file=$base.cnt
  touch "$tag_file"
  hadoop jar "$@" 2>&1 | tee "$log_file"
  # PIPESTATUS[0] preserves the job's status, which tee would otherwise mask.
  local status=${PIPESTATUS[0]}
  get_counter "$log_file" "$counter_file"
  return $status
}
# distcp wrapper; forwards all arguments to "hadoop distcp".
hdistcp() { hadoop distcp "$@"; }

# Silent stat: succeeds iff the path exists; all output is suppressed.
hstat() { hadoop fs -stat "$@" >/dev/null 2>&1; }
# Pull a directory between clusters with distcp, then open read permission.
# Arguments: $1 source path, $2 destination path,
#            $3 (optional) distcp log dir — defaults to <dest>_distcp_logs.
# Globals:   hadoop_jar, distcp_map_num — not defined anywhere in this file;
#            NOTE(review): presumably set in another sourced conf — confirm.
# Returns:   distcp's exit status; chmod runs only on success.
function dist_pull() {
if [ $# -lt 2 ]; then
echo "$(getdate) > invalid number of argument"
return 1
fi
local src=$1
local dest=$2
# local log_dir shadows the global log_dir (app.conf) inside this function.
local log_dir=${dest}_distcp_logs
if [ $# -gt 2 ]; then
log_dir=$3
fi
# Remove any logs left over from a previous run before distcp writes there.
hexist $log_dir && hrmr $log_dir
# -i: ignore failures; -overwrite/-delete: mirror src exactly at dest.
hdistcp -Dmapred.map.child.java.opts="-Djava.system.class.loader=org.apache.hadoop.mapred.DeAppClassLoader -Xmx200m" \
-libjars $hadoop_jar -i -overwrite -delete -m $distcp_map_num -log $log_dir $src $dest
local ret=$?
if [ $ret == 0 ]
then
# Make the copied tree world-readable only when the copy fully succeeded.
hadoop fs -chmod -R +r $dest
fi
return $ret
}
************************************************************************************************
##build.sh
#!/bin/bash
# build.sh — build the project and refresh jar/snapshot.jar.
# fixed: shebang was "#!bin/bash" (missing leading slash).
set -eu
mvn clean package
rm -f jar/*.jar
# NOTE(review): assumes the build produces exactly one jar in target/.
cp target/*.jar jar/snapshot.jar
Original article: http://www.cnblogs.com/gray035/p/3823089.html