In the experimental environment described above, the compute job involves Hive, Mahout, HBase bulk load, and MapReduce, with the workflow driven by shell scripts. The whole run processes roughly 1.6 million raw behavior records and 400,000 business records.
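The driver script itself is not shown here; the sketch below only illustrates what a shell-controlled workflow of this kind typically looks like. All file names, paths, class names and table names are placeholders, not the actual job.

#!/bin/bash
# Hypothetical workflow driver: stop at the first failing step.
set -e

# 1. ETL / aggregation of the raw behavior data with Hive (behavior_etl.hql is a placeholder).
hive -f behavior_etl.hql

# 2. Item-based recommendation with Mahout over the Hive output (paths are placeholders).
mahout recommenditembased --input /warehouse/behavior --output /tmp/mahout_out \
  --similarityClassname SIMILARITY_COSINE

# 3. MapReduce job that writes HFiles, followed by an HBase bulk load (jar, class and table are placeholders).
hadoop jar my-bulkload-job.jar com.example.HFileGenerator /tmp/mahout_out /tmp/hfiles
hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /tmp/hfiles my_result_table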
HBase
hbase-site.xml
<property>
<name>zookeeper.session.timeout</name>
<value>300000</value>
</property>
<property>
<name>hbase.zookeeper.property.tickTime</name>
<value>60000</value>
</property>
<property>
<name>hbase.hregion.memstore.mslab.enabled</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.property.maxClientCnxns</name>
<value>10000</value>
</property>
<property>
<name>hbase.client.scanner.timeout.period</name>
<value>240000</value>
</property>
<property>
<name>hbase.rpc.timeout</name>
<value>280000</value>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>107374182400</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>100</value>
</property>
<property>
<name>dfs.client.socket-timeout</name>
<value>300000</value>
<description>Raise the DFS client socket timeout from the default 60 seconds to 300 seconds.</description>
</property>
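After restarting the RegionServers, one way to confirm that these values were actually picked up is to read the configuration dump exposed on the RegionServer info port (60030 is the 0.98 default; the host name below is an assumption):

# Dump the live RegionServer configuration and pull out one of the tuned keys.
curl -s http://rs-node1:60030/conf | grep -A1 hbase.hregion.max.filesize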
hbase-env.sh
# Heap size in MB, with no unit suffix (the launcher script appends "m" itself).
# Note that the -Xms1g/-Xmx1g set in HBASE_OPTS below appear later on the command line and win.
export HBASE_HEAPSIZE=2048
export HBASE_HOME=/home/fulong/Hbase/hbase-0.98.6-cdh5.2.0
export HBASE_LOG_DIR=${HBASE_HOME}/logs
export HBASE_OPTS="-server -Xms1g -Xmx1g -XX:NewRatio=2 -XX:PermSize=128m -XX:MaxPermSize=128m -verbose:gc -Xloggc:$HBASE_HOME/logs/hbasegc.log -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled \
  -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=$HBASE_HOME/logs"
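Since -verbose:gc and -Xloggc are enabled above, GC behavior can be watched directly in the log file, and the flags the daemon actually received can be inspected from the process table (paths follow the settings above):

# Follow the RegionServer GC log configured via -Xloggc.
tail -f /home/fulong/Hbase/hbase-0.98.6-cdh5.2.0/logs/hbasegc.log

# Check which -X/-XX flags the running RegionServer was started with.
ps -ef | grep HRegionServer | tr ' ' '\n' | grep -E '^-Xm|^-XX'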
ZooKeeper
zoo.cfg
syncLimit=10
#New in 3.3.0: the maximum session timeout in milliseconds that the server will allow the client to negotiate. Defaults to 20 times the tickTime.
maxSessionTimeout=300000
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/fulong/Zookeeper/CDH/zookdata
# the port at which the clients will connect
clientPort=2181
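Once ZooKeeper has been restarted, the running values (clientPort, maxSessionTimeout, maxClientCnxns, ...) can be spot-checked with the four-letter-word commands; localhost is assumed here:

# 'conf' (available since 3.3.0) dumps the live server configuration; 'stat' shows connections and mode.
echo conf | nc localhost 2181
echo stat | nc localhost 2181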
The following two files were modified in order to trace ZooKeeper's logs; the default ZK log output is inconvenient to inspect.
log4j.properties
zookeeper.root.logger=INFO,CONSOLE,ROLLINGFILE
zookeeper.console.threshold=INFO
zookeeper.log.dir=/home/fulong/Zookeeper/CDH/zooklogs
zookeeper.log.file=zookeeper.log
zookeeper.log.threshold=DEBUG
zookeeper.tracelog.dir=/home/fulong/Zookeeper/CDH/zooklogs
zookeeper.tracelog.file=zookeeper_trace.log
log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold}
log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file}
# Max log file size of 50MB
log4j.appender.ROLLINGFILE.MaxFileSize=50MB
zkEnv.sh
if [ "x${ZOO_LOG4J_PROP}" = "x" ]
then
ZOO_LOG4J_PROP="INFO,CONSOLE,ROLLINGFILE"
fi
Note: even after modifying the two files above, the expected ZK Log4j log file did not appear; the cause requires further investigation.
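A plausible cause worth checking (not verified here): zkServer.sh starts the JVM with -Dzookeeper.log.dir=${ZOO_LOG_DIR} and -Dzookeeper.root.logger=${ZOO_LOG4J_PROP}, and those system properties override the defaults written in log4j.properties, so with ZOO_LOG_DIR left at its default of "." the rolling file would end up in whatever directory zkServer.sh was launched from rather than in zooklogs. If that is the case, also setting ZOO_LOG_DIR in zkEnv.sh should fix it:

# Untested suggestion: point ZOO_LOG_DIR at the intended log directory in zkEnv.sh.
if [ "x${ZOO_LOG_DIR}" = "x" ]
then
    ZOO_LOG_DIR=/home/fulong/Zookeeper/CDH/zooklogs
fi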
HDFS
hdfs-site.xml
<property>
<name>dfs.datanode.socket.write.timeout</name>
<value>600000</value>
</property>
<property>
<name>dfs.client.socket-timeout</name>
<value>300000</value>
</property>
<property>
<name>dfs.datanode.max.xcievers</name>
<value>4096</value>
</property>
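Note that dfs.datanode.max.xcievers is the legacy (deliberately misspelled) key; Hadoop 2.x deprecates it in favour of dfs.datanode.max.transfer.threads, though the old name is still honoured. After pushing out the new hdfs-site.xml, the values it resolves to can be read back on a node with hdfs getconf (this reflects the local config files, not a running daemon):

# Resolve the configured values from the local hdfs-site.xml / core-site.xml.
hdfs getconf -confKey dfs.client.socket-timeout
hdfs getconf -confKey dfs.datanode.max.transfer.threads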
YARN
yarn-site.xml
<property>
<name>yarn.scheduler.fair.user-as-default-queue</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.zk-timeout-ms</name>
<value>120000</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>3072</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>3072</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.nodemanager.container-monitor.interval-ms</name>
<value>300000</value>
</property>
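With yarn.nodemanager.resource.memory-mb=3072, yarn.scheduler.maximum-allocation-mb=3072 and a single vcore per node, each NodeManager can run at most one maximum-size container at a time, and the smallest request is 128 MB. What each node actually registered with the ResourceManager can be checked with the yarn CLI (<node_id> is taken from the list output):

# Nodes known to the ResourceManager, with their running-container and memory usage.
yarn node -list

# Detailed capacity (memory, vcores) of a single NodeManager.
yarn node -status <node_id>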