Python & Hadoop

时间：2016-03-09 06:58:01 阅读：252 评论：0 收藏：0 [点我收藏+]

标签：

由于实验需要调整大量的参数，索性就用 Python 写了个脚本去执行 Hadoop 命令。

------------------------------------------------------------------------------------------------------------------------------

今天是第一次接触 Python，按照例子写了一个执行命令的脚本。

#!/usr/bin/python
import sys 
import subprocess
import os
import commands
from datetime import datetime
from datetime import timedelta
import time
import re
import math
import cmd

# The following script runs experiments on Hadoop.


# ###############################################################################################################
#   global variables
# ###############################################################################################################
 
 
# Root of the Hadoop installation, relative to the working directory.
hadoopDir = './hadoop-1.2.1/'

# Log files, truncated on every run: one receives the commands and their
# stdout, the other the commands and their stderr.
logger = open('./logger.out', 'w')
logger_error = open('./logger.err', 'w')

# ###############################################################################################################
#   function execute command 
# ###############################################################################################################
 
def execute_command(cmd):
    """Run a shell command, log its output and collect timing information.

    The command line is written to both log files and echoed to the console.
    Every stderr line of the child is copied to ``logger_error`` and every
    stdout line to ``logger``; stdout lines are also fed to an
    ``executionTime`` parser.

    Args:
        cmd: Complete command line. It is executed through the shell
            (``shell=True``), so it must come from a trusted source.

    Returns:
        The ``executionTime`` instance filled in from the command's stdout.
    """
    result = executionTime()
    logger.write(cmd + '\n')
    print(cmd)
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    logger_error.write(cmd + '\n')

    # communicate() drains both pipes at the same time and waits for the
    # child to exit. Reading stderr to EOF before touching stdout can
    # deadlock as soon as the child fills the stdout pipe buffer.
    out, err = p.communicate()

    for line in err.splitlines(True):
        logger_error.write(line)
        print('error >>>>> ' + line)

    for line in out.splitlines(True):
        logger.write(line)
        print('out >>>>>> ' + line)
        result.parseLines(line)

    # The parsed token still carries the line's trailing newline; strip it
    # before reporting.
    run_time = result.runTime.replace('\n', '')
    logger.write('***->Execution time in Milliseconds(' + run_time + ') ')
    print('***->Execution time of ' + run_time)
    return result

# ###############################################################################################################
#   function close writers 
# ###############################################################################################################
 
def close_writers():
    """Flush both log files, then close them (stdout log first each time)."""
    writers = (logger, logger_error)
    for writer in writers:
        writer.flush()
    for writer in writers:
        writer.close()

def log(message):
    """Write ``message`` to the stdout log, framed by dashed separator lines."""
    banner = ''.join(('\n----------\n\t', message, '\n-----------\n'))
    logger.write(banner)

# ###############################################################################################################
#   Experiments on gplot
# ###############################################################################################################

# Input files handed to gplot. Other candidate inputs, currently disabled:
#   'lakes.random', 'buildings.spatial', 'allobjects.spatial'
dataFiles = ['land.rtree']

# Value placed after "shape:" on the gplot command line.
dataShape = 'shppolygon'

# When True, gplot() appends the -overwrite flag to the command.
overwrite = True
def gplot():
    """Run the Hadoop ``gplot`` command once for every file in ``dataFiles``.

    Each input ``<name>`` is plotted to ``<name>.png`` in red, using the shape
    type from ``dataShape``; ``-overwrite`` is appended when the module-level
    ``overwrite`` switch is set.

    Returns:
        The ``executionTime`` result of the last command that was run, or
        ``None`` when ``dataFiles`` is empty.
    """
    overwriteFlag = '-overwrite' if overwrite else ''

    result = None
    for infilename in dataFiles:
        # Every argument is separated by a space; without the one in front of
        # "color:red" the option is glued onto the output file name.
        cmd = (hadoopDir + './bin/hadoop' + ' gplot ' + infilename
               + ' ' + infilename + '.png'
               + ' color:red' + ' shape:' + dataShape + ' ' + overwriteFlag)
        # execute_command() logs the command line itself, so it is not
        # written to the log a second time here.
        result = execute_command(cmd)

    # The log files stay open: close_writers() owns their shutdown, and its
    # flush() calls would fail on an already closed file.
    return result

# ###############################################################################################################
#Classes
# ###############################################################################################################
    
class executionTime(object):
    """Timing values scraped from the text output of a command.

    All three attributes are strings. Each stays '' until a matching line is
    parsed, and then holds the raw text after the last space of that line
    (including a trailing newline if the line had one).
    """

    def __init__(self):
        self.runTime = self.sampleTime = self.subdivisionTime = ''

    def parseLines(self, line):
        """Store the last space-separated token of ``line`` if it is a timing line.

        Markers are checked in order and only the first match is applied.
        """
        markers = (
            ('Total time for sampling', 'sampleTime'),
            ('Total time for space subdivision', 'subdivisionTime'),
            ('time in', 'runTime'),
        )
        for marker, attribute in markers:
            if marker in line:
                setattr(self, attribute, line.rsplit(' ', 1)[-1])
                return
            
# ###############################################################################################################
#   Main()
# ###############################################################################################################    
 
# Run the experiment. The log files are closed even when gplot() raises, so
# whatever was logged up to the failure is flushed to disk.
try:
    gplot()
    print('Program is done ')
finally:
    close_writers()

执行后，也得到了正确的结果，nice！！！！！！！！！！！！！！！！！！！！

技术分享

Python & Hadoop

标签：

原文地址：http://blog.csdn.net/yaoxiaochuang/article/details/50832267

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行