标签:行合并 [] cep void ram pat input getname tac
合并小文件并存放到HDFS上:在向HDFS复制上传的过程中就将小文件合并,效果会更好。
package org.xueruan.hadoop.hdfs; import java.nio.file.Path; import sun.management.FileSystem; /* * function: merge file while copying and uploading files into HDFS */ public class PutMerge { public static void put(String localDir,String hdfsFile){ /* * @param localDir: local file directory * * @param hdfsFile: HDFS file path */ Configuration conf = new Configuration(); Path localPath = new Path(localDir); Path hdfsPath = new Path(hdfsFile); try{ FileSystem localFs = FileSystem.getLocal(conf); FileSystem hdfs = FileSystem.get(conf); FileStatus[] status = localFs.listStatus(localPath); FSDataOutputStream fsDataOutputStream = hdfs.create(hdfsPath); for(FileStatus fileStatus:status){ Path path = fileStatus.getPath(); System.out.println("File is :"+path.getName()); //open file input stream FSDdataInputStream fsDataInputStream = localFs.open(path); byte[] buffer= new byte[1024]; int len =0; while(len = fsDataInputStream.read(buffer)>0){ fsDataOutputStream.write(buffer,0,len); } fsDataInputStrea.close(); } fsDataOutputStream.close(); }catch(Exception e){ e.printStackTrace(); } } }
标签:行合并 [] cep void ram pat input getname tac
原文地址:http://www.cnblogs.com/andypengyong/p/7258840.html