标签:node rate 数据 etl conf apr art create 文件合并
1.创建maven工程并导入jar包
jdk使用1.8、maven3.x版本
在pom.xml中添加以下内容
<!-- Cloudera repository: hosts the CDH builds of the Hadoop artifacts used below. -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>

<dependencies>
    <!-- Hadoop client libraries (CDH 5.14.2 builds). -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>

    <!-- Test frameworks. See https://mvnrepository.com/artifact/junit/junit -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <!-- NOTE(review): RELEASE is a floating meta-version and this dependency has no
         test scope; consider pinning a concrete version. -->
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>RELEASE</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Compile for Java 8 with UTF-8 sources. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
                <!-- NOTE(review): the original carried a disabled "verbal" option here;
                     the compiler plugin's option is presumably "verbose". -->
            </configuration>
        </plugin>
        <!-- Build a shaded jar during the package phase. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <minimizeJar>true</minimizeJar>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
2.test目录下创建com.my.hdfs并在该包下创建HDFSOperate.java文件
2.1 创建文件夹
@Test public void mkdirToHdfs() throws IOException {
Configuration configuration = new Configuration(); configuration.set("fs.defaultFS","hdfs://node01:8020"); FileSystem fileSystem = FileSystem.get(configuration); fileSystem.mkdirs(new Path("/dir1"));//hdfs路径 fileSystem.close(); }
2.2文件上传
/**
 * Uploads the local file {@code d:\hello.txt} into the HDFS directory {@code /dir1}.
 *
 * @throws IOException if the file system cannot be reached or the copy fails
 */
@Test
public void uploadFile() throws IOException {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://node01:8020");

    Path localSource = new Path("file:///d:\\hello.txt");      // local path
    Path hdfsTarget = new Path("hdfs://node01:8020/dir1");     // HDFS directory

    // try-with-resources closes the client even when the copy throws.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
        fileSystem.copyFromLocalFile(localSource, hdfsTarget);
    }
}
2.3文件下载
/**
 * Downloads {@code /dir1/hello.txt} from HDFS to the local file {@code d:\hello2.txt}.
 *
 * @throws IOException if the file system cannot be reached or the copy fails
 */
@Test
public void downloadFile() throws IOException {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://node01:8020");

    Path hdfsSource = new Path("hdfs://node01:8020/dir1/hello.txt"); // HDFS file
    Path localTarget = new Path("file:///d:\\hello2.txt");           // local path

    // try-with-resources closes the client even when the copy throws.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
        fileSystem.copyToLocalFile(hdfsSource, localTarget);
    }
}
2.4文件删除
/**
 * Deletes {@code /dir1/hello.txt} from HDFS (the recursive flag is set).
 *
 * @throws IOException if the file system cannot be reached or the delete fails
 */
@Test
public void deleteHDFS() throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://node01:8020");

    // try-with-resources closes the client even when delete throws.
    try (FileSystem fs = FileSystem.get(conf)) {
        fs.delete(new Path("/dir1/hello.txt"), true);
    }
}
2.5重命名
/**
 * Renames {@code /dir1/hello.txt} to {@code /dir1/hello.md} on HDFS.
 *
 * @throws IOException if the file system cannot be reached, or if the rename
 *                     is reported as unsuccessful
 */
@Test
public void renameDFS() throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://node01:8020");

    Path source = new Path("/dir1/hello.txt");
    Path target = new Path("/dir1/hello.md");

    // try-with-resources closes the client even when rename throws.
    try (FileSystem fs = FileSystem.get(conf)) {
        // rename signals failure through its boolean result; surface it
        // instead of letting the test pass silently.
        if (!fs.rename(source, target)) {
            throw new IOException("rename failed: " + source + " -> " + target);
        }
    }
}
2.6查看hdfs文件信息
@Test public void testListFiles() throws IOException, InterruptedException, URISyntaxException{ // 1获取文件系统 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration); // 2 获取文件详情 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true); while(listFiles.hasNext()){ LocatedFileStatus status = listFiles.next(); // 输出详情 // 文件名称 System.out.println(status.getPath().getName()); // 长度 System.out.println(status.getLen()); // 权限 System.out.println(status.getPermission()); // 分组 System.out.println(status.getGroup()); // 获取存储的块信息 BlockLocation[] blockLocations = status.getBlockLocations(); for (BlockLocation blockLocation : blockLocations) { // 获取块存储的主机节点 String[] hosts = blockLocation.getHosts(); for (String host : hosts) { System.out.println(host); } } } // 3 关闭资源 fs.close(); }
3.java API通过IO流操作hdfs文件
3.1 io流数据上传
@Test public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException { // 1 获取文件系统 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration); // 2 创建输入流 不需要加file:/// FileInputStream fis = new FileInputStream(new File("e:\\helo.txt")); // 3 获取输出流 FSDataOutputStream fos = fs.create(new Path("hdfs://node01:8020/outresult.txt")); // 4 流对拷 IOUtils.copy(fis, fos); // 5 关闭资源 IOUtils.closeQuietly(fos); IOUtils.closeQuietly(fis); fs.close(); }
3.2 io流数据下载
@Test public void getFileFromHDFS() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs:/[表情]de01:8020"); FileSystem fs = FileSystem.get(conf); FSDataInputStream fis = fs.open(new Path("/dir1/hello.txt")); FileOutputStream fos = new FileOutputStream("D:/hello_2.txt"); //filesystem关闭 IOUtils.copy(fis, fos); IOUtils.closeQuietly(fis); IOUtils.closeQuietly(fos); fs.close(); }
3.3 hdfs小文件合并
/** * 小文件合并 */ @Test public void mergeFile() throws URISyntaxException, IOException, InterruptedException { //获取分布式文件系统hdfs FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration(), "hadoop"); FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("hdfs://node01:8020/bigfile.xml")); //获取本地文件系统 localFileSystem LocalFileSystem localFileSystem = FileSystem.getLocal(new Configuration()); //读取本地的文件 FileStatus[] fileStatuses = localFileSystem.listStatus(new Path("file:///D:smallFiles")); for (FileStatus fileStatus : fileStatuses) {
//获取每一个本地的文件路径 Path path = fileStatus.getPath(); //读取本地小文件 FSDataInputStream fsDataInputStream = localFileSystem.open(path); IOUtils.copy(fsDataInputStream,fsDataOutputStream); IOUtils.closeQuietly(fsDataInputStream); } IOUtils.closeQuietly(fsDataOutputStream); localFileSystem.close(); fileSystem.close(); //读取所有本地小文件,写入到hdfs的大文件里面去 }
标签:node rate 数据 etl conf apr art create 文件合并
原文地址:https://www.cnblogs.com/dan2/p/12663924.html