码迷,mamicode.com
首页 > 编程语言 > 详细

hdfs的Java Api开发

时间:2020-04-09 00:13:36      阅读:97      评论:0      收藏:0      [点我收藏+]

标签:node   rate   数据   etl   conf   apr   art   create   文件合并   

1.创建maven工程并导入jar包

  jdk使用1.8、maven3.x版本

  pom.xml添加以下内容

<repositories>

    <repository>

        <id>cloudera</id>

        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>

    </repository>

 </repositories>

 <dependencies>

    <dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-client</artifactId>

        <version>2.6.0-mr1-cdh5.14.2</version>

    </dependency>

    <dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-common</artifactId>

        <version>2.6.0-cdh5.14.2</version>

    </dependency>

    <dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-hdfs</artifactId>

        <version>2.6.0-cdh5.14.2</version>

    </dependency>

 

    <dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-mapreduce-client-core</artifactId>

        <version>2.6.0-cdh5.14.2</version>

    </dependency>

    <!-- https://mvnrepository.com/artifact/junit/junit -->

    <dependency>

        <groupId>junit</groupId>

        <artifactId>junit</artifactId>

        <version>4.11</version>

        <scope>test</scope>

    </dependency>

    <dependency>

        <groupId>org.testng</groupId>

        <artifactId>testng</artifactId>

        <!-- RELEASE 元版本已废弃（Maven 3 不再解析），应固定具体版本号 -->
        <version>6.14.3</version>

    </dependency>

 </dependencies>

 <build>

    <plugins>

        <plugin>

            <groupId>org.apache.maven.plugins</groupId>

            <artifactId>maven-compiler-plugin</artifactId>

            <version>3.0</version>

            <configuration>

                <source>1.8</source>

                <target>1.8</target>

                <encoding>UTF-8</encoding>

                <!--   <verbal>true</verbal>-->

            </configuration>

        </plugin>

        <plugin>

            <groupId>org.apache.maven.plugins</groupId>

            <artifactId>maven-shade-plugin</artifactId>

            <version>2.4.3</version>

            <executions>

                <execution>

                    <phase>package</phase>

                    <goals>

                        <goal>shade</goal>

                    </goals>

                    <configuration>

                        <minimizeJar>true</minimizeJar>

                    </configuration>

                </execution>

            </executions>

        </plugin>

    </plugins>

 </build>

2.test目录下创建com.my.hdfs并在该包下创建HDFSOperate.java文件

  2.1 创建文件夹

  @Test
  public void mkdirToHdfs() throws IOException {    
    Configuration configuration = new Configuration();     configuration.set("fs.defaultFS","hdfs://node01:8020");     FileSystem fileSystem = FileSystem.get(configuration);     fileSystem.mkdirs(new Path("/dir1"));//hdfs路径     fileSystem.close();   }

   2.2文件上传

  @Test
  public void uploadFile() throws IOException {
    // Upload a local file to HDFS via copyFromLocalFile.
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://node01:8020");
    // try-with-resources closes the FileSystem even if the copy fails.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
      fileSystem.copyFromLocalFile(
          new Path("file:///d:\\hello.txt"), /* local source */
          new Path("hdfs://node01:8020/dir1") /* HDFS destination */);
    }
  }

   2.3文件下载

 @Test
 public void downloadFile() throws IOException {
   // Download a file from HDFS to the local filesystem.
   Configuration configuration = new Configuration();
   configuration.set("fs.defaultFS", "hdfs://node01:8020");
   // try-with-resources closes the FileSystem even if the copy fails.
   try (FileSystem fileSystem = FileSystem.get(configuration)) {
     fileSystem.copyToLocalFile(
         new Path("hdfs://node01:8020/dir1/hello.txt"), /* HDFS source */
         new Path("file:///d:\\hello2.txt") /* local destination */);
   }
 }

   2.4文件删除

  @Test
  public void deleteHDFS() throws IOException {
    // Delete a file (or directory, recursively) on HDFS.
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://node01:8020");
    // try-with-resources closes the FileSystem even if delete() throws.
    try (FileSystem fs = FileSystem.get(conf)) {
      // second argument = recursive: required true when the path is a directory
      fs.delete(new Path("/dir1/hello.txt"), true);
    }
  }

  2.5重命名

  @Test
  public void renameDFS() throws IOException {
    // Rename a file on HDFS.
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://node01:8020");
    // try-with-resources closes the FileSystem even if rename() throws.
    try (FileSystem fs = FileSystem.get(conf)) {
      // rename() signals failure via its boolean result, not an exception;
      // the original silently ignored it — assert so a failed rename is visible.
      boolean renamed = fs.rename(new Path("/dir1/hello.txt"), new Path("/dir1/hello.md"));
      assert renamed : "rename /dir1/hello.txt -> /dir1/hello.md failed";
    }
  }

  2.6查看hdfs文件信息

@Test
    public void testListFiles() throws IOException, InterruptedException, URISyntaxException{
        // 1. Get the file system; try-with-resources closes it even if
        //    iteration throws (the original leaked it in that case).
        Configuration configuration = new Configuration();
        try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration)) {
            // 2. Recursively list all files under the root directory.
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
            while (listFiles.hasNext()) {
                LocatedFileStatus status = listFiles.next();
                // File name
                System.out.println(status.getPath().getName());
                // Length in bytes
                System.out.println(status.getLen());
                // Permissions
                System.out.println(status.getPermission());
                // Group
                System.out.println(status.getGroup());
                // Block locations: each block may be replicated on several hosts
                BlockLocation[] blockLocations = status.getBlockLocations();
                for (BlockLocation blockLocation : blockLocations) {
                    // Hosts storing this block
                    for (String host : blockLocation.getHosts()) {
                        System.out.println(host);
                    }
                }
            }
        }
        // 3. FileSystem closed automatically by try-with-resources.
    }

3.java API通过IO流操作hdfs文件

  3.1 io流数据上传

@Test
    public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
        // Upload a local file to HDFS using raw streams.
        // 1. Get the file system.
        Configuration configuration = new Configuration();
        // try-with-resources closes both streams and the FileSystem even if
        // the copy throws — the original leaked all three in that case.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration);
             // 2. Input stream over the local file (no file:/// prefix needed here)
             FileInputStream fis = new FileInputStream(new File("e:\\helo.txt"));
             // 3. Output stream on HDFS
             FSDataOutputStream fos = fs.create(new Path("hdfs://node01:8020/outresult.txt"))) {
            // 4. Stream copy
            IOUtils.copy(fis, fos);
        }
        // 5. Resources closed automatically.
    }

   3.2 io流数据下载

@Test
    public void  getFileFromHDFS() throws IOException {
        // Download a file from HDFS using raw streams.
        Configuration conf = new Configuration();
        // BUG FIX: the URI was garbled ("hdfs:/[表情]de01:8020"); restored to the
        // same NameNode address used by every other method in this class.
        conf.set("fs.defaultFS", "hdfs://node01:8020");
        // try-with-resources closes both streams and the FileSystem even if
        // the copy throws — the original leaked them in that case.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream fis = fs.open(new Path("/dir1/hello.txt"));
             FileOutputStream fos = new FileOutputStream("D:/hello_2.txt")) {
            IOUtils.copy(fis, fos);
        }
    }

   3.3 hdfs小文件合并

  /**
   * 小文件合并
   */
  @Test
  public   void  mergeFile() throws URISyntaxException, IOException, InterruptedException {
    //获取分布式文件系统hdfs
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration(), "hadoop");
    FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("hdfs://node01:8020/bigfile.xml"));
    //获取本地文件系统 localFileSystem
    LocalFileSystem localFileSystem = FileSystem.getLocal(new Configuration());
    //读取本地的文件
    FileStatus[] fileStatuses = localFileSystem.listStatus(new Path("file:///D:smallFiles"));
    for (FileStatus fileStatus : fileStatuses) {    
      //获取每一个本地的文件路径       Path path = fileStatus.getPath();       //读取本地小文件       FSDataInputStream fsDataInputStream = localFileSystem.open(path);       IOUtils.copy(fsDataInputStream,fsDataOutputStream);       IOUtils.closeQuietly(fsDataInputStream);     }     IOUtils.closeQuietly(fsDataOutputStream);     localFileSystem.close();     fileSystem.close();     //读取所有本地小文件,写入到hdfs的大文件里面去   }

 

hdfs的Java Api开发

标签:node   rate   数据   etl   conf   apr   art   create   文件合并   

原文地址:https://www.cnblogs.com/dan2/p/12663924.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!