
09_Accessing Hadoop HDFS from Java

Project notes: this project is based on Maven and JDK 8.

《pom.xml》
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.lanqiao</groupId>
<artifactId>hadoop_hdfs</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.7.5</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.7.7</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-my-jar-with-dependencies</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>
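A usage note on the build configuration above: with the assembly plugin bound to the package phase, running Maven's package goal should produce a runnable fat jar in target/ (named roughly hadoop_hdfs-1.0-SNAPSHOT-jar-with-dependencies.jar), and the maven-dependency-plugin copies the individual dependency jars into target/dependency by default; exact names and paths depend on your Maven setup.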

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Hello world!
 * Reads files stored on Hadoop HDFS from a Java program.
 */
public class HelloHDFS {

    public static void main(String[] args) throws IOException {
        commonReadFun();
    }

    public static void commonReadFun() throws IOException {
        // Create the configuration object
        Configuration conf = new Configuration();
        // Set the HDFS connection information (NameNode address)
        conf.set("fs.defaultFS", "hdfs://192.168.40.57:9000");
        // Obtain the file system from the connection information
        FileSystem fileSystem = FileSystem.get(conf);
//        // Create the directory /lanqiao under the HDFS root (parent directories are created as needed)
//        boolean success = fileSystem.mkdirs(new Path("/lanqiao"));
//        System.out.println(success);
//        // Check whether the file exists
//        success = fileSystem.exists(new Path("/hello.txt"));
//        System.out.println(success);
//        // Delete the directory. Argument 1: path; argument 2: whether to delete recursively
//        success = fileSystem.delete(new Path("/lanqiao"), true);
//        System.out.println(success);
//        // Check whether the directory still exists
//        success = fileSystem.exists(new Path("/lanqiao"));
//        System.out.println(success);
        // Upload a file to HDFS
/*        FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
        FileInputStream in = new FileInputStream("d://test.log");
        IOUtils.copyBytes(in, out, 1024, true);*/
        // List the files under the given directory
        FileStatus[] fileStatus = fileSystem.listStatus(new Path("/"));
        for (FileStatus fs : fileStatus) {
            System.out.println(fs.getPath());        // file path
            System.out.println(fs.getPermission());  // read/write permissions
            System.out.println(fs.getReplication()); // replication factor
        }
    }

    public static void firstReade() throws IOException {
        // First approach: read through java.net.URL.
        // By default URL only supports the http protocol, while HDFS uses the hdfs:// scheme,
        // so register Hadoop's stream handler factory to make URL understand hdfs URLs.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        // Access HDFS through a URL with the hdfs scheme; hello.txt sits in the HDFS root directory
        URL url = new URL("hdfs://master:9000/hello.txt");
        // Call url.openStream() to obtain an InputStream
        InputStream in = url.openStream();
        // Use Hadoop's IOUtils.copyBytes(input, output, buffer size, close streams when done)
        IOUtils.copyBytes(in, System.out, 1024, true);
    }
}
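For comparison with the URL-based read above, the same file can also be read through the FileSystem API. The sketch below is a minimal, hypothetical example: the class name ReadHDFSFile is made up here, and the NameNode address and /hello.txt path are just the placeholder values used earlier, so adjust them for your own cluster.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadHDFSFile {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // NameNode address, same placeholder as in the examples above
        conf.set("fs.defaultFS", "hdfs://192.168.40.57:9000");
        try (FileSystem fileSystem = FileSystem.get(conf);
             InputStream in = fileSystem.open(new Path("/hello.txt"))) {
            // Copy the file contents to stdout; false = do not close the streams here,
            // because try-with-resources already closes them
            IOUtils.copyBytes(in, System.out, 1024, false);
        }
    }
}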