《Hadoop: The Definitive Guide》第三版 拾遗 — 第三章：查看文件及正则表达式

package com.tht.hdfs;

//cc FileSystemDoubleCat Displays files from a Hadoop filesystem on standard output twice, by using seek
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

//vv FileSystemDoubleCat
public class FileSystemDoubleCat {

public static void main(String[] args) throws Exception {
// String uri = args[0];
String uri = "hdfs://";
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(uri), conf);
FSDataInputStream in = null;
byte b[] = new byte[500];
try {
in = fs.open(new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
//in.seek(0); // go back to the start of the file
//IOUtils.copyBytes(in, System.out, 4096, false);

System.out.println(new String(b));
} finally {
// ^^ FileSystemDoubleCat


FSDataInputStream also implements the PositionedReadable interface for reading parts of a file at a given offset:
/**
 * Reads from a stream at an explicit file position.
 */
public interface PositionedReadable {

  /**
   * Reads up to {@code length} bytes, starting at {@code position} in the
   * file, into {@code buffer} beginning at index {@code offset}.
   *
   * @param position position in the file to read from
   * @param buffer destination buffer
   * @param offset index in {@code buffer} at which the first byte is stored
   * @param length maximum number of bytes to read
   * @return the number of bytes actually read, which may be less than {@code length}
   * @throws IOException if the read fails
   */
  public int read(long position, byte[] buffer, int offset, int length)
      throws IOException;

  /**
   * Reads exactly {@code length} bytes, starting at {@code position} in the
   * file, into {@code buffer} beginning at index {@code offset}.
   *
   * @param position position in the file to read from
   * @param buffer destination buffer
   * @param offset index in {@code buffer} at which the first byte is stored
   * @param length number of bytes to read
   * @throws IOException if the read fails, including when the end of the
   *     file is reached before {@code length} bytes have been read
   */
  public void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException;

  /**
   * Reads {@code buffer.length} bytes, starting at {@code position} in the
   * file, into {@code buffer}.
   *
   * @param position position in the file to read from
   * @param buffer destination buffer, filled completely
   * @throws IOException if the read fails, including when the end of the
   *     file is reached before the buffer has been filled
   */
  public void readFully(long position, byte[] buffer) throws IOException;
}

The read() method reads up to length bytes from the given position in the file into the buffer at the given offset in the buffer. The return value is the number of bytes actually read; callers should check this value, as it may be less than length.



//cc RegexExcludePathFilter A PathFilter for excluding paths that match a regular expression

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

//vv RegexExcludePathFilter
public class RegexExcludePathFilter implements PathFilter {

private final String regex;

public RegexExcludePathFilter(String regex) {
this.regex = regex;

public boolean accept(Path path) {
return !path.toString().matches(regex);
//^^ RegexExcludePathFilter


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

public class GlobStatus {
public static void main(String[] args) throws IOException {
String uri = "hdfs://*";
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(uri), conf);

FileStatus[] status = fs.globStatus(new Path(uri),new RegexExcludePathFilter("^.*/"));
Path[] listedPaths = FileUtil.stat2Paths(status);
for (Path p : listedPaths) {

通配符 (Glob)   名称          匹配 (Matches)
*               星号          Matches zero or more characters
?               问号          Matches a single character
[ab]            字符类        Matches a single character in the set {a, b}
[^ab]           非字符类      Matches a single character that is not in the set {a, b}
[a-b]           字符范围      Matches a single character in the (closed) range [a, b], where a is lexicographically less than or equal to b
[^a-b]          非字符范围    Matches a single character that is not in the (closed) range [a, b], where a is lexicographically less than or equal to b
{a,b}           或选择        Matches either expression a or b
\c              转义字符      Matches character c when it is a metacharacter


import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CoherencyModel {
public static void main(String[] args) throws Exception {
String uri = "hdfs://";
Configuration conf=new Configuration();
FileSystem fs=FileSystem.get(URI.create(uri),conf);
Path p = new Path(uri+"/p");//如果改为Path p = new Path("p");则输出结果变为hdfs://
OutputStream out = fs.create(p);
out.write("content for tht test".getBytes("UTF-8"));

