php 使用 hdfs 分布式存储海量小文件
2012-03-09 12:15
387 查看
php可以通过thrift连接hbase,同样php可以通过thrift读取hadoop资源(HDFS资源)。
准备:
php需要thrift的library
thrift 安装参见 我上一篇文章
packages:hadoop-0.20.2\src\contrib\thriftfs\gen-php // 搜索hadoop 下载源码 ,在hadoop源码包里
<?php
/**
 * Demo: access HDFS from PHP through the Hadoop thriftfs gateway.
 *
 * Steps performed:
 *   1. make sure the HDFS directory /user/root/hadoop exists,
 *   2. upload the local file hello.txt into that directory,
 *   3. read the uploaded file back and print it,
 *   4. list the directory contents.
 *
 * ROOTPATH is not defined in this snippet; it must be defined by the
 * surrounding application before this script runs.
 */
$GLOBALS['THRIFT_ROOT'] = ROOTPATH . '/lib/thrift';
require_once($GLOBALS['THRIFT_ROOT'] . '/Thrift.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/transport/TSocket.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/transport/TBufferedTransport.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/protocol/TBinaryProtocol.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/packages/hadoopfs/ThriftHadoopFileSystem.php');

// Number of bytes moved per write()/read() round trip.
$chunk_size = 1024;

$hadoop_socket = new TSocket("localhost", 59256);
$hadoop_socket->setSendTimeout(10000); // Ten seconds
$hadoop_socket->setRecvTimeout(20000); // Twenty seconds
$hadoop_transport = new TBufferedTransport($hadoop_socket);
$hadoop_protocol = new TBinaryProtocol($hadoop_transport);
$hadoopClient = new ThriftHadoopFileSystemClient($hadoop_protocol);

// Declared up front so the cleanup code after the try/catch can always test it.
$localfile = false;

try {
    // Opened inside the try so that a connection failure is reported by the
    // catch below instead of aborting the script with an uncaught exception.
    $hadoop_transport->open();

    // create directory
    $dirpathname = new Pathname(array("pathname" => "/user/root/hadoop"));
    if ($hadoopClient->exists($dirpathname)) {
        echo $dirpathname->pathname . " exists.\n";
    } else {
        $hadoopClient->mkdirs($dirpathname);
    }

    // put file
    $filepathname = new Pathname(array("pathname" => $dirpathname->pathname . "/hello.txt"));
    $localfile = fopen("hello.txt", "rb");
    if ($localfile === false) {
        // Without this check fread() would be handed a non-resource.
        throw new Exception("unable to open local file hello.txt");
    }
    $hdfsfile = $hadoopClient->create($filepathname);
    while (true) {
        $data = fread($localfile, $chunk_size);
        if ($data === false || strlen($data) == 0) {
            break;
        }
        $hadoopClient->write($hdfsfile, $data);
    }
    $hadoopClient->close($hdfsfile);
    fclose($localfile);
    $localfile = false;

    // get file
    echo "read file:\n";
    print_r($filepathname);
    $hdfsfile = $hadoopClient->open($filepathname);
    print_r($hdfsfile);
    // read() takes an explicit offset, so it has to be advanced after every
    // chunk; passing a constant 0 would return the first chunk forever.
    // NOTE(review): the offset advances by the byte length of what was
    // received, which assumes the gateway returns the bytes unchanged -- confirm
    // for non-text payloads.
    $offset = 0;
    while (true) {
        $data = $hadoopClient->read($hdfsfile, $offset, $chunk_size);
        $received = strlen($data);
        if ($received == 0) {
            break;
        }
        print $data;
        $offset += $received;
    }
    $hadoopClient->close($hdfsfile);

    // list directory
    echo "listStatus:\n";
    $result = $hadoopClient->listStatus($dirpathname);
    print_r($result);
    foreach ($result as $value) {
        if ($value->isdir == "1") {
            print "dir\t";
        } else {
            print "file\t";
        }
        print $value->block_replication . "\t" . $value->length . "\t" . $value->modification_time . "\t" . $value->permission . "\t" . $value->owner . "\t" . $value->group . "\t" . $value->path . "\n";
    }
} catch (Exception $e) {
    print_r($e);
}

// Cleanup runs on both the success and the failure path, so neither the local
// file handle nor the Thrift connection is leaked when a call above throws.
if (is_resource($localfile)) {
    fclose($localfile);
}
if ($hadoop_transport->isOpen()) {
    $hadoop_transport->close();
}
?>
启动hadoop的thrift
hadoop-0.20.2\src\contrib\thriftfs\scripts\start_thrift_server.sh 59256
problem one:
在系统目录创建文件,而不是在hadoop目录中创建文件
原因:
thrift启动时加载默认的配置文件
解决方法:
修改start_thrift_server.sh文件
TOP=/usr/local/hadoop-0.20.2
CLASSPATH=$CLASSPATH:$TOP/conf
problem two:
java.lang.NullPointerException
at org.apache.hadoop.thriftfs.HadoopThriftServer$HadoopThriftHandler.write(HadoopThriftServer.java:282)
at org.apache.hadoop.thriftfs.api.ThriftHadoopFileSystem$Processor$write.process(Unknown Source)
at org.apache.hadoop.thriftfs.api.ThriftHadoopFileSystem$Processor.process(Unknown Source)
at com.facebook.thrift.server.TThreadPoolServer$WorkerProcess.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
原因:
java返回的句柄id(map的hash key)为64位long类型,而php(32位)的整数无法存储long类型的数据,导致转换成float数据后丢失精度;php回传给java的id与原值不一致,服务端用它查不到对应的文件流,于是在write时抛出NullPointerException。
private long nextId = new Random().nextLong();
java返回数据:4207488029786584864
php获取数据:4.2074880297866E+18
java获得php传递数据:4207488029786585088
解决方法:
修改hadoop-0.20.2\src\contrib\thriftfs\if\hadoopfs.thrift文件
修改
struct ThriftHandle {
i64 id
}
为
struct ThriftHandle {
string id
}
重新生成php packages
thrift --gen php hadoopfs.thrift
修改org.apache.hadoop.thriftfs.api.ThriftHandle类
修改
public long id;
为:
public String id;
修改相应的程序
org.apache.hadoop.thriftfs.HadoopThriftServer
修改
long id = insert(out);
ThriftHandle obj = new ThriftHandle(id);
为
long id = insert(out);
String _id = String.valueOf(id);
ThriftHandle obj = new ThriftHandle(_id);
同样修改其它使用ThriftHandle.id的程序(例如根据id查找文件流的地方,需要先用Long.parseLong把字符串id转换回long)
重新打包,启动hadoop的thrift:
hadoop-0.20.2\src\contrib\thriftfs\scripts\start_thrift_server.sh 59256
这样php就可以连接并且获取hadoop中的资源了
准备:
php需要thrift的library
thrift 安装参见 我上一篇文章
packages:hadoop-0.20.2\src\contrib\thriftfs\gen-php // 搜索hadoop 下载源码 ,在hadoop源码包里
<?php
/**
 * Demo: access HDFS from PHP through the Hadoop thriftfs gateway.
 *
 * Steps performed:
 *   1. make sure the HDFS directory /user/root/hadoop exists,
 *   2. upload the local file hello.txt into that directory,
 *   3. read the uploaded file back and print it,
 *   4. list the directory contents.
 *
 * ROOTPATH is not defined in this snippet; it must be defined by the
 * surrounding application before this script runs.
 */
$GLOBALS['THRIFT_ROOT'] = ROOTPATH . '/lib/thrift';
require_once($GLOBALS['THRIFT_ROOT'] . '/Thrift.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/transport/TSocket.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/transport/TBufferedTransport.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/protocol/TBinaryProtocol.php');
require_once($GLOBALS['THRIFT_ROOT'] . '/packages/hadoopfs/ThriftHadoopFileSystem.php');

// Number of bytes moved per write()/read() round trip.
$chunk_size = 1024;

$hadoop_socket = new TSocket("localhost", 59256);
$hadoop_socket->setSendTimeout(10000); // Ten seconds
$hadoop_socket->setRecvTimeout(20000); // Twenty seconds
$hadoop_transport = new TBufferedTransport($hadoop_socket);
$hadoop_protocol = new TBinaryProtocol($hadoop_transport);
$hadoopClient = new ThriftHadoopFileSystemClient($hadoop_protocol);

// Declared up front so the cleanup code after the try/catch can always test it.
$localfile = false;

try {
    // Opened inside the try so that a connection failure is reported by the
    // catch below instead of aborting the script with an uncaught exception.
    $hadoop_transport->open();

    // create directory
    $dirpathname = new Pathname(array("pathname" => "/user/root/hadoop"));
    if ($hadoopClient->exists($dirpathname)) {
        echo $dirpathname->pathname . " exists.\n";
    } else {
        $hadoopClient->mkdirs($dirpathname);
    }

    // put file
    $filepathname = new Pathname(array("pathname" => $dirpathname->pathname . "/hello.txt"));
    $localfile = fopen("hello.txt", "rb");
    if ($localfile === false) {
        // Without this check fread() would be handed a non-resource.
        throw new Exception("unable to open local file hello.txt");
    }
    $hdfsfile = $hadoopClient->create($filepathname);
    while (true) {
        $data = fread($localfile, $chunk_size);
        if ($data === false || strlen($data) == 0) {
            break;
        }
        $hadoopClient->write($hdfsfile, $data);
    }
    $hadoopClient->close($hdfsfile);
    fclose($localfile);
    $localfile = false;

    // get file
    echo "read file:\n";
    print_r($filepathname);
    $hdfsfile = $hadoopClient->open($filepathname);
    print_r($hdfsfile);
    // read() takes an explicit offset, so it has to be advanced after every
    // chunk; passing a constant 0 would return the first chunk forever.
    // NOTE(review): the offset advances by the byte length of what was
    // received, which assumes the gateway returns the bytes unchanged -- confirm
    // for non-text payloads.
    $offset = 0;
    while (true) {
        $data = $hadoopClient->read($hdfsfile, $offset, $chunk_size);
        $received = strlen($data);
        if ($received == 0) {
            break;
        }
        print $data;
        $offset += $received;
    }
    $hadoopClient->close($hdfsfile);

    // list directory
    echo "listStatus:\n";
    $result = $hadoopClient->listStatus($dirpathname);
    print_r($result);
    foreach ($result as $value) {
        if ($value->isdir == "1") {
            print "dir\t";
        } else {
            print "file\t";
        }
        print $value->block_replication . "\t" . $value->length . "\t" . $value->modification_time . "\t" . $value->permission . "\t" . $value->owner . "\t" . $value->group . "\t" . $value->path . "\n";
    }
} catch (Exception $e) {
    print_r($e);
}

// Cleanup runs on both the success and the failure path, so neither the local
// file handle nor the Thrift connection is leaked when a call above throws.
if (is_resource($localfile)) {
    fclose($localfile);
}
if ($hadoop_transport->isOpen()) {
    $hadoop_transport->close();
}
?>
启动hadoop的thrift
hadoop-0.20.2\src\contrib\thriftfs\scripts\start_thrift_server.sh 59256
problem one:
在系统目录创建文件,而不是在hadoop目录中创建文件
原因:
thrift启动时加载默认的配置文件
解决方法:
修改start_thrift_server.sh文件
TOP=/usr/local/hadoop-0.20.2
CLASSPATH=$CLASSPATH:$TOP/conf
problem two:
java.lang.NullPointerException
at org.apache.hadoop.thriftfs.HadoopThriftServer$HadoopThriftHandler.write(HadoopThriftServer.java:282)
at org.apache.hadoop.thriftfs.api.ThriftHadoopFileSystem$Processor$write.process(Unknown Source)
at org.apache.hadoop.thriftfs.api.ThriftHadoopFileSystem$Processor.process(Unknown Source)
at com.facebook.thrift.server.TThreadPoolServer$WorkerProcess.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
原因:
java返回的句柄id(map的hash key)为64位long类型,而php(32位)的整数无法存储long类型的数据,导致转换成float数据后丢失精度;php回传给java的id与原值不一致,服务端用它查不到对应的文件流,于是在write时抛出NullPointerException。
private long nextId = new Random().nextLong();
java返回数据:4207488029786584864
php获取数据:4.2074880297866E+18
java获得php传递数据:4207488029786585088
解决方法:
修改hadoop-0.20.2\src\contrib\thriftfs\if\hadoopfs.thrift文件
修改
struct ThriftHandle {
i64 id
}
为
struct ThriftHandle {
string id
}
重新生成php packages
thrift --gen php hadoopfs.thrift
修改org.apache.hadoop.thriftfs.api.ThriftHandle类
修改
public long id;
为:
public String id;
修改相应的程序
org.apache.hadoop.thriftfs.HadoopThriftServer
修改
long id = insert(out);
ThriftHandle obj = new ThriftHandle(id);
为
long id = insert(out);
String _id = String.valueOf(id);
ThriftHandle obj = new ThriftHandle(_id);
同样修改其它使用ThriftHandle.id的程序(例如根据id查找文件流的地方,需要先用Long.parseLong把字符串id转换回long)
重新打包,启动hadoop的thrift:
hadoop-0.20.2\src\contrib\thriftfs\scripts\start_thrift_server.sh 59256
这样php就可以连接并且获取hadoop中的资源了
相关文章推荐
- php 使用 hdfs 分布式存储海量小文件
- PHP 使用扩展 phdfs 操作 HDFS存储的文件
- php使用fopen创建utf8编码文件的方法
- 使用PHP接受文件并获得其后缀名的方法
- PHP使用fopen()函数打开文件提示权限不够问题
- 如何在直接引入的php文件中使用Joomla的类库
- PHP中使用Session配合Javascript实现文件上传进度条功能
- php使用ftp远程上传文件类(解决主从文件同步问题的简单方法)
- php使用glob函数快速查询指定目录文件的方法
- php归档格式:phar后缀文件详解(创建、使用、解包还原提取)
- php使用指定的文件记录错误报告日志
- 模拟使用Flume监听日志变化,并且把增量的日志文件写入到hdfs中
- php使用glob函数快速查询指定目录文件的方法
- php使用gzip压缩传输js和css文件的方法
- 文件打包,下载之使用PHP自带的ZipArchive压缩文件并下载打包好的文件
- 微软云(Azure)如何通过PHP使用Blob服务上传文件
- thinkphp相对路径问题及其使用php原生文件操作的方法
- 如何使用PHP实现文件上传
- php使用file函数、fseek函数读取大文件效率分析
- php经典实例使用正则动态修改配置文件