caffe的mnist格式数据生成和mnist格式数据转npy
2016-11-01 15:35
453 查看
MNIST 具体的文件格式描述,及其里面的属性。见官网
mnist就是一种二进制格式,matlab转化代码如下:
img2mnist.m
label2mnist.m
% label2mnist.m
% Read a label file in the big-endian format used by this post (int32 magic,
% int32 label count, then one unsigned byte per label), validate it, and
% write the same content back out to a new file.
filename = 'train-labels-idx1-ubyte';
fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');
labels = fread(fp, inf, 'unsigned char');
% Close the input before validating so a failed assert cannot leak the handle.
fclose(fp);
assert(magic == 2049, ['Bad magic number in ', filename, '']);
assert(size(labels,1) == numLabels, 'Mismatch in label count');

% Write the header and the labels back out with the same byte order.
outname = 'test_mnist_lable.bin';
f2 = fopen(outname, 'wb');
% fopen returns -1 on failure; stop here instead of failing inside fwrite.
assert(f2 ~= -1, ['Could not open ', outname, '']);
fwrite(f2, magic, 'int32', 0, 'ieee-be');
fwrite(f2, numLabels, 'int32', 0, 'ieee-be');
fwrite(f2, labels, 'unsigned char');
fclose(f2);
===================================================================================================================================
mnist转npy:
data_util.py
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 25 14:40:06 2016
load MNIST dataset
@author: liudiwei
"""
import numpy as np
import struct
import matplotlib.pyplot as plt
import os
class DataUtils(object):
    """Load MNIST image and label files into numpy arrays.

    Usage::

        from data_util import DataUtils

        def main():
            trainfile_X = '../dataset/MNIST/train-images.idx3-ubyte'
            trainfile_y = '../dataset/MNIST/train-labels.idx1-ubyte'
            testfile_X = '../dataset/MNIST/t10k-images.idx3-ubyte'
            testfile_y = '../dataset/MNIST/t10k-labels.idx1-ubyte'
            train_X = DataUtils(filename=trainfile_X).getImage()
            train_y = DataUtils(filename=trainfile_y).getLabel()
            test_X = DataUtils(testfile_X).getImage()
            test_y = DataUtils(testfile_y).getLabel()

            # The following saves the images to local files:
            # path_trainset = "../dataset/MNIST/imgs_train"
            # path_testset = "../dataset/MNIST/imgs_test"
            # if not os.path.exists(path_trainset):
            #     os.mkdir(path_trainset)
            # if not os.path.exists(path_testset):
            #     os.mkdir(path_testset)
            # DataUtils(outpath=path_trainset).outImg(train_X, train_y)
            # DataUtils(outpath=path_testset).outImg(test_X, test_y)

            return train_X, train_y, test_X, test_y
    """

    # Magic numbers stored in the first header field of each file type.
    _IMAGE_MAGIC = 2051
    _LABEL_MAGIC = 2049

    def __init__(self, filename=None, outpath=None):
        """Store the input file path and/or the output directory.

        Args:
            filename: path of the MNIST binary file read by ``getImage`` /
                ``getLabel``.
            outpath: directory into which ``outImg`` writes PNG files.
        """
        self._filename = filename
        self._outpath = outpath

        # struct format pieces; '>' selects big-endian, standard-size fields.
        self._tag = '>'
        self._twoBytes = 'II'
        self._fourBytes = 'IIII'
        self._pictureBytes = '784B'
        self._labelByte = '1B'
        self._twoBytes2 = self._tag + self._twoBytes
        self._fourBytes2 = self._tag + self._fourBytes
        self._pictureBytes2 = self._tag + self._pictureBytes
        self._labelByte2 = self._tag + self._labelByte

    def getImage(self):
        """Decode the image file into a binarised pixel matrix.

        The image size is taken from the file header rather than assumed to
        be 28x28, so files with other dimensions decode correctly.

        Returns:
            Integer array of shape ``(numImgs, numRows * numCols)`` in which
            every non-zero pixel has been clamped to 1.

        Raises:
            ValueError: the header magic number is not 2051.
            struct.error: the file is too short for its header or for the
                number of images the header announces.
        """
        with open(self._filename, 'rb') as binfile:
            buf = binfile.read()

        # Use the '>'-prefixed format for both the unpack and the offset so
        # the two can never disagree because of native alignment.
        header_size = struct.calcsize(self._fourBytes2)
        magic, num_imgs, num_rows, num_cols = struct.unpack_from(
            self._fourBytes2, buf, 0)
        if magic != self._IMAGE_MAGIC:
            raise ValueError('Bad magic number in %s' % self._filename)

        pixels_per_img = num_rows * num_cols
        total = num_imgs * pixels_per_img
        if len(buf) - header_size < total:
            # Same exception type struct.unpack_from raises on a short buffer.
            raise struct.error('%s holds fewer than %d pixel bytes'
                               % (self._filename, total))

        pixels = np.frombuffer(buf, dtype=np.uint8, count=total,
                               offset=header_size)
        # Binarise: 0 stays 0, anything larger becomes 1.
        binary = np.minimum(pixels, 1).astype(int)
        return binary.reshape(num_imgs, pixels_per_img)

    def getLabel(self):
        """Decode the label file into a 1-D integer array.

        Returns:
            Integer array of length ``numItems`` with one label per entry.

        Raises:
            ValueError: the header magic number is not 2049.
            struct.error: the file is too short for its header or for the
                number of labels the header announces.
        """
        with open(self._filename, 'rb') as binfile:
            buf = binfile.read()

        header_size = struct.calcsize(self._twoBytes2)
        magic, num_items = struct.unpack_from(self._twoBytes2, buf, 0)
        if magic != self._LABEL_MAGIC:
            raise ValueError('Bad magic number in %s' % self._filename)
        if len(buf) - header_size < num_items:
            # Same exception type struct.unpack_from raises on a short buffer.
            raise struct.error('%s holds fewer than %d label bytes'
                               % (self._filename, num_items))

        labels = np.frombuffer(buf, dtype=np.uint8, count=num_items,
                               offset=header_size)
        return labels.astype(int)

    def outImg(self, arrX, arrY, count=1, shape=(28, 28)):
        """Save rows of ``arrX`` as PNG images named ``<index>_<label>.png``.

        Args:
            arrX: 2-D array, one flattened image per row.
            arrY: labels, indexed in step with the rows of ``arrX``.
            count: number of leading rows to save. Defaults to 1, which
                matches the previous hard-coded behaviour.
            shape: (rows, cols) each row is reshaped to before plotting.
        """
        num_samples, _ = np.shape(arrX)
        for i in range(min(count, num_samples)):
            img = np.array(arrX[i]).reshape(shape)
            outfile = str(i) + "_" + str(arrY[i]) + ".png"
            plt.figure()
            plt.imshow(img, cmap='binary')  # render in black and white
            plt.savefig(os.path.join(self._outpath, outfile))
            # Release the figure so saving many images does not pile them up.
            plt.close()
==================================================================================================================================
感谢http://blog.csdn.net/woyaopojie1990/article/details/42873571
学习小插曲:使用caffe的时候,如何设置GPU的ID号
有两种情况设置GPU的ID号:
1、如果使用caffe的源码(即非Python和matlab接口)
在脚本文件中加入参数: --gpu=ID; 例如 "--gpu=1",如果不设置,默认使用的ID=0,注意这个ID数是大于等于0的数,如0,1,2.....
2、如果使用matlab的接口的时候,有两种方法
A、使用matlab自带的命令,g=gpuDevice(ID);例如,g=gpuDevice(1),注意这个ID数是大于0的数,1,2.....
B、在初始化caffe的时候,进行设置,caffe('set_device',ID);
例如 caffe('set_device', 1),注意这个ID数是大于等于0的数,如0,1,2.....
MNIST 具体的文件格式描述,及其里面的属性。见官网
mnist就是一种二进制格式,matlab转化代码如下:
img2mnist.m
% img2mnist.m
% Read an image file in the big-endian format used by this post (int32 magic,
% image count, row count, column count, then one unsigned byte per pixel),
% validate it, and write the same content back out to a new file.
filename = 'train-images-idx3-ubyte';
fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
numCols = fread(fp, 1, 'int32', 0, 'ieee-be');
images = fread(fp, inf, 'unsigned char');
% Close the input before validating so a failed assert cannot leak the handle.
fclose(fp);
assert(magic == 2051, ['Bad magic number in ', filename, '']);

% Reshape to (cols, rows, n), then swap the first two dimensions to get
% (rows, cols, n). NOTE(review): presumably because the file stores each
% image row by row while MATLAB fills arrays column by column -- confirm.
images = reshape(images, numCols, numRows, numImages);
images = permute(images, [2 1 3]);

%%%%
outname = 'test_mnist.bin';
f1 = fopen(outname, 'wb');
% fopen returns -1 on failure; stop here instead of failing inside fwrite.
assert(f1 ~= -1, ['Could not open ', outname, '']);
fwrite(f1, magic, 'int32', 0, 'ieee-be');
fwrite(f1, numImages, 'int32', 0, 'ieee-be');
fwrite(f1, numRows, 'int32', 0, 'ieee-be');
% Fourth header field is the column count (numRows was written twice before).
fwrite(f1, numCols, 'int32', 0, 'ieee-be');
% Undo the dimension swap and flatten so bytes go out in the input order.
images = permute(images, [2 1 3]);
images = reshape(images, numCols*numRows*numImages, 1);
fwrite(f1, images, 'unsigned char');
fclose(f1);
label2mnist.m
% label2mnist.m
% Read a label file in the big-endian format used by this post (int32 magic,
% int32 label count, then one unsigned byte per label), validate it, and
% write the same content back out to a new file.
filename = 'train-labels-idx1-ubyte';
fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);
magic = fread(fp, 1, 'int32', 0, 'ieee-be');
numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');
labels = fread(fp, inf, 'unsigned char');
% Close the input before validating so a failed assert cannot leak the handle.
fclose(fp);
assert(magic == 2049, ['Bad magic number in ', filename, '']);
assert(size(labels,1) == numLabels, 'Mismatch in label count');

% Write the header and the labels back out with the same byte order.
outname = 'test_mnist_lable.bin';
f2 = fopen(outname, 'wb');
% fopen returns -1 on failure; stop here instead of failing inside fwrite.
assert(f2 ~= -1, ['Could not open ', outname, '']);
fwrite(f2, magic, 'int32', 0, 'ieee-be');
fwrite(f2, numLabels, 'int32', 0, 'ieee-be');
fwrite(f2, labels, 'unsigned char');
fclose(f2);
===================================================================================================================================
mnist转npy:
data_util.py
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 25 14:40:06 2016
load MNIST dataset
@author: liudiwei
"""
import numpy as np
import struct
import matplotlib.pyplot as plt
import os
class DataUtils(object):
    """Load MNIST image and label files into numpy arrays.

    Usage::

        from data_util import DataUtils

        def main():
            trainfile_X = '../dataset/MNIST/train-images.idx3-ubyte'
            trainfile_y = '../dataset/MNIST/train-labels.idx1-ubyte'
            testfile_X = '../dataset/MNIST/t10k-images.idx3-ubyte'
            testfile_y = '../dataset/MNIST/t10k-labels.idx1-ubyte'
            train_X = DataUtils(filename=trainfile_X).getImage()
            train_y = DataUtils(filename=trainfile_y).getLabel()
            test_X = DataUtils(testfile_X).getImage()
            test_y = DataUtils(testfile_y).getLabel()

            # The following saves the images to local files:
            # path_trainset = "../dataset/MNIST/imgs_train"
            # path_testset = "../dataset/MNIST/imgs_test"
            # if not os.path.exists(path_trainset):
            #     os.mkdir(path_trainset)
            # if not os.path.exists(path_testset):
            #     os.mkdir(path_testset)
            # DataUtils(outpath=path_trainset).outImg(train_X, train_y)
            # DataUtils(outpath=path_testset).outImg(test_X, test_y)

            return train_X, train_y, test_X, test_y
    """

    # Magic numbers stored in the first header field of each file type.
    _IMAGE_MAGIC = 2051
    _LABEL_MAGIC = 2049

    def __init__(self, filename=None, outpath=None):
        """Store the input file path and/or the output directory.

        Args:
            filename: path of the MNIST binary file read by ``getImage`` /
                ``getLabel``.
            outpath: directory into which ``outImg`` writes PNG files.
        """
        self._filename = filename
        self._outpath = outpath

        # struct format pieces; '>' selects big-endian, standard-size fields.
        self._tag = '>'
        self._twoBytes = 'II'
        self._fourBytes = 'IIII'
        self._pictureBytes = '784B'
        self._labelByte = '1B'
        self._twoBytes2 = self._tag + self._twoBytes
        self._fourBytes2 = self._tag + self._fourBytes
        self._pictureBytes2 = self._tag + self._pictureBytes
        self._labelByte2 = self._tag + self._labelByte

    def getImage(self):
        """Decode the image file into a binarised pixel matrix.

        The image size is taken from the file header rather than assumed to
        be 28x28, so files with other dimensions decode correctly.

        Returns:
            Integer array of shape ``(numImgs, numRows * numCols)`` in which
            every non-zero pixel has been clamped to 1.

        Raises:
            ValueError: the header magic number is not 2051.
            struct.error: the file is too short for its header or for the
                number of images the header announces.
        """
        with open(self._filename, 'rb') as binfile:
            buf = binfile.read()

        # Use the '>'-prefixed format for both the unpack and the offset so
        # the two can never disagree because of native alignment.
        header_size = struct.calcsize(self._fourBytes2)
        magic, num_imgs, num_rows, num_cols = struct.unpack_from(
            self._fourBytes2, buf, 0)
        if magic != self._IMAGE_MAGIC:
            raise ValueError('Bad magic number in %s' % self._filename)

        pixels_per_img = num_rows * num_cols
        total = num_imgs * pixels_per_img
        if len(buf) - header_size < total:
            # Same exception type struct.unpack_from raises on a short buffer.
            raise struct.error('%s holds fewer than %d pixel bytes'
                               % (self._filename, total))

        pixels = np.frombuffer(buf, dtype=np.uint8, count=total,
                               offset=header_size)
        # Binarise: 0 stays 0, anything larger becomes 1.
        binary = np.minimum(pixels, 1).astype(int)
        return binary.reshape(num_imgs, pixels_per_img)

    def getLabel(self):
        """Decode the label file into a 1-D integer array.

        Returns:
            Integer array of length ``numItems`` with one label per entry.

        Raises:
            ValueError: the header magic number is not 2049.
            struct.error: the file is too short for its header or for the
                number of labels the header announces.
        """
        with open(self._filename, 'rb') as binfile:
            buf = binfile.read()

        header_size = struct.calcsize(self._twoBytes2)
        magic, num_items = struct.unpack_from(self._twoBytes2, buf, 0)
        if magic != self._LABEL_MAGIC:
            raise ValueError('Bad magic number in %s' % self._filename)
        if len(buf) - header_size < num_items:
            # Same exception type struct.unpack_from raises on a short buffer.
            raise struct.error('%s holds fewer than %d label bytes'
                               % (self._filename, num_items))

        labels = np.frombuffer(buf, dtype=np.uint8, count=num_items,
                               offset=header_size)
        return labels.astype(int)

    def outImg(self, arrX, arrY, count=1, shape=(28, 28)):
        """Save rows of ``arrX`` as PNG images named ``<index>_<label>.png``.

        Args:
            arrX: 2-D array, one flattened image per row.
            arrY: labels, indexed in step with the rows of ``arrX``.
            count: number of leading rows to save. Defaults to 1, which
                matches the previous hard-coded behaviour.
            shape: (rows, cols) each row is reshaped to before plotting.
        """
        num_samples, _ = np.shape(arrX)
        for i in range(min(count, num_samples)):
            img = np.array(arrX[i]).reshape(shape)
            outfile = str(i) + "_" + str(arrY[i]) + ".png"
            plt.figure()
            plt.imshow(img, cmap='binary')  # render in black and white
            plt.savefig(os.path.join(self._outpath, outfile))
            # Release the figure so saving many images does not pile them up.
            plt.close()
==================================================================================================================================
感谢http://blog.csdn.net/woyaopojie1990/article/details/42873571
学习小插曲:使用caffe的时候,如何设置GPU的ID号
有两种情况设置GPU的ID号:
1、如果使用caffe的源码(即非Python和matlab接口)
在脚本文件中加入参数: --gpu=ID; 例如 "--gpu=1",如果不设置,默认使用的ID=0,注意这个ID数是大于等于0的数,如0,1,2.....
2、如果使用matlab的接口的时候,有两种方法
A、使用matlab自带的命令,g=gpuDevice(ID);例如,g=gpuDevice(1),注意这个ID数是大于0的数,1,2.....
B、在初始化caffe的时候,进行设置,caffe('set_device',ID);
例如 caffe('set_device', 1),注意这个ID数是大于等于0的数,如0,1,2.....
相关文章推荐
- caffe均值文件的生成以及npy格式的转换
- (原)caffe中通过图像生成lmdb格式的数据
- Caffe下将mnist手写图片数据转化成lmdb格式
- 自己制作npy数据集,数据格式类似于mnist数据(16)---《深度学习》
- 【caffe】caffe中通过图像生成lmdb格式的数据
- 【caffe-Windows】以mnist为例lmdb格式数据
- leveldb研究2- 存储分析,数据库日志文件格式,数据文件的格式和生成
- java生成json格式数据,一个简单的class
- C#将datatable生成easyui的绑定tree 的json数据格式
- box2d 描点工具生成b2Shape数据(xml通用格式),oc、c++解析模块类文件,b2Shape数据可视化工具
- box2d 描点工具生成b2Shape数据(xml通用格式),oc、c++解析模块类文件,b2Shape数据可视化工具
- 生成libSVM的数据格式及使用方法总结
- 【原】.Net创建Excel文件(插入数据、修改格式、生成图表)的方法
- nutch抓取数据后生成的文件格式
- nutch抓取数据后生成的文件格式
- 【原】.Net创建Excel文件(插入数据、修改格式、生成图表)的方法 .
- Java类,生成Dojo_Gird表格的json数据格式
- 常见数据类型的手机二维码生成与识别格式参考
- C#生成JSON数据格式的函数
- silverlight3 datagrid c#中动态生成模板列(日期格式)使用IValueConvert对绑定数据的格式化操作