您的位置:首页 > 编程语言 > Python开发

Python将自己的图片数据集导入h5py,做识别的预处理

2018-03-21 15:45 781 查看
很多情况下,在训练卷积神经网络时,需要将自己的图片作为卷积神经网络的输入。
将自己的图片数据集导入h5py,所占空间小,使用方便
条件:自己的图片,eg:cats VS dogs,并将两类图片分别放置于两个文件夹(我这里是yes_tumble与not_tumble)
import os
import numpy as np
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
import sklearn
from sklearn import preprocessing
import h5py
import scipy
#导入必要的包
def get_files(file_dir, class_dirs=('not_tumble', 'yes_tumble')):
    """Collect image paths and integer labels from class sub-folders, shuffled.

    Args:
        file_dir: Root folder that contains one sub-folder per class.
        class_dirs: Sub-folder names. The position of a name in this sequence
            is the label given to every entry of that folder. The default is
            the original two-class layout (``not_tumble`` -> 0,
            ``yes_tumble`` -> 1).

    Returns:
        A pair ``(image_list, label_list)`` of equal-length lists: the paths
        (``file_dir/<class>/<entry>``) and their ``int`` labels, both in the
        same random order.

    Raises:
        OSError: If a class sub-folder is missing or cannot be listed
            (propagated from ``os.listdir``).
    """
    image_paths = []
    labels = []
    for label, class_name in enumerate(class_dirs):
        class_path = file_dir + '/' + class_name
        # Every directory entry is taken as-is; nothing is filtered by type.
        for entry in os.listdir(class_path):
            image_paths.append(class_path + '/' + entry)
            labels.append(label)

    # Shuffle through an index permutation so that paths and labels stay
    # paired and the labels remain ints. Stacking both into a single NumPy
    # array coerces the labels to strings, and with an empty class folder
    # they become floats such as '0.0', which int() cannot parse.
    order = np.random.permutation(len(image_paths))
    image_list = [image_paths[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list
# Build the train/test arrays from the shuffled file list.
train_dir = 'F:/CSISA_Picture'
image_list, label_list = get_files(train_dir)

print(len(image_list))
print(len(label_list))

# Number of samples held out for testing (20% of the author's data set).
TEST_SIZE = 450
n_train = len(image_list) - TEST_SIZE
if n_train < 0:
    raise ValueError(
        'need at least %d images for the test split, found %d'
        % (TEST_SIZE, len(image_list))
    )

# Preallocate the arrays; every element is overwritten by the loops below,
# so no initial values are needed.
Train_image = np.empty((n_train, 64, 64, 3), dtype='float32')
Train_label = np.empty((n_train, 1), dtype='float32')

Test_image = np.empty((TEST_SIZE, 64, 64, 3), dtype='float32')
Test_label = np.empty((TEST_SIZE, 1), dtype='float32')

# The list is already shuffled, so the first n_train entries form the training
# set and the remaining TEST_SIZE entries the test set.
# NOTE: each decoded image is assigned into a (64, 64, 3) slot, so images are
# presumably already 64x64 with 3 channels; other shapes will fail here.
for i, (path, label) in enumerate(zip(image_list[:n_train], label_list[:n_train])):
    Train_image[i] = plt.imread(path)
    Train_label[i] = label

for i, (path, label) in enumerate(zip(image_list[n_train:], label_list[n_train:])):
    Test_image[i] = plt.imread(path)
    Test_label[i] = label
# Create a new HDF5 file holding the four arrays. The context manager closes
# the file even if one of the create_dataset calls raises.
with h5py.File('data.h5', 'w') as f:
    f.create_dataset('X_train', data=Train_image)
    f.create_dataset('y_train', data=Train_label)
    f.create_dataset('X_test', data=Test_image)
    f.create_dataset('y_test', data=Test_label)
# Load the HDF5 data set back into memory to check the round trip. The read
# handle is closed by the context manager once the arrays have been copied.
with h5py.File('data.h5', 'r') as train_dataset:
    train_set_x_orig = np.array(train_dataset['X_train'][:])  # train set features
    train_set_y_orig = np.array(train_dataset['y_train'][:])  # train set labels
    test_set_x_orig = np.array(train_dataset['X_test'][:])  # test set features
    test_set_y_orig = np.array(train_dataset['y_test'][:])  # test set labels

print(train_set_x_orig.shape)
print(train_set_y_orig.shape)

# Value range of the stored pixels.
print(train_set_x_orig.max())
print(train_set_x_orig.min())

print(test_set_x_orig.shape)
print(test_set_y_orig.shape)

# Spot check: display one training sample and print its label.
# NOTE(review): the array is float32; imshow expects float RGB data in [0, 1],
# so 0-255 pixel values would be clipped. Confirm the range printed above.
plt.imshow(train_set_x_orig[222])
print(train_set_y_orig[222])
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python Deep Learning