您的位置:首页 > 其它

Kaggle | Digit Recognizer

2015-08-29 11:09 453 查看
Digit Recognizer题目地址

使用的是 MNIST 数据集:训练集 42000 条,测试集 28000 条。



1) 使用 random forest(随机森林,由多棵决策树组成)来实现,准确率约 0.966

# Kaggle Digit Recognizer: fit a random forest on the pixel columns of
# train.csv and write one predicted digit per row of test.csv to predict.csv.
library(randomForest)

# Fix the RNG state so the row sample and the forest are reproducible.
set.seed(0)

numTrees <- 200

train <- read.csv("train.csv")

# Use at most 42000 training rows. sample() without replacement errors when
# asked for more rows than exist, so cap the request at the rows available.
numTrain <- min(42000, nrow(train))
rows <- sample(seq_len(nrow(train)), numTrain)

# Column 1 holds the digit label; a factor response makes randomForest()
# fit a classification forest rather than a regression one.
labels <- as.factor(train[rows, 1])

print(head(labels))

# Keep only the sampled rows and drop the label column, leaving predictors.
train <- train[rows, -1]

# Release the memory held by the full data frame before fitting.
# (memory.size()/memory.limit() were dropped: they are Windows-only and only
# emit a warning and return Inf on other platforms and on R >= 4.2.)
invisible(gc())

rf <- randomForest(train, labels, ntree = numTrees)
rm(train)

test <- read.csv("test.csv")
pre <- predict(rf, newdata = test)

print(head(pre))

# Submission layout: 1-based image index plus the predicted label.
predictions <- data.frame(ImageId = seq_len(nrow(test)), Label = pre)

write.csv(predictions, "predict.csv", row.names = FALSE)


Fri, 28 Aug 2015 12:41:01

Edit description
predict.csv 0.96600
2) 使用 DeepLearnToolbox 中的 CNN 库来实现,参数基本没有改动,只是 numepochs 设置得比较大,准确率约 0.98829

widon@widon-X401A:~$ ls lib/DeepLearnToolbox/

CAE CONTRIBUTING.md data LICENSE README_header.md REFS.md tests

CNN create_readme.sh DBN NN README.md SAE util

% Kaggle Digit Recognizer using the DeepLearnToolbox CNN (adapted from the
% toolbox's test_example_CNN). Trains a 6c-2s-12c-2s network on `train` and
% writes the predicted digit for every row of `test` to pre.csv.
%
% Expects digitdata.mat to define:
%   train - one row per image, label in column 1, pixels in columns 2:end
%   test  - one row per image, pixels only
% NOTE(review): presumably digitdata.mat was built from the Kaggle CSV files,
% e.g. test = csvread('test.csv', 1, 0); -- confirm how it was generated.

clear ; close all; clc

load('digitdata.mat')

% Use at most 42000 training rows; randperm(n, k) errors when k > n, so cap
% the request at the number of rows actually present.
% NOTE(review): this draw happens before the generator is seeded below, so
% the row order is not reproducible between sessions -- confirm if intended.
casenum = min(42000, size(train, 1));
tmp = randperm(size(train, 1), casenum);
train_x = train(tmp, 2:end);
label = train(tmp, 1);
test_x = test;
testnum = size(test_x, 1);   % derived from the data instead of a fixed 28000

% One-hot encode the labels: digit d sets column d+1 of its row to 1.
% double() keeps the index arithmetic safe if the data is an integer class.
m = size(label, 1);
train_y = zeros(m, 10);
train_y(sub2ind(size(train_y), (1:m)', double(label) + 1)) = 1;

% Each image row becomes a 28x28 slice, scaled from [0, 255] to [0, 1].
train_x = double(reshape(train_x',28,28,casenum))/255;
test_x = double(reshape(test_x',28,28,testnum))/255;
% Targets are passed as 10-by-casenum, one column per training image.
train_y = double(train_y');

%% ex1 Train a 6c-2s-12c-2s Convolutional neural network
%will run 1 epoch in about 200 second and get around 11% error.
%With 100 epochs you'll get around 1.2% error

% Legacy seeding syntax kept on purpose: switching generators would change
% the weight initialisation and therefore the trained model.
rand('state',0)

cnn.layers = {
struct('type', 'i') %input layer
struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
struct('type', 's', 'scale', 2) %sub sampling layer
struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
struct('type', 's', 'scale', 2) %subsampling layer
};

opts.alpha = 1;
opts.batchsize = 50;
opts.numepochs = 200;

cnn = cnnsetup(cnn, train_x, train_y);
cnn = cnntrain(cnn, train_x, train_y, opts);

% Free the training data before the forward pass over the test set.
clear train train_x train_y
test_y = cnnff(cnn, test_x);
% The row index of the largest output per image is 1-based; subtract 1 to
% get back to the digit 0-9.
[~, y] = max(test_y.o);
y = y - 1;
y = y';   % column vector; semicolon avoids echoing every prediction
csvwrite('pre.csv', y);

Sat, 29 Aug 2015 00:01:20

Edit description
pre.csv 0.98829
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: