
Study notes on the companion simulation code of "视觉机器学习20讲" (20 Lectures on Visual Machine Learning) --- Reinforcement Learning

2016-04-25 14:14
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%Function: demonstrates the application of a reinforcement learning algorithm in computer vision

%Implements target classification based on reinforcement learning;

%Environment: Win7, Matlab2012b

%Modi: NUDT-VAP

%Date: 2014-02-04

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

global3 % parameter-initialization script (the global parameter blocks listed below)

stat=struct('Q',zeros(NS,NA),'iter',0,'old_action',1,'old_state',1,'current_state',1,'rimm',0,'total_reward',0);

done=0; % Mnemonic for simulation status, 1 stands for end

% 0 stands for continue

while 0==done

[stat,done]=jump_learn(stat);

end

policy=pol_finder(stat);
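The main script above performs a single learning run; NO_REPLICATIONS is defined in the parameter script but never used here. As a minimal sketch (not part of the original code), one way to repeat the experiment over independent replications could look like the following; the names rep and all_policies are illustrative assumptions:

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
global3  % initialize the global parameters (same script as above)
all_policies=zeros(NO_REPLICATIONS,NS);  % one greedy policy per replication
for rep=1:NO_REPLICATIONS
    stat=struct('Q',zeros(NS,NA),'iter',0,'old_action',1,'old_state',1,'current_state',1,'rimm',0,'total_reward',0);
    done=0;
    while 0==done
        [stat,done]=jump_learn(stat);
    end
    all_policies(rep,:)=pol_finder(stat);  % store the greedy policy of this run
end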

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function action=action_selector(stat)

global NA

ran=rand(1);

candidate=1;

sum=1/NA;

complete=0;

% Selecting each action with equal probability

while 0==complete

if ran<sum

% action selected

action=candidate;

complete=1;

else

% test if ran is associated with next action

candidate=candidate+1;

sum=sum+(1/NA);

end

end
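Note that action_selector explores purely at random, choosing every action with equal probability 1/NA and never exploiting the learned Q-factors. A common alternative, not used in the original code, is epsilon-greedy selection; the sketch below assumes a user-chosen exploration rate EPSILON:

function action=greedy_action_selector(stat,EPSILON)
% Epsilon-greedy variant (illustrative sketch, not part of the original code):
% with probability EPSILON pick a uniformly random action, otherwise take the
% action with the largest Q-factor in the current state.
global NA
if rand(1)<EPSILON
    action=randi(NA);                              % explore
else
    [~,action]=max(stat.Q(stat.current_state,:));  % exploit
end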

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function [stat,done]=jump_learn(stat)

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM

% This function simulates a jump and also updates the learning stats

old_state=stat.old_state;

old_action=stat.old_action;

% Determine current state

current_state=state_finder(stat);

% Record Feedback in stat

stat.current_state=current_state;

stat.rimm=TRM(old_state,current_state,old_action);

% DO LEARNING

stat=qlearn(stat);

% Select next action

next_action=action_selector(stat);

% Get ready to get out of this function

stat.old_state=current_state;

stat.old_action=next_action;

if stat.iter>=ITERMAX

% Learning should end

done=1;

else

done=0;

end

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function policy=pol_finder(stat)

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM

for state=1:NS

[maxQfactor,index]=max(stat.Q(state,:));

policy(state)=index;

value_function(state)=maxQfactor;

end

policy

value_function

for state=1:NS

for action=1:NA

state

action

stat.Q(state,action)

end

end
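pol_finder simply reads the result off the learned Q-table: for each state s it reports the greedy action policy(s)=argmax_a Q(s,a) and the corresponding value estimate value_function(s)=max_a Q(s,a), and then prints every individual Q-factor for inspection (the unsuppressed expressions are MATLAB's way of displaying them).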

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function stat=qlearn(stat)

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

% Q-Learning

% Finding the Max factor in the current state

q_next=max(stat.Q(stat.current_state,:));

stat.iter=stat.iter+1;

%learn_rate=1/(stat.iter);

learn_rate=log(stat.iter+1)/(stat.iter+1);

%learn_rate=0.5*300/(300+stat.iter);

q=stat.Q(stat.old_state,stat.old_action);

q=q*(1-learn_rate)+(learn_rate*(stat.rimm+(LAMBDA*q_next)));

stat.Q(stat.old_state,stat.old_action)=q;
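qlearn implements the standard Q-learning update Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(r + LAMBDA*max_a' Q(s',a')), where s and a are the previous state and action, s' is the current state, r is the immediate reward stat.rimm, and the learning rate alpha=log(k+1)/(k+1) decays with the iteration count k. As a quick worked example with assumed numbers: at the first iteration (k=1) the rate is log(2)/2, about 0.347, so starting from Q(s,a)=0 with immediate reward r=6 and all Q-factors still zero, the update yields Q(s,a) of roughly 0.347*6, about 2.08.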

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function candidate=state_finder(stat)

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM

ran=rand(1);

old_action=stat.old_action;

old_state=stat.old_state;

sum=TPM(old_state,1,old_action);

candidate=1;

complete=0;

while 0==complete

if ran<sum

complete=1;

else

candidate=candidate+1;

sum=sum+TPM(old_state,candidate,old_action);

end

end
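state_finder samples the next state from the row TPM(old_state,:,old_action) by inverse-CDF sampling: it accumulates the transition probabilities until they exceed a single uniform random draw. A vectorized equivalent (a sketch, not taken from the original code) would be:

% Equivalent vectorized sampling of the next state (illustrative sketch)
cumprobs=cumsum(TPM(old_state,:,old_action));  % cumulative transition probabilities
candidate=find(ran<cumprobs,1,'first');        % first state whose cumulative probability exceeds the draw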

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

NO_REPLICATIONS=30; % No of replications of simulation

ITERMAX=10000; % No of iterations of learning

NA=2; % Number of actions in each state

NS=2; % Number of states

LAMBDA=0.8; % discount factor

SMALL=-1000000;

TPM(:,:,1)=[0.7,0.3;0.4,0.6];

TPM(:,:,2)=[0.9,0.1;0.2,0.8];

TRM(:,:,1)=[6,-5;7,12];

TRM(:,:,2)=[10,17;-14,13];
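In these parameter scripts, TPM(i,j,a) is read as the probability of moving from state i to state j when action a is taken, and TRM(i,j,a) as the immediate reward of that transition; this is the indexing convention used by state_finder and jump_learn. The parameter blocks that follow differ only in the reward matrix TRM, apparently different experiment settings. A quick sanity check (a sketch, not part of the original code) verifies that each row of TPM is a probability distribution:

% Check that, for every action, the transition probabilities out of each state sum to 1
for a=1:NA
    assert(all(abs(sum(TPM(:,:,a),2)-1)<1e-12),'Each TPM row must sum to 1');
end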

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

NO_REPLICATIONS=30; % No of replications of simulation

ITERMAX=10000; % No of iterations of learning

NA=2; % Number of actions in each state

NS=2; % Number of states

LAMBDA=0.8; % discount factor

SMALL=-1000000;

TPM(:,:,1)=[0.7,0.3;0.4,0.6];

TPM(:,:,2)=[0.9,0.1;0.2,0.8];

TRM(:,:,1)=[6,5;7,12];

TRM(:,:,2)=[10,17;14,13];

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

NO_REPLICATIONS=30; % No of replications of simulation

ITERMAX=10000; % No of iterations of learning

NA=2; % Number of actions in each state

NS=2; % Number of states

LAMBDA=0.8; % discount factor

SMALL=-1000000;

TPM(:,:,1)=[0.7,0.3;0.4,0.6];

TPM(:,:,2)=[0.9,0.1;0.2,0.8];

TRM(:,:,1)=[6,-5;7,12];

TRM(:,:,2)=[12,17;-14,13];


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

NO_REPLICATIONS=30; % No of replications of simulation

ITERMAX=10000; % No of iterations of learning

NA=2; % Number of actions in each state

NS=2; % Number of states

LAMBDA=0.8; % discount factor

SMALL=-1000000;

TPM(:,:,1)=[0.7,0.3;0.4,0.6];

TPM(:,:,2)=[0.9,0.1;0.2,0.8];

TRM(:,:,1)=[16,-5;7,12];

TRM(:,:,2)=[0,17;-14,13];

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&