对“视觉机器学习20讲配套仿真代码”的研究心得---增强学习
2016-04-25 14:14
411 查看
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%功能:演示增强学习算法在计算机视觉中的应用
%基于增强学习实现目标分类;
%环境:Win7,Matlab2012b
%Modi: NUDT-VAP
%时间:2014-02-04
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
global3 % run the parameter-initialization script (sets the globals above)
% Learning state: Q-factor table plus bookkeeping for the simulated jumps
stat=struct('Q',zeros(NS,NA),'iter',0,'old_action',1,'old_state',1,'current_state',1,'rimm',0,'total_reward',0);
done=0; % mnemonic flag: 1 = learning finished, 0 = keep simulating
while ~done
    [stat,done]=jump_learn(stat);
end
policy=pol_finder(stat);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
function action=action_selector(stat)
% Select one of the NA actions uniformly at random (pure exploration).
% Returns an integer action index in 1..NA; stat is accepted for
% interface compatibility but not used.
global NA
% Draw a uniform number and map it directly onto an action index.
% This replaces the original cumulative-sum loop, which shadowed the
% builtin `sum` and could step past NA when the repeated 1/NA additions
% rounded to slightly below 1 in floating point.
ran=rand(1);
action=min(NA,floor(ran*NA)+1);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function [stat,done]=jump_learn(stat)
% Simulate one state transition ("jump"), record the immediate reward,
% run one Q-learning update, and pick the action for the next jump.
% done is 1 once ITERMAX updates have been performed, else 0.
global ITERMAX TRM
prev_state=stat.old_state;
prev_action=stat.old_action;
% Sample the state reached by taking prev_action in prev_state
new_state=state_finder(stat);
stat.current_state=new_state;
% Immediate reward for the transition just simulated
stat.rimm=TRM(prev_state,new_state,prev_action);
% Update the Q-factor for (prev_state, prev_action)
stat=qlearn(stat);
% Choose the action for the next jump and roll the state forward
stat.old_action=action_selector(stat);
stat.old_state=new_state;
% Stop once the iteration budget is exhausted
if stat.iter>=ITERMAX
    done=1;
else
    done=0;
end
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function policy=pol_finder(stat)
% Extract the greedy policy and value function from the learned
% Q-factors and print a readable summary.
% policy(s) is the action maximizing stat.Q(s,:).
global NA NS
% Vectorized argmax over the action dimension; the original grew
% policy/value_function element by element without preallocation.
[value_col,policy_col]=max(stat.Q,[],2);
policy=value_row_shape(policy_col);
value_function=value_row_shape(value_col);
policy
value_function
% Print every Q-factor in labeled form instead of echoing the bare
% loop variables and an unlabeled ans for each entry.
for state=1:NS
    for action=1:NA
        fprintf('Q(%d,%d) = %g\n',state,action,stat.Q(state,action));
    end
end

function row=value_row_shape(col)
% Reshape a column result from max() into the row-vector layout the
% original function produced.
row=col';
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
function stat=qlearn(stat)
% Perform one Q-learning update on the Q-factor of the previously
% visited (state,action) pair, using the reward stored in stat.rimm.
global LAMBDA
% Best Q-factor reachable from the state we just landed in
best_next=max(stat.Q(stat.current_state,:));
stat.iter=stat.iter+1;
% Slowly decaying learning rate: log(k+1)/(k+1)
alpha=log(stat.iter+1)/(stat.iter+1);
old_q=stat.Q(stat.old_state,stat.old_action);
% Standard Q-learning target: immediate reward + discounted lookahead
target=stat.rimm+(LAMBDA*best_next);
stat.Q(stat.old_state,stat.old_action)=old_q*(1-alpha)+(alpha*target);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function candidate=state_finder(stat)
% Sample the next state from the transition-probability row
% TPM(old_state, :, old_action) by inverse-transform sampling.
% Returns an integer state index in 1..NS.
global NS TPM
ran=rand(1);
act=stat.old_action;
src=stat.old_state;
% Walk the cumulative distribution. The original loop shadowed the
% builtin `sum` and could index TPM past NS when the row's cumulative
% probability rounded to slightly below 1; the bounded loop plus the
% final fallback assignment removes both problems.
cum=0;
for candidate=1:NS
    cum=cum+TPM(src,candidate,act);
    if ran<cum
        return
    end
end
candidate=NS;
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization script: defines the 2-state, 2-action MDP
% used by the Q-learning demo (baseline reward matrices).
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[10,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: same MDP as the baseline but with
% all-positive reward matrices (the -5 and -14 entries made positive).
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,5;7,12];
TRM(:,:,2)=[10,17;14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: TRM(1,1,2) raised from 10 to 12
% relative to the baseline script.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[12,17;-14,13];
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% NOTE(review): this script is an exact duplicate of the previous
% variant (TRM(1,1,2)=12); the separator between the two appears to
% have been lost when the page was scraped.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[12,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: TRM(1,1,1) raised to 16 and
% TRM(1,1,2) lowered to 0 relative to the baseline script.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[16,-5;7,12];
TRM(:,:,2)=[0,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
%功能:演示增强学习算法在计算机视觉中的应用
%基于增强学习实现目标分类;
%环境:Win7,Matlab2012b
%Modi: NUDT-VAP
%时间:2014-02-04
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
global3 % run the parameter-initialization script (sets the globals above)
% Learning state: Q-factor table plus bookkeeping for the simulated jumps
stat=struct('Q',zeros(NS,NA),'iter',0,'old_action',1,'old_state',1,'current_state',1,'rimm',0,'total_reward',0);
done=0; % mnemonic flag: 1 = learning finished, 0 = keep simulating
while ~done
    [stat,done]=jump_learn(stat);
end
policy=pol_finder(stat);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
function action=action_selector(stat)
% Select one of the NA actions uniformly at random (pure exploration).
% Returns an integer action index in 1..NA; stat is accepted for
% interface compatibility but not used.
global NA
% Draw a uniform number and map it directly onto an action index.
% This replaces the original cumulative-sum loop, which shadowed the
% builtin `sum` and could step past NA when the repeated 1/NA additions
% rounded to slightly below 1 in floating point.
ran=rand(1);
action=min(NA,floor(ran*NA)+1);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function [stat,done]=jump_learn(stat)
% Simulate one state transition ("jump"), record the immediate reward,
% run one Q-learning update, and pick the action for the next jump.
% done is 1 once ITERMAX updates have been performed, else 0.
global ITERMAX TRM
prev_state=stat.old_state;
prev_action=stat.old_action;
% Sample the state reached by taking prev_action in prev_state
new_state=state_finder(stat);
stat.current_state=new_state;
% Immediate reward for the transition just simulated
stat.rimm=TRM(prev_state,new_state,prev_action);
% Update the Q-factor for (prev_state, prev_action)
stat=qlearn(stat);
% Choose the action for the next jump and roll the state forward
stat.old_action=action_selector(stat);
stat.old_state=new_state;
% Stop once the iteration budget is exhausted
if stat.iter>=ITERMAX
    done=1;
else
    done=0;
end
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function policy=pol_finder(stat)
% Extract the greedy policy and value function from the learned
% Q-factors and print a readable summary.
% policy(s) is the action maximizing stat.Q(s,:).
global NA NS
% Vectorized argmax over the action dimension; the original grew
% policy/value_function element by element without preallocation.
[value_col,policy_col]=max(stat.Q,[],2);
policy=value_row_shape(policy_col);
value_function=value_row_shape(value_col);
policy
value_function
% Print every Q-factor in labeled form instead of echoing the bare
% loop variables and an unlabeled ans for each entry.
for state=1:NS
    for action=1:NA
        fprintf('Q(%d,%d) = %g\n',state,action,stat.Q(state,action));
    end
end

function row=value_row_shape(col)
% Reshape a column result from max() into the row-vector layout the
% original function produced.
row=col';
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
function stat=qlearn(stat)
% Perform one Q-learning update on the Q-factor of the previously
% visited (state,action) pair, using the reward stored in stat.rimm.
global LAMBDA
% Best Q-factor reachable from the state we just landed in
best_next=max(stat.Q(stat.current_state,:));
stat.iter=stat.iter+1;
% Slowly decaying learning rate: log(k+1)/(k+1)
alpha=log(stat.iter+1)/(stat.iter+1);
old_q=stat.Q(stat.old_state,stat.old_action);
% Standard Q-learning target: immediate reward + discounted lookahead
target=stat.rimm+(LAMBDA*best_next);
stat.Q(stat.old_state,stat.old_action)=old_q*(1-alpha)+(alpha*target);
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
function candidate=state_finder(stat)
% Sample the next state from the transition-probability row
% TPM(old_state, :, old_action) by inverse-transform sampling.
% Returns an integer state index in 1..NS.
global NS TPM
ran=rand(1);
act=stat.old_action;
src=stat.old_state;
% Walk the cumulative distribution. The original loop shadowed the
% builtin `sum` and could index TPM past NS when the row's cumulative
% probability rounded to slightly below 1; the bounded loop plus the
% final fallback assignment removes both problems.
cum=0;
for candidate=1:NS
    cum=cum+TPM(src,candidate,act);
    if ran<cum
        return
    end
end
candidate=NS;
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization script: defines the 2-state, 2-action MDP
% used by the Q-learning demo (baseline reward matrices).
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[10,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: same MDP as the baseline but with
% all-positive reward matrices (the -5 and -14 entries made positive).
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,5;7,12];
TRM(:,:,2)=[10,17;14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: TRM(1,1,2) raised from 10 to 12
% relative to the baseline script.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[12,17;-14,13];
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% NOTE(review): this script is an exact duplicate of the previous
% variant (TRM(1,1,2)=12); the separator between the two appears to
% have been lost when the page was scraped.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[6,-5;7,12];
TRM(:,:,2)=[12,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA
% Parameter-initialization variant: TRM(1,1,1) raised to 16 and
% TRM(1,1,2) lowered to 0 relative to the baseline script.
NO_REPLICATIONS=30; % No of replications of simulation
ITERMAX=10000; % No of iterations of learning
NA=2; % Number of actions in each state
NS=2; % Number of states
LAMBDA=0.8; % discount factor
SMALL=-1000000; % sentinel "minus infinity" value
% TPM(:,:,a): state-transition probabilities under action a
TPM(:,:,1)=[0.7,0.3;0.4,0.6];
TPM(:,:,2)=[0.9,0.1;0.2,0.8];
% TRM(s,s',a): immediate reward for jumping s -> s' under action a
TRM(:,:,1)=[16,-5;7,12];
TRM(:,:,2)=[0,17;-14,13];
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&7
相关文章推荐
- Python字典方法总结
- java中的泛型
- 几种排序算法的c#实现
- 对“视觉机器学习20讲配套仿真代码”的研究心得---SVM方法
- 每天一道算法题(七)Leetcode – Word BreakII (Java)
- 对“视觉机器学习20讲配套仿真代码”的研究心得---Adaboost(一)
- php 图片上传
- 配置Eclipse支持java和xml文件的代码补全功能
- 图解VC++开发ActiveX控件C#调用
- C/C++复习:逆向输出数字
- 修改配置myeclipse/eclipse的注释作者名author
- #springMVC返回前台数据的方式
- C/C++复习:数组作实参,指针作形参排序
- java MD5算法 代码
- 对“视觉机器学习20讲配套仿真代码”的研究心得---EM算法
- [教程] 卡尔曼滤波简介及其算法实现代码(C++/C/MATLAB)
- 对“视觉机器学习20讲配套仿真代码”的研究心得---贝叶斯学习
- Java的多态在方法重载和重写时的情况
- qt的addWidget、setColumnStretch 等的使用方法
- STS或eclipse安装SVN插件