%% TD-learning demo  (note: MATLAB comments use '%'; the original '////' is a syntax error)
% Temporal-difference learning with a linear value function V(s) = w * x(s).
% Five states are visited cyclically (1..5); leaving state 3 delivers reward.
% Only state 3 carries a stimulus (feature) vector until episode 50, when
% state 2 is given a feature too, so the TD error propagates to the new cue.
%
% Workspace outputs: w (learned weights), rhat (per-step TD errors, one row
% per episode), TDerror (summed |TD error| per episode). Plots rhat vs episode.
clear; clf;

gamma0 = 0.8;                    % discount factor
alpha  = 0.2;                    % learning rate (was a hard-coded inline constant)
reward = [0 0 1 0 0];            % reward delivered on leaving each state
n_episodes = 100;
n_states   = 5;

% One 10-element feature vector per state; only state 3 is represented at first.
pattern_vector = zeros(10, n_states);
pattern_vector(:,3) = [1;0;0;0;0;0;0;0;0;0];

w = zeros(1, 10);                % linear value-function weights
V_mem = 0;                       % value of the previously visited state
previous_state = n_states;       % start as if state 5 was just left

% Preallocate result arrays (the original grew them inside the loop,
% which forces a reallocation on every assignment).
rhat    = zeros(n_episodes, n_states);
TDerror = zeros(n_episodes, 1);

for episode = 1:n_episodes
    if episode == 50
        % Introduce a second stimulus: state 2 now carries a feature vector.
        pattern_vector(:,2) = [0;0;0;1;0;0;0;0;0;0];
    end
    for pattern = 1:n_states
        V = w * pattern_vector(:,pattern);   % predicted value of current state
        % TD error: r(previous) + gamma * V(current) - V(previous)
        rhat(episode,pattern) = reward(previous_state) + gamma0*V - V_mem;
        % Update weights along the PREVIOUS state's feature vector
        % (the prediction being corrected is the one made there).
        w = w + alpha * rhat(episode,pattern) * pattern_vector(:,previous_state)';
        TDerror(episode) = TDerror(episode) + abs(rhat(episode,pattern));
        previous_state = pattern;
        V_mem = V;
    end
end

plot(rhat);
xlabel('Episode');
ylabel('rhat');