// TD learning
// Temporal-difference learning of a linear value estimate V(s) = w * x_s over a
// fixed cyclic sequence of patterns (1 -> 2 -> ... -> n_patterns -> 1 -> ...).
// Each pattern s is represented by a random feature column x_s.
// Output: figure with (top) summed absolute TD error per episode and
// (bottom) the learned value of every pattern.
clear;
clf;

// ---- Parameters ----
gamma = 0.8;                      // discount factor (note: shadows the built-in gamma function)
alpha = 0.2;                      // learning rate of the weight update
reward = [0 1 0 0.5 0];           // reward(s) is used in the TD error of the step that leaves pattern s
n_patterns = size(reward, '*');   // number of patterns, derived from the reward table
n_features = 10;                  // length of each pattern's feature vector
n_episodes = 100;                 // number of sweeps through the pattern sequence

// ---- Initial state ----
pattern_vector = rand(n_features, n_patterns);  // column s holds the features of pattern s
w = rand(1, n_features);                        // weight row vector of the linear value estimate
previous_state = n_patterns;                    // the sequence is cyclic: the last pattern precedes the first
// Value remembered for previous_state. It starts at 0 rather than at
// w * pattern_vector(:, previous_state), so the very first TD error is
// computed against this placeholder.
V_mem = 0;

// ---- Learning ----
TDerror = zeros(1, n_episodes);   // preallocated: summed |TD error| per episode
for episode = 1:n_episodes
    for pattern = 1:n_patterns
        // Value of the pattern just entered, under the current weights.
        V = w * pattern_vector(:, pattern);
        // TD error for the transition previous_state -> pattern.
        // V_mem was computed before the preceding weight update.
        rhat = reward(previous_state) + gamma * V - V_mem;
        // Move the weights along the features of the state that was left.
        w = w + alpha * rhat * pattern_vector(:, previous_state)';
        TDerror(episode) = TDerror(episode) + abs(rhat);
        previous_state = pattern;
        V_mem = V;
    end
end

// ---- Plots ----
subplot(2, 1, 1);
plot(TDerror);
xlabel('Episode');
ylabel('TD error');

subplot(2, 1, 2);
plot(w * pattern_vector);
xlabel('Pattern');
ylabel('V(Pattern)');