using Distributions
using Random
using Plots

"""
    BanditArm(m)

One arm of a multi-armed bandit.

# Fields
- `m`: true mean of the arm's reward (`pull` adds it to a standard-normal draw).
- `m_estimate`: running sample mean of the rewards passed to `update`; starts at 0.
- `N`: number of rewards passed to `update` so far; starts at 0.
"""
mutable struct BanditArm
    m::Float64
    m_estimate::Float64
    N::Int
    BanditArm(m) = new(m, 0, 0)
end

"""
    pull(ban::BanditArm)

Draw one reward from the arm: a standard-normal sample shifted by `ban.m`.
"""
function pull(ban::BanditArm)
    return rand(Normal(0, 1)) + ban.m
end

"""
    update(ban::BanditArm, x::Number)

Record the observed reward `x` on `ban`: increment the sample count and fold `x`
into the running mean. Returns the updated estimate.
"""
function update(ban::BanditArm, x::Number)
    ban.N += 1
    # Incremental sample mean: new = old + (x - old) / N, which is algebraically
    # (1 - 1/N) * old + x / N. The reward is multiplied by 1/N, never divided into it.
    ban.m_estimate += (x - ban.m_estimate) / ban.N
    return ban.m_estimate
end

"""
    run_experiment(m1, m2, m3, ϵ, N)

Simulate `N` rounds of an epsilon-greedy strategy on three arms with true means
`m1`, `m2` and `m3`.

Each round, with probability `ϵ` a uniformly random arm is pulled; otherwise the
arm with the highest current estimate is pulled. The function plots the
cumulative average reward (log-scaled x axis) together with one horizontal line
per true mean, prints every arm's final estimate and the fraction of rounds in
which an arm other than the truly best one was pulled, and returns the
cumulative-average vector.

`N` must be integer-valued.
"""
function run_experiment(m1::Number, m2::Number, m3::Number, ϵ::Number, N::Number)
    n = Int(N)  # also accepts integer-valued non-Integer inputs such as 1e5
    means = [m1, m2, m3]
    bandits = [BanditArm(m) for m in means]
    true_best = argmax(means)
    count_suboptimal = 0
    data = Vector{Float64}(undef, n)

    for i in 1:n
        # Explore with probability ϵ, otherwise exploit the current best estimate.
        j = if rand() < ϵ
            rand(1:length(bandits))
        else
            argmax([b.m_estimate for b in bandits])
        end

        x = pull(bandits[j])
        update(bandits[j], x)

        if j != true_best
            count_suboptimal += 1
        end
        data[i] = x
    end

    gr()
    cumulative_average = cumsum(data) ./ (1:n)
    plt = plot(cumulative_average; xaxis=:log)
    # One horizontal reference line per true mean.
    for m in means
        plot!(plt, fill(float(m), n); xaxis=:log)
    end
    display(plt)

    for b in bandits
        println(b.m_estimate)
    end
    println("Perccent suboptimal for ϵ = $ϵ: ", count_suboptimal / n)

    return cumulative_average
end

# Compare three exploration rates on the same set of arms.
m1 = 1.5
m2 = 2.5
m3 = 3.5

c_1 = run_experiment(m1, m2, m3, 0.1, 100000);
c_05 = run_experiment(m1, m2, m3, 0.05, 100000);
c_01 = run_experiment(m1, m2, m3, 0.01, 100000);

plot(c_1, show=false)
plot!(c_05, show=false)
plot!(c_01)