New julia scripts
This commit is contained in:
parent
5bdb53b360
commit
0cf8046f47
|
@ -0,0 +1,72 @@
|
|||
using Distributions
|
||||
using Random
|
||||
using Plots
|
||||
|
||||
"""
    BanditArm(m)

One arm of a Gaussian multi-armed bandit.

# Fields
- `m`: true mean reward of the arm.
- `m_estimate`: running estimate of the mean reward; starts at 0.
- `N`: number of samples folded into `m_estimate` so far; starts at 0.

The fields are concretely typed so that reading and writing them is type-stable;
the constructor converts its arguments accordingly.
"""
mutable struct BanditArm
    m::Float64
    m_estimate::Float64
    N::Int

    BanditArm(m) = new(m, 0, 0)
end
|
||||
|
||||
"""
    pull(ban::BanditArm)

Draw one reward from the arm: a sample from `Normal(0, 1)` shifted by the arm's
true mean `ban.m`.
"""
function pull(ban::BanditArm)
    noise = rand(Normal(0, 1))
    return noise + ban.m
end
|
||||
|
||||
"""
    update(ban::BanditArm, x::Number)

Fold the observed reward `x` into the arm's running mean and return the new estimate.

Increments the sample count `ban.N`, then applies the incremental sample-mean
recurrence `m_N = (1 - 1/N) * m_{N-1} + x / N`.
"""
function update(ban::BanditArm, x::Number)
    ban.N += 1
    # The new sample enters with weight 1/N, i.e. as x / N. Writing this term as
    # 1 / (N * x) would add the reciprocal of the sample and the estimate would
    # not converge to the arm's mean.
    ban.m_estimate = (1 - 1 / ban.N) * ban.m_estimate + x / ban.N
    return ban.m_estimate
end
|
||||
|
||||
"""
    run_experiment(m1, m2, m3, ϵ, N)

Run an ϵ-greedy experiment on three Gaussian arms with true means `m1`, `m2`, `m3`.

On each of the `N` rounds, with probability `ϵ` an arm is chosen uniformly at random;
otherwise the arm with the highest current estimate is chosen. The chosen arm is
pulled and its estimate is updated with the observed reward.

Side effects: selects the GR plotting backend, displays a log-x plot of the cumulative
average reward together with one constant line per true mean, and prints every arm's
final estimate followed by the fraction of rounds (a value in [0, 1]) in which an arm
other than the truly best one was pulled.

Returns the vector of cumulative average rewards (length `N`).
"""
function run_experiment(m1::Number, m2::Number, m3::Number, ϵ::Number, N::Number)
    bandits = [BanditArm(m1), BanditArm(m2), BanditArm(m3)]
    means = [m1, m2, m3]
    true_best = argmax(means)
    count_suboptimal = 0

    # Concretely typed reward buffer; an `Array{Number}` would box every element.
    # Rewards are Float64 noise plus a mean, hence the promotion with Float64.
    data = Vector{promote_type(Float64, eltype(means))}(undef, N)

    for i in eachindex(data)
        if rand() < ϵ
            # Explore: pick any arm uniformly at random.
            j = rand(1:length(bandits))
        else
            # Exploit: pick the arm that currently looks best.
            j = argmax([b.m_estimate for b in bandits])
        end
        x = pull(bandits[j])
        update(bandits[j], x)

        if j != true_best
            count_suboptimal += 1
        end
        data[i] = x
    end

    gr()
    cumulative_average = cumsum(data) ./ (1:N)
    plt = plot(cumulative_average; xaxis=:log)
    # One constant reference line per true mean, in the order m1, m2, m3.
    for m in means
        plot!(plt, ones(N) .* m; xaxis=:log)
    end
    display(plt)

    for b in bandits
        println(b.m_estimate)
    end
    println("Percent suboptimal for ϵ = $ϵ: ", count_suboptimal / N)

    return cumulative_average
end
|
||||
|
||||
# True mean reward of each of the three arms.
m1, m2, m3 = 1.5, 2.5, 3.5

# Cumulative-average reward curves for three exploration rates, 100 000 rounds each.
c_1 = run_experiment(m1, m2, m3, 0.1, 100_000);
c_05 = run_experiment(m1, m2, m3, 0.05, 100_000);
c_01 = run_experiment(m1, m2, m3, 0.01, 100_000);

# Overlay the three curves in a single figure.
plot(c_1; show=false)
plot!(c_05; show=false)
plot!(c_01)
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
using Random
|
||||
using Plots
|
||||
|
||||
"""
    Bandit(p)

One arm of a Bernoulli multi-armed bandit.

# Fields
- `p`: true win rate of the arm.
- `p_estimate`: running estimate of the win rate; starts at 0.
- `N`: number of samples folded into `p_estimate` so far; starts at 0.

The fields are concretely typed so that reading and writing them is type-stable;
the constructor converts its arguments accordingly.
"""
mutable struct Bandit
    p::Float64
    p_estimate::Float64
    N::Int

    Bandit(p) = new(p, 0, 0)
end
|
||||
|
||||
"""
    pull(ban::Bandit)

Play the arm once: return `1` when a uniform draw from `rand()` falls below `ban.p`,
otherwise `0`.
"""
function pull(ban::Bandit)
    won = rand() < ban.p
    return Int(won)
end
|
||||
|
||||
"""
    update(ban::Bandit, x::Number)

Record the sample `x` for this arm: increment the sample count `ban.N` and replace
`ban.p_estimate` with the mean of all samples seen so far. Returns the new estimate.
"""
function update(ban::Bandit, x::Number)
    ban.N += 1
    n = ban.N
    # (n - 1) * old mean recovers the sum of the previous samples.
    previous_total = (n - 1) * ban.p_estimate
    new_estimate = (previous_total + x) / n
    ban.p_estimate = new_estimate
    return new_estimate
end
|
||||
|
||||
num_trials = 10000
ϵ = 0.1
bandit_probs = [0.2, 0.5, 0.75]

"""
    run_epsilon_greedy(bandits, ϵ, num_trials, optimal_j)

Play `num_trials` rounds of ϵ-greedy selection over `bandits`, updating each pulled
arm's estimate in place.

Returns `(rewards, explored, exploited, optimal)`: the per-round rewards, the number of
exploration rounds, the number of exploitation rounds, and the number of rounds in
which the arm at index `optimal_j` was selected.

The loop lives in a function because a top-level `for` loop in a script file cannot
update global counters with `+=` without a `global` declaration.
"""
function run_epsilon_greedy(bandits, ϵ, num_trials, optimal_j)
    rewards = zeros(num_trials)
    explored = 0
    exploited = 0
    optimal = 0

    for i in eachindex(rewards)
        if rand() < ϵ
            # Explore: pick any arm uniformly at random.
            explored += 1
            j = rand(1:length(bandits))
        else
            # Exploit: pick the arm with the highest current estimate.
            exploited += 1
            j = argmax([b.p_estimate for b in bandits])
        end

        if j == optimal_j
            optimal += 1
        end

        x = pull(bandits[j])
        rewards[i] = x
        update(bandits[j], x)
    end

    return rewards, explored, exploited, optimal
end

bandits = [Bandit(p) for p in bandit_probs]
optimal_j = argmax([b.p for b in bandits])
println("Optimal j: ", optimal_j)

rewards, num_times_explored, num_times_exploited, num_optimal =
    run_epsilon_greedy(bandits, ϵ, num_trials, optimal_j)

for b in bandits
    println("Mean estimate: ", b.p_estimate)
end

println("Total reward earned: ", sum(rewards))
println("Overall win rate: ", sum(rewards) / num_trials)
println("Number times explored: ", num_times_explored)
println("Number times exploited: ", num_times_exploited)
println("Number of times the optimal bandit was selected: ", num_optimal)

# Running win rate after each round, plotted against the best achievable win rate.
cumulative_rewards = cumsum(rewards)
win_rates = cumulative_rewards ./ (1:num_trials)
plot(win_rates)
plot!(ones(num_trials) .* maximum(bandit_probs))
|
Loading…
Reference in New Issue