# ============================================================================
# comparing_epsilons.jl
# ε-greedy multi-armed bandit with Gaussian rewards: compare several ε values.
# ============================================================================
using Distributions
using Random
using Plots

# One arm of the bandit with Gaussian (unit-variance) rewards.
mutable struct BanditArm
    m::Float64          # true mean reward of this arm
    m_estimate::Float64 # running estimate of the mean, updated after each pull
    N::Int              # number of times this arm has been pulled

    BanditArm(m) = new(m, 0.0, 0)
end

# Draw one reward from the arm: standard normal noise around the true mean.
function pull(ban::BanditArm)
    return rand(Normal(0, 1)) + ban.m
end

"""
    update(ban::BanditArm, x::Number)

Fold sample `x` into the arm's running mean estimate.

BUG FIX: the original wrote `1 / (ban.N * x)` — dividing by the sample —
instead of the correct incremental-mean term `x / ban.N`.
"""
function update(ban::BanditArm, x::Number)
    ban.N += 1
    ban.m_estimate = (1 - 1 / ban.N) * ban.m_estimate + x / ban.N
end

"""
    run_experiment(m1, m2, m3, ϵ, N)

Run `N` ε-greedy trials over three arms with true means `m1`, `m2`, `m3`.
Plots the cumulative-average reward (log x-axis) against each true mean,
prints the final estimates and the fraction of suboptimal pulls, and
returns the cumulative-average vector.
"""
function run_experiment(m1::Number, m2::Number, m3::Number, ϵ::Number, N::Integer)
    bandits = [BanditArm(m1), BanditArm(m2), BanditArm(m3)]
    true_best = argmax([m1, m2, m3])
    count_suboptimal = 0

    # Concrete eltype (was abstract Array{Number}) for type stability.
    data = Vector{Float64}(undef, N)

    for i in 1:N
        # ε-greedy: explore a random arm with probability ϵ, else exploit
        # the arm with the highest current estimate.
        if rand() < ϵ
            j = rand(1:length(bandits))
        else
            j = argmax([b.m_estimate for b in bandits])
        end
        x = pull(bandits[j])
        update(bandits[j], x)

        if j != true_best
            count_suboptimal += 1
        end
        data[i] = x
    end

    gr()
    # Broadcasting over the range directly; no need to materialize Array(1:N).
    cumulative_average = cumsum(data) ./ (1:N)
    plot(cumulative_average, xaxis = :log)
    plot!(ones(N) .* m1, xaxis = :log)
    plot!(ones(N) .* m2, xaxis = :log)
    display(plot!(ones(N) .* m3, xaxis = :log))

    for b in bandits
        println(b.m_estimate)
    end
    # Typo fix: "Perccent" -> "Percent".
    println("Percent suboptimal for ϵ = $ϵ: ", count_suboptimal / N)

    return cumulative_average
end

m1 = 1.5
m2 = 2.5
m3 = 3.5

c_1 = run_experiment(m1, m2, m3, 0.1, 100000);
c_05 = run_experiment(m1, m2, m3, 0.05, 100000);
c_01 = run_experiment(m1, m2, m3, 0.01, 100000);

plot(c_1, show = false)
plot!(c_05, show = false)
plot!(c_01)

# ============================================================================
# epsilon_greedy_starter.jl
# ε-greedy bandit with Bernoulli (win/lose) rewards.
# ============================================================================
using Random
using Plots

# One Bernoulli bandit arm.
mutable struct Bandit
    p::Float64          # true win rate
    p_estimate::Float64 # running estimate of the win rate
    N::Int              # number of samples collected for this arm

    Bandit(p) = new(p, 0.0, 0)
end

# One pull: returns 1 with probability p, else 0.
function pull(ban::Bandit)
    return convert(Int, rand() < ban.p)
end

# Incremental running-mean update (this file's formula was already correct).
function update(ban::Bandit, x::Number)
    ban.N += 1
    ban.p_estimate = ((ban.N - 1) * ban.p_estimate + x) / ban.N
end

num_trials = 10000
ϵ = 0.1
bandit_probs = [0.2, 0.5, 0.75]

bandits = [Bandit(p) for p in bandit_probs]
rewards = zeros(num_trials)
num_times_explored = 0
num_times_exploited = 0
num_optimal = 0
optimal_j = argmax([b.p for b in bandits])
println("Optimal j: ", optimal_j)

for i in 1:num_trials
    # `global` is required to rebind top-level variables from a script loop;
    # without it the soft-scope assignment warns (or errors) non-interactively.
    global num_times_explored, num_times_exploited, num_optimal

    if rand() < ϵ
        num_times_explored += 1
        j = rand(1:length(bandits))
    else
        num_times_exploited += 1
        j = argmax([b.p_estimate for b in bandits])
    end

    if j == optimal_j
        num_optimal += 1
    end

    x = pull(bandits[j])
    rewards[i] = x
    update(bandits[j], x)
end

for b in bandits
    println("Mean estimate: ", b.p_estimate)
end

# Typo fix: "eaarned" -> "earned".
println("Total reward earned: ", sum(rewards))
println("Overall win rate: ", sum(rewards) / num_trials)
println("Number times explored: ", num_times_explored)
println("Number times exploited: ", num_times_exploited)
println("Number of times the optimal bandit was selected: ", num_optimal)

cumulative_rewards = cumsum(rewards)
win_rates = cumulative_rewards ./ (1:num_trials)
plot(win_rates)
plot!(ones(num_trials) .* maximum(bandit_probs))