New julia scripts

This commit is contained in:
Eduardo Cueto Mendoza 2021-01-02 12:10:47 +00:00
parent 5bdb53b360
commit 0cf8046f47
2 changed files with 136 additions and 0 deletions

72
comparing_epsilons.jl Normal file
View File

@ -0,0 +1,72 @@
using Distributions
using Random
using Plots
mutable struct BanditArm
m::Number #the win rate
m_estimate::Number #How to estimate the win rate
N::Number #Number of samples
BanditArm(m) = new(m,0,0)
end
function pull(ban::BanditArm)
return rand(Normal(0,1)) + ban.m
end
function update(ban::BanditArm, x::Number) #x is a sample number
ban.N += 1
ban.m_estimate = ((1 - 1/ban.N) * ban.m_estimate) + (1 / (ban.N * x))
end
function run_experiment(m1::Number,m2::Number,m3::Number,ϵ::Number,N::Number)
bandits = [BanditArm(m1),BanditArm(m2),BanditArm(m3)]
means = [m1,m2,m3]
true_best = argmax(means)
count_suboptimal = 0
data = Array{Number}(undef,N)
for i in 1:N
p = rand()
if p < ϵ
j = rand(1:size(bandits)[1])
else
j = argmax([b.m_estimate for b in bandits])
end
x = pull(bandits[j])
update(bandits[j],x)
if j != true_best
count_suboptimal += 1
end
data[i] = x
end
gr();
cumulative_average = cumsum(data) ./ Array(1:N)
plot(cumulative_average,xaxis=:log)
plot!(ones(N) .* m1,xaxis=:log)
plot!(ones(N) .* m2,xaxis=:log)
display(plot!(ones(N) .* m3,xaxis=:log))
for b in bandits
println(b.m_estimate)
end
println("Perccent suboptimal for ϵ = : ", count_suboptimal / N)
return cumulative_average
end
m1 = 1.5
m2 = 2.5
m3 = 3.5
c_1 = run_experiment(m1,m2,m3,0.1,100000);
c_05 = run_experiment(m1,m2,m3,0.05,100000);
c_01 = run_experiment(m1,m2,m3,0.01,100000);
plot(c_1,show=false)
plot!(c_05,show=false)
plot!(c_01)

64
epsilon_greedy_starter.jl Normal file
View File

@ -0,0 +1,64 @@
using Random
using Plots
mutable struct Bandit
p::Number #the win rate
p_estimate::Number #How to estimate the win rate
N::Number #Number of samples
Bandit(p) = new(p,0,0)
end
function pull(ban::Bandit)
return convert(Int,rand() < ban.p)
end
function update(ban::Bandit, x::Number) #x is a sample number
ban.N += 1
ban.p_estimate = ((ban.N - 1) * ban.p_estimate + x) / ban.N
end
num_trials = 10000
ϵ = 0.1
bandit_probs = [0.2,0.5,0.75]
bandits = [Bandit(p) for p in bandit_probs]
rewards = zeros(num_trials)
num_times_explored = 0
num_times_exploited = 0
num_optimal = 0
optimal_j = argmax([b.p for b in bandits])
println("Optimal j: ", optimal_j)
for i in 1:num_trials
if rand() < ϵ
num_times_explored += 1
j = rand(1:size(bandits)[1])
else
num_times_exploited += 1
j = argmax([b.p_estimate for b in bandits])
end
if j == optimal_j
num_optimal += 1
end
x = pull(bandits[j])
rewards[i] = x
update(bandits[j],x)
end
for b in bandits
println("Mean estimate: ", b.p_estimate)
end
println("Total reward eaarned: ", sum(rewards))
println("Overall win rate: ", sum(rewards)/ num_trials)
println("Number times explored: ", num_times_explored)
println("Number times exploited: ", num_times_exploited)
println("Number of times the optimal bandit was selected: ", num_optimal)
cumulative_rewards = cumsum(rewards)
win_rates = cumulative_rewards ./ Array(1:num_trials)
plot(win_rates)
plot!(ones(num_trials) .* max(bandit_probs...))