New julia scripts

2021-01-02 12:10:47 +00:00 · 2021-01-02 12:10:47 +00:00 · 0cf8046f47
commit 0cf8046f47
parent 5bdb53b360
2 changed files with 136 additions and 0 deletions
--- a/comparing_epsilons.jl
+++ b/comparing_epsilons.jl
@ -0,0 +1,72 @@
 using Distributions
 using Random
 using Plots
 # One arm of a Gaussian bandit together with its running reward estimate.
 # Fields use concrete types so the struct is stored unboxed and code that
 # touches it stays type-stable; the constructor converts its argument and the
 # two zero literals to the field types.
 mutable struct BanditArm
     m::Float64           # true mean reward of this arm
     m_estimate::Float64  # current running estimate of the mean reward
     N::Int               # number of samples folded into `m_estimate` so far
     BanditArm(m) = new(m, 0, 0)
 end
 # Draw one reward from `ban`: a Normal(0, 1) sample shifted by the arm's mean `ban.m`.
 pull(ban::BanditArm) = ban.m + rand(Normal(0, 1))
 # Fold the observed reward `x` into the running mean estimate of `ban`.
 # Increments the sample count `ban.N` and evaluates to the new estimate.
 function update(ban::BanditArm, x::Number)
     ban.N += 1
     # Incremental sample mean: new = (1 - 1/N) * old + x / N.
     # The sample must be divided by N; it must not appear in the denominator.
     ban.m_estimate = (1 - 1 / ban.N) * ban.m_estimate + x / ban.N
 end
 # Run an ϵ-greedy experiment on three Gaussian arms with true means `m1`, `m2`, `m3`.
 #
 # On each of the `N` trials, with probability `ϵ` an arm is chosen uniformly at
 # random; otherwise the arm with the highest current estimate is chosen. The
 # chosen arm is pulled and its estimate updated with the observed reward.
 #
 # Side effects: selects the GR backend, displays a log-x plot of the cumulative
 # average reward together with one horizontal line per true mean, and prints
 # each arm's final estimate followed by the share of trials that used a
 # suboptimal arm (printed as a fraction in [0, 1], not multiplied by 100).
 #
 # Returns the vector of cumulative average rewards (length `N`).
 # `N` is used as an array length, so it must be an integer value.
 function run_experiment(m1::Number,m2::Number,m3::Number,ϵ::Number,N::Number)
     means = [m1, m2, m3]
     bandits = [BanditArm(m) for m in means]
     true_best = argmax(means)
     count_suboptimal = 0
     # Concrete element type keeps cumsum and the division below type-stable.
     data = Vector{Float64}(undef, N)
     for i in 1:N
         j = if rand() < ϵ
             rand(eachindex(bandits))                   # explore
         else
             argmax([b.m_estimate for b in bandits])    # exploit
         end
         x = pull(bandits[j])
         update(bandits[j], x)
         if j != true_best
             count_suboptimal += 1
         end
         data[i] = x
     end
     # Broadcasting over the range avoids materialising 1:N as an array.
     cumulative_average = cumsum(data) ./ (1:N)
     gr()
     plot(cumulative_average, xaxis=:log)
     plot!(fill(m1, N), xaxis=:log)
     plot!(fill(m2, N), xaxis=:log)
     display(plot!(fill(m3, N), xaxis=:log))
     for b in bandits
         println(b.m_estimate)
     end
     println("Percent suboptimal for ϵ = $ϵ: ", count_suboptimal / N)
     return cumulative_average
 end
 # True mean rewards of the three arms.
 m1, m2, m3 = 1.5, 2.5, 3.5
 # Cumulative-average curves for ϵ = 0.1, 0.05 and 0.01 (run in that order),
 # each over 100000 trials.
 c_1, c_05, c_01 = [run_experiment(m1, m2, m3, ϵ, 100000) for ϵ in (0.1, 0.05, 0.01)];
 # Overlay the three curves on a single plot.
 plot(c_1, show=false)
 plot!(c_05, show=false)
 plot!(c_01)
--- a/epsilon_greedy_starter.jl
+++ b/epsilon_greedy_starter.jl
@ -0,0 +1,64 @@
 using Random
 using Plots
 # One Bernoulli bandit arm together with its running win-rate estimate.
 # Fields use concrete types so the struct is stored unboxed and code that
 # touches it stays type-stable; the constructor converts its argument and the
 # two zero literals to the field types.
 mutable struct Bandit
     p::Float64           # true win rate (probability that a pull pays 1)
     p_estimate::Float64  # current running estimate of the win rate
     N::Int               # number of samples folded into `p_estimate` so far
     Bandit(p) = new(p, 0, 0)
 end
 # Pull the arm once: returns 1 when a uniform draw falls below `ban.p`, else 0.
 pull(ban::Bandit) = Int(rand() < ban.p)
 # Fold the observed reward `x` into the running win-rate estimate of `ban`.
 # Increments the sample count `ban.N` and evaluates to the new estimate.
 function update(ban::Bandit, x::Number)
     seen = ban.N          # samples already reflected in the estimate
     ban.N = seen + 1
     # New mean = (old sum + x) / new count, where old sum = seen * old mean.
     ban.p_estimate = (seen * ban.p_estimate + x) / ban.N
 end
 # ϵ-greedy simulation over three Bernoulli bandits.
 num_trials = 10000
 ϵ = 0.1                            # probability of exploring on a given trial
 bandit_probs = [0.2, 0.5, 0.75]    # true win rate of each bandit
 bandits = [Bandit(p) for p in bandit_probs]
 rewards = zeros(num_trials)        # reward obtained on each trial
 num_times_explored = 0
 num_times_exploited = 0
 num_optimal = 0
 optimal_j = argmax([b.p for b in bandits])
 println("Optimal j: ", optimal_j)
 for i in eachindex(rewards)
     # A top-level loop in a script file opens a new scope: without this
     # declaration the `+=` updates below would target fresh locals and fail
     # with UndefVarError when the file is run non-interactively.
     global num_times_explored, num_times_exploited, num_optimal
     if rand() < ϵ
         num_times_explored += 1
         j = rand(eachindex(bandits))                    # explore: uniform random arm
     else
         num_times_exploited += 1
         j = argmax([b.p_estimate for b in bandits])     # exploit: best current estimate
     end
     if j == optimal_j
         num_optimal += 1
     end
     x = pull(bandits[j])
     rewards[i] = x
     update(bandits[j], x)
 end
 for b in bandits
     println("Mean estimate: ", b.p_estimate)
 end
 println("Total reward earned: ", sum(rewards))
 println("Overall win rate: ", sum(rewards) / num_trials)
 println("Number times explored: ", num_times_explored)
 println("Number times exploited: ", num_times_exploited)
 println("Number of times the optimal bandit was selected: ", num_optimal)
 cumulative_rewards = cumsum(rewards)
 # Running win rate after each trial; broadcasting over the range avoids
 # materialising 1:num_trials as an array.
 win_rates = cumulative_rewards ./ (1:num_trials)
 plot(win_rates)
 # Horizontal reference line at the best achievable win rate.
 plot!(fill(maximum(bandit_probs), num_trials))