From 0cf8046f472ed187fd7b6b047a5e6faaff552710 Mon Sep 17 00:00:00 2001
From: Eduardo Cueto Mendoza <cueto303@gmail.com>
Date: Sat, 2 Jan 2021 12:10:47 +0000
Subject: [PATCH] New julia scripts

---
 comparing_epsilons.jl     | 72 +++++++++++++++++++++++++++++++++++++++
 epsilon_greedy_starter.jl | 64 ++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 comparing_epsilons.jl
 create mode 100644 epsilon_greedy_starter.jl

diff --git a/comparing_epsilons.jl b/comparing_epsilons.jl
new file mode 100644
index 0000000..de9fbc9
--- /dev/null
+++ b/comparing_epsilons.jl
@@ -0,0 +1,72 @@
+using Distributions
+using Random
+using Plots
+
+mutable struct BanditArm
+    m::Number #the win rate
+    m_estimate::Number #How to estimate the win rate
+    N::Number #Number of samples
+
+    BanditArm(m) = new(m,0,0)
+end
+
+function pull(ban::BanditArm)
+   return rand(Normal(0,1)) + ban.m
+end
+
+function update(ban::BanditArm, x::Number) #x is a sample number
+    ban.N += 1
+    ban.m_estimate = ((1 - 1/ban.N) * ban.m_estimate) + (1 / (ban.N * x))
+end
+
+function run_experiment(m1::Number,m2::Number,m3::Number,ϵ::Number,N::Number)
+    bandits = [BanditArm(m1),BanditArm(m2),BanditArm(m3)]
+    means = [m1,m2,m3]
+    true_best = argmax(means)
+    count_suboptimal = 0
+
+    data = Array{Number}(undef,N)
+
+    for i in 1:N 
+        p = rand()
+        if p < ϵ
+            j = rand(1:size(bandits)[1])
+        else
+            j = argmax([b.m_estimate for b in bandits])
+        end
+        x = pull(bandits[j])
+        update(bandits[j],x)
+
+        if j != true_best
+            count_suboptimal += 1
+        end
+        data[i] = x
+    end
+
+    gr();
+    cumulative_average = cumsum(data) ./ Array(1:N)
+    plot(cumulative_average,xaxis=:log)
+    plot!(ones(N) .* m1,xaxis=:log)
+    plot!(ones(N) .* m2,xaxis=:log)
+    display(plot!(ones(N) .* m3,xaxis=:log))
+
+    for b in bandits 
+        println(b.m_estimate)
+    end
+    println("Perccent suboptimal for ϵ = $ϵ: ", count_suboptimal / N)
+
+    return cumulative_average
+end
+
+m1 = 1.5
+m2 = 2.5
+m3 = 3.5
+
+c_1 = run_experiment(m1,m2,m3,0.1,100000);
+c_05 = run_experiment(m1,m2,m3,0.05,100000);
+c_01 = run_experiment(m1,m2,m3,0.01,100000);
+
+plot(c_1,show=false)
+plot!(c_05,show=false)
+plot!(c_01)
+
diff --git a/epsilon_greedy_starter.jl b/epsilon_greedy_starter.jl
new file mode 100644
index 0000000..2205b47
--- /dev/null
+++ b/epsilon_greedy_starter.jl
@@ -0,0 +1,64 @@
+using Random
+using Plots
+
+mutable struct Bandit
+    p::Number #the win rate
+    p_estimate::Number #How to estimate the win rate
+    N::Number #Number of samples
+
+    Bandit(p) = new(p,0,0)
+end
+
+function pull(ban::Bandit)
+   return convert(Int,rand() < ban.p) 
+end
+
+function update(ban::Bandit, x::Number) #x is a sample number
+    ban.N += 1
+    ban.p_estimate = ((ban.N - 1) * ban.p_estimate + x) / ban.N
+end
+
+num_trials = 10000
+ϵ = 0.1
+bandit_probs = [0.2,0.5,0.75]
+
+bandits = [Bandit(p) for p in bandit_probs]
+rewards = zeros(num_trials)
+num_times_explored = 0
+num_times_exploited = 0
+num_optimal = 0
+optimal_j = argmax([b.p for b in bandits])
+println("Optimal j: ", optimal_j)
+    
+for i in 1:num_trials 
+    if rand() < ϵ
+        num_times_explored += 1
+        j = rand(1:size(bandits)[1])
+    else
+        num_times_exploited += 1
+        j = argmax([b.p_estimate for b in bandits])
+    end
+
+    if j == optimal_j
+        num_optimal += 1
+    end
+
+    x = pull(bandits[j])
+    rewards[i] = x
+    update(bandits[j],x)
+end
+
+for b in bandits
+    println("Mean estimate: ", b.p_estimate)
+end
+
+println("Total reward eaarned: ", sum(rewards))
+println("Overall win rate: ", sum(rewards)/ num_trials)
+println("Number times explored: ", num_times_explored)
+println("Number times exploited: ", num_times_exploited)
+println("Number of times the optimal bandit was selected: ", num_optimal)
+
+cumulative_rewards = cumsum(rewards)
+win_rates = cumulative_rewards ./ Array(1:num_trials)
+plot(win_rates) 
+plot!(ones(num_trials) .* max(bandit_probs...))