%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
# from matplotlib import cm
from scipy.stats import norm

import numpy.testing as testing
from plotting_functions import *

import mle_map_bayes as mmb

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

# generate data
# Synthetic 1-D sample: num_samples draws from a normal distribution with mean
# mu_true and variance var_true.
# NOTE: the three right-hand sides below are left blank in this template and
# must be filled in before this cell can run.
num_samples = 
mu_true = 
var_true = 
# standard deviation corresponding to the chosen variance
sigma_true = np.sqrt(var_true)
# shift and scale standard-normal draws; no random seed is set in this cell,
# so x differs from run to run
x = mu_true + np.random.randn(num_samples) * sigma_true

# --- ML estimate, check 1: small fixed sample with a known answer ---
toy_sample = np.array([1, 4, 6, 3, 4, 7])
mu_mle, var_mle = mmb.ml_estim_normal(toy_sample)
testing.assert_almost_equal((mu_mle, var_mle), (4.166667, 3.80556), decimal=5)

# --- ML estimate, check 2: large random sample from the true distribution ---
x_t = mu_true + sigma_true * np.random.randn(10000)
mu_mle, var_mle = mmb.ml_estim_normal(x_t)

# With this many samples a correct implementation should land close to the
# true parameters (asymptotic consistency), hence the loose one-decimal check.
testing.assert_almost_equal((mu_mle, var_mle), (mu_true, sigma_true**2), decimal=1)

# Maximum-likelihood fit on the generated sample x.
mu_mle, var_mle = mmb.ml_estim_normal(x)

plt.figure(figsize=(10, 3))

# Left panel: the likelihood drawn by plot_likelihood over a (mu, var) window,
# with the true parameters and the ML estimate marked.
# NOTICE: this works only for small num_samples (<=500), otherwise one gets zero everywhere due to numerical errors!
plt.subplot(1, 2, 1)
span_x = 5 * sigma_true / (num_samples**0.5)  # kind of confidence interval
plot_likelihood(x, mu_true - span_x, mu_true + span_x, 0, 2 * sigma_true**2)
plt.plot([mu_true], [var_true], 'r+', markersize=10, markeredgewidth=2)
plt.plot([mu_mle], [var_mle], 'kx', markersize=10, markeredgewidth=2)
plt.legend(['true', 'MLE'])

# Right panel: true density against the density implied by the ML estimate.
plt.subplot(1, 2, 2)
span_x = 5 * sigma_true
z = np.linspace(mu_true - span_x, mu_true + span_x, 100)
p_true = norm.pdf(z, mu_true, sigma_true)
p_mle = norm.pdf(z, mu_mle, np.sqrt(var_mle))
plt.plot(z, p_true, 'r-')
plt.plot(z, p_mle, 'k-')
# the samples themselves, drawn just below the x axis
plt.plot(x, np.full(x.size, -0.001), 'ko')
plt.xlabel('x')
plt.ylabel('p(x)')
plt.xlim(mu_true - span_x, mu_true + span_x)
plt.legend(['true pdf', 'MLE pdf', 'data'])
plt.savefig('mle_normal.png')

# ML fits on growing prefixes of x (2, 5 and 50 points), one panel per size.
fig, axes = plt.subplots(1, 3, sharey='row', figsize=(15, 5))

# x-range and true density are the same in every panel
span_x = 5 * sigma_true
z = np.linspace(mu_true - span_x, mu_true + span_x, 100)
p_true = norm.pdf(z, mu_true, sigma_true)

for plot_pos, num in enumerate((2, 5, 50)):
    # fit on the first `num` samples only
    data = x[:num]
    mu_mle_part, var_mle_part = mmb.ml_estim_normal(data)
    p_mle = norm.pdf(z, mu_mle_part, np.sqrt(var_mle_part))

    # true pdf, fitted pdf and the samples used (drawn just below the x axis)
    plt.sca(axes[plot_pos])
    plt.plot(z, p_true, 'r-', label='true')
    plt.plot(z, p_mle, 'k-', label='MLE')
    plt.plot(data, np.full(data.size, -0.001), 'ko', label='data')
    plt.xlabel('x')
    plt.ylabel('p(x)')
    plt.xlim(mu_true - span_x, mu_true + span_x)
    plt.legend()

plt.savefig('mle_normal_varying_dataset_size.png')

# MAP prior settings
# Hyper-parameters of the prior; they are passed unchanged to plot_prior and to
# the mmb.map_estim_normal / mmb.bayes_* calls in the cells below.
# NOTE: all four right-hand sides are left blank in this template and must be
# filled in before this cell can run.
mu0 = 
nu = 
alpha = 
beta =

# the values of the peak of the prior distribution
# NOTE: also left blank in this template; these are only used to draw the
# 'prior max' marker in the plots.
mu_prior = 
var_prior = 

# plot the prior
# The prior is drawn for mu in [mu0 - plot_margin, mu0 + plot_margin] and
# variance in [0, 3] (argument order as in the other plot_prior call in this
# file: mu_min, mu_max, var_min, var_max).
plot_margin = 2
plt.figure(figsize=(6, 5))
prior_grid = plot_prior(mu0, nu, alpha, beta, mu0 - plot_margin, mu0 + plot_margin, 0, 3)

# mark the prior maximum on top of the prior plot
plt.plot([mu_prior], [var_prior], 'mx', markeredgewidth=2, markersize=10)
plt.legend(['prior max'])
plt.savefig('map_prior_normal.png')

# --- MAP estimate, check 1: small fixed sample, explicit prior, known answer ---
toy_sample = np.array([2, 7, 3, 5, 1, 0])
mu_map, var_map = mmb.map_estim_normal(toy_sample, mu0=0.0, nu=5.0, alpha=1.0, beta=2.5)
testing.assert_almost_equal((mu_map, var_map), (1.636364, 5.776860), decimal=5)

# --- MAP estimate, check 2: large random sample with the notebook's prior ---
x_t = mu_true + sigma_true * np.random.randn(10000)
mu_map, var_map = mmb.map_estim_normal(x_t, mu0, nu, alpha, beta)
# With enough samples a correct implementation should end up close to the
# true parameters (asymptotic consistency), hence the loose one-decimal check.
testing.assert_almost_equal((mu_map, var_map), (mu_true, sigma_true**2), decimal=1)

# MAP estimate on the full generated sample x.
mu_map, var_map = mmb.map_estim_normal(x, mu0, nu, alpha, beta)

# plot the likelihood, prior and MAP objective together with the MLE and MAP estimates and the prior maximum.

span_x = sigma_true * 5 / (num_samples**0.5)  # kind of confidence interval
# widen the mu range when necessary so that the MLE marker stays inside the plot
mu_min = min(mu_true - span_x, mu_mle)
mu_max = max(mu_true + span_x, mu_mle)
var_min = 0
var_max = sigma_true**2*2

fig, axes = plt.subplots(1, 3, sharey='row', figsize=(15, 5))

# panel 1: likelihood of x
plt.sca(axes[0])
plot_likelihood(x, mu_min, mu_max, var_min, var_max)
plt.plot([mu_mle], [var_mle], 'kx', markeredgewidth=2, markersize=10, label='MLE')
plt.plot([mu_true], [var_true], 'r+', markeredgewidth=2, markersize=10, label='True')

# panel 2: prior
plt.sca(axes[1])
plot_prior(mu0, nu, alpha, beta, mu_min, mu_max, var_min, var_max)
plt.plot([mu_prior], [var_prior], 'mx', markeredgewidth=2, markersize=10, label='Prior')
plt.plot([mu_true], [var_true], 'r+', markeredgewidth=2, markersize=10, label='True')

# panel 3: MAP objective
# The following plot is NOT a probability distribution!!! It is not normalised to sum up to one!
plt.sca(axes[2])
plot_MAP_objective(x, mu0, nu, alpha, beta, mu_min, mu_max, var_min, var_max)
plt.plot([mu_mle], [var_mle], 'kx', markeredgewidth=2, markersize=10, label='MLE')
plt.plot([mu_prior], [var_prior], 'mx', markeredgewidth=2, markersize=10, label='Prior')
plt.plot([mu_map], [var_map], 'gx', markeredgewidth=2, markersize=10, label='MAP')
plt.plot([mu_true], [var_true], 'r+', markeredgewidth=2, markersize=10, label='True')
# The legend has to be created before saving; otherwise the PNG written to
# disk lacks the legend that the inline figure shows.
plt.legend()
plt.savefig('mle_map_prior_comparison_normal.png')

<matplotlib.legend.Legend at 0x7fae68950670>

# Plot the MLE and MAP estimates for different dataset sizes (e.g. 1, 5, 50):
#   - plot the MAP objective with the estimates and prior indicated,
#   - plot also the estimated distributions for each dataset size.

# (mu, var) window shared by the three top-row panels.
# NOTE(review): mu_mle_part is read here before the loop below assigns it, so
# this relies on the value left over from an earlier cell - confirm intended.
span_x = 5 * sigma_true / (5**0.5)  # kind of confidence interval
mu_min = min(mu_true - span_x, mu_mle_part)
mu_max = max(mu_true + span_x, mu_mle_part)
var_min = 0
var_max = 2 * sigma_true**2

# x-range and true density for the bottom row; identical for every panel
span_x = 5 * sigma_true
z = np.linspace(mu_true - span_x, mu_true + span_x, 100)
p_true = norm.pdf(z, mu_true, sigma_true)

fig, axes = plt.subplots(2, 3, sharey='row', figsize=(10, 7))
for plot_pos, num in enumerate((1, 5, 50)):
    # both estimates are computed from the first `num` samples only
    data = x[:num]
    mu_map_part, var_map_part = mmb.map_estim_normal(data, mu0=mu0, nu=nu, alpha=alpha, beta=beta)
    mu_mle_part, var_mle_part = mmb.ml_estim_normal(data)

    # top row: MAP objective with the point estimates marked
    plt.sca(axes[0, plot_pos])
    plot_MAP_objective(data, mu0, nu, alpha, beta, mu_min, mu_max, var_min, var_max)

    # the MLE marker is skipped for a single data point
    if num > 1:
        plt.plot([mu_mle_part], [var_mle_part], 'kx', markeredgewidth=2, markersize=10, label='MLE')
    plt.plot([mu_prior], [var_prior], 'mx', markeredgewidth=2, markersize=10, label='prior')
    plt.plot([mu_map_part], [var_map_part], 'gx', markeredgewidth=2, markersize=10, label='MAP')
    plt.plot([mu_true], [var_true], 'r+', markeredgewidth=2, markersize=10, label='True')
    plt.xlabel('mu')
    plt.ylabel('var')
    plt.legend()
    plt.title('{:d} datapoint{:s}'.format(num, '' if num == 1 else 's'))

    # bottom row: true density against the estimated densities
    plt.sca(axes[1, plot_pos])
    p_map = norm.pdf(z, mu_map_part, np.sqrt(var_map_part))
    plt.plot(z, p_true, 'r-', label='true')
    # the MLE density is only drawn when the estimated variance is positive
    if var_mle_part > 0:
        p_mle = norm.pdf(z, mu_mle_part, np.sqrt(var_mle_part))
        plt.plot(z, p_mle, 'k-', label='MLE')
    plt.plot(z, p_map, 'g-', label='MAP')
    plt.plot(data, np.full(data.size, -0.001), 'ko', label='data')
    plt.xlabel('x')
    plt.ylabel('p(x)')
    plt.xlim(mu_true - span_x, mu_true + span_x)
    plt.legend()

plt.savefig('mle_map_normal_dataset_sizes.png')

# Posterior hyper-parameters for a two-point sample, compared against
# reference values to five decimals.
expected_NIG_params = (0.5120481927710843, 8.3, 1.9, 2.1043975903614465)
NIG_params = mmb.bayes_posterior_params_normal(np.array((0.5, 0.6)), 0.5, 6.3, 0.9, 2.1)
testing.assert_almost_equal(NIG_params, expected_NIG_params, decimal=5)

# plot the posterior probability and mark the MLE, MAP and maximum prior solutions

# assumes the same data and parameters as for MAP estimate
# assumes the MLE and MAP estimates are already computed

span_x = sigma_true * 5 / (num_samples**0.5)  # kind of confidence interval
# Widen the mu range so that the MLE marker drawn below stays inside the plot.
# This must use mu_mle (the estimate on the full sample x, which is the point
# that gets plotted), not mu_mle_part, which is a leftover from the
# dataset-size loops and belongs to a subset of x.
mu_min = min(mu_true - span_x, mu_mle)
mu_max = max(mu_true + span_x, mu_mle)
var_min = 0
var_max = sigma_true**2*2

plt.figure(figsize=(6, 5))
# NOTE: despite its name, prior_grid holds the return value of
# plot_posterior_normal here; the name is kept in case other cells rely on it.
prior_grid = plot_posterior_normal(x, mu0, nu, alpha, beta, mu_min, mu_max, var_min, var_max)
plt.plot([mu_mle], [var_mle], 'kx', markeredgewidth=2, markersize=10, label='MLE')
plt.plot([mu_prior], [var_prior], 'mx', markeredgewidth=2, markersize=10, label='Prior')
plt.plot([mu_map], [var_map], 'gx', markeredgewidth=2, markersize=10, label='MAP')
plt.plot([mu_true], [var_true], 'r+', markeredgewidth=2, markersize=10, label='True')
plt.legend()
plt.savefig('bayes_posterior_normal.png')

# Predictive density on a fixed sample, evaluated at ten points in [0, 1].
points = np.linspace(0, 1, num=10)
data = np.array([
    -0.15, -0.10, 0.41, 0.14, 1.45,
    0.76, 0.12, 0.44, 0.33, 1.49,
    -0.20, 0.31, -0.85, -2.55, 0.65,
    0.86, -0.74, 2.26, -1.45, 0.04,
])

predictive = mmb.bayes_estim_pdf_normal(points, data, -0.1, 4.0, 0.8, 2.1)

# basic correctness test against reference values (three decimals)
expected_predictive = np.array([
    0.3526544, 0.35471464, 0.35307831, 0.34780085, 0.33905956,
    0.32714294, 0.31243368, 0.2953868, 0.27650482, 0.2563119,
])
testing.assert_almost_equal(predictive, expected_predictive, decimal=3)

# predictive distribution and its comparison with MAP and MLE

fig, axes = plt.subplots(1, 3, sharey='row', figsize=(15,3))

# x-range and true density are the same in every panel
span_x = 5 * sigma_true
z = np.linspace(mu_true - span_x, mu_true + span_x, 100)
p_true = norm.pdf(z, mu_true, sigma_true)

for plot_pos, num in enumerate((1, 5, 15)):
    # all three estimates use the first `num` samples only
    data = x[:num]
    mu_map_part, var_map_part = mmb.map_estim_normal(data, mu0=mu0, nu=nu, alpha=alpha, beta=beta)
    mu_mle_part, var_mle_part = mmb.ml_estim_normal(data)

    # densities implied by the estimates; the MLE density is computed and
    # drawn only when the estimated variance is positive
    mle_is_proper = var_mle_part > 0
    if mle_is_proper:
        p_mle = norm.pdf(z, mu_mle_part, np.sqrt(var_mle_part))
    p_map = norm.pdf(z, mu_map_part, np.sqrt(var_map_part))
    p_bayes = mmb.bayes_estim_pdf_normal(z, data, mu0, nu, alpha, beta)

    plt.sca(axes[plot_pos])
    plt.plot(z, p_true, 'r-', label='true')
    if mle_is_proper:
        plt.plot(z, p_mle, 'k-', label='MLE')
    plt.plot(z, p_map, 'g-', label='MAP')
    plt.plot(z, p_bayes, 'b-', label='Bayes')
    plt.plot(data, np.full(data.size, -0.001), 'ko', label='data')
    plt.xlabel('x')
    plt.ylabel('p(x)')
    plt.legend()
    plt.xlim(mu_true - span_x, mu_true + span_x)
    plt.title('{:d} datapoint{:s}'.format(num, '' if num == 1 else 's'))

plt.savefig('mle_map_bayes_normal.png')

# add some outlier to the data (point not from the distribution)
# NOTE: the right-hand side is left blank in this template. The code below
# reads x_noise.size and passes x_noise to the estimators, so it has to be a
# NumPy array (presumably x with one or more outlying values appended - confirm).
x_noise = 
# plot the estimated and true distributions
# All three estimates below are computed from the contaminated sample x_noise
# and compared against the true density.

plt.figure(figsize=(10, 5))
span_x = sigma_true * 5
# the right end of the plotted range is fixed at 9 (same value as in xlim below)
z = np.linspace(mu_true - span_x, 9, 100)

mu_mle_noise, var_mle_noise = mmb.ml_estim_normal(x_noise)
mu_map_noise, var_map_noise = mmb.map_estim_normal(x_noise, mu0, nu, alpha, beta)

p_true = norm.pdf(z, mu_true, sigma_true)
p_mle = norm.pdf(z, mu_mle_noise, np.sqrt(var_mle_noise))
# use the MAP mean estimated from x_noise (not mu_map_part, which is a
# leftover from the dataset-size loop of an earlier cell)
p_map = norm.pdf(z, mu_map_noise, np.sqrt(var_map_noise))
# the predictive density must be conditioned on x_noise as well (not on
# `data`, which still holds a subset of the clean sample from an earlier cell)
p_bayes = mmb.bayes_estim_pdf_normal(z, x_noise, mu0, nu, alpha, beta)
plt.plot(z, p_true, 'r-')
plt.plot(z, p_mle, 'k-')
plt.plot(z, p_map, 'g-')
plt.plot(z, p_bayes, 'b-')
# the contaminated samples, drawn just below the x axis
plt.plot(x_noise, [-0.001] * x_noise.size, 'ko')
plt.xlabel('x')
plt.ylabel('p(x)')
plt.xlim([mu_true - span_x, 9])
leg = plt.legend(['true pdf', 'MLE pdf', 'MAP pdf', 'Bayes pdf', 'data'])

plt.savefig('noise.png')

# generate some data
num_classes = 6
# NOTE: this rebinds num_samples, which the normal-distribution cells above
# also read; re-running those cells afterwards will see the value 50.
num_samples = 50
# NOTE: the right-hand side is left blank in this template. It is passed as the
# probability vector to np.random.multinomial (presumably one entry per class,
# i.e. num_classes entries summing to one - confirm).
pk_true =
# fixed seed so that the drawn counts are reproducible
np.random.seed(0)
# number of occurrences of each class among num_samples draws
counts = np.random.multinomial(num_samples, pk_true)
print(counts)

[ 5 12 23  0  2  8]

# visualise the data: true distribution (left) next to the histogram of the
# sampled counts (right)
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

data_panels = (
    (plot_categorical_distr, pk_true, 'true_distribution'),
    (plot_categorical_hist, counts, 'training data histogram'),
)
for panel_pos, (draw_panel, panel_values, panel_title) in enumerate(data_panels):
    plt.sca(axes[panel_pos])
    draw_panel(panel_values, panel_title)

plt.savefig('categorical_data.png')

# compute the ML estimate of the class probabilities from the counts
pc_mle = mmb.ml_estim_categorical(counts)

# visualise the estimate
plot_categorical_distr(pc_mle, 'MLE estimate')

plt.savefig('mle_categorical.png')

# random samples from the conjugate prior

# NOTE: the right-hand side is left blank in this template. alphas is used as
# the concentration parameter of np.random.dirichlet here and is passed to the
# mmb.*_categorical functions in the following cells.
alphas =   
# fixed seed so that the drawn samples are reproducible
np.random.seed(0)
# five categorical distributions drawn from the Dirichlet prior
prior_samples = np.random.dirichlet(alphas, 5)

# one panel per sample, sharing the y axis so the panels are comparable
fig, axes = plt.subplots(1, 5, figsize=(20, 3), sharey='row')
for i in range(5):
    plt.sca(axes[i])
    plot_categorical_distr(prior_samples[i], '')

# --- MAP estimate of the class probabilities ---
pk_map = mmb.map_estim_categorical(counts, alphas)

plot_categorical_distr(pk_map, 'MAP estimate')
plt.savefig('map_categorical.png')

# --- posterior over the class probabilities ---
# pc_posterior is used below as the concentration parameter of a Dirichlet
pc_posterior = mmb.bayes_posterior_params_categorical(counts, alphas)

# five random categorical distributions drawn from that posterior
# (fixed seed for reproducibility)
np.random.seed(0)
posterior_samples = np.random.dirichlet(pc_posterior, 5)

# one panel per posterior sample, sharing the y axis
fig, axes = plt.subplots(1, 5, figsize=(20, 3), sharey='row')
for i, posterior_sample in enumerate(posterior_samples):
    plt.sca(axes[i])
    plot_categorical_distr(posterior_sample, '')

plt.savefig('bayes_posterior_categorical.png')

# --- Bayesian estimate of the class probabilities ---
pc_bayes = mmb.bayes_estim_categorical(counts, alphas)

plot_categorical_distr(pc_bayes, 'Bayesian estimate')
plt.savefig('bayes_categorical.png')

# load data
# NOTE: allow_pickle=True unpickles objects stored in the archive; only use it
# on data files from a trusted source.
loaded_data = np.load("data_33rpz_mle_map_bayes.npz", allow_pickle=True)

alphabet = loaded_data["alphabet"]

# the test and training sets are stored as 0-d object arrays; .item() extracts
# the stored object (indexed below with keys such as 'labels' and 'images')
tst = loaded_data["tst"].item()

# training sets keyed by the size suffix of their archive entry
trn_sets = {size: loaded_data["trn_" + size].item() for size in ('20', '200', '2000')}
trn_20, trn_200, trn_2000 = (trn_sets[size] for size in ('20', '200', '2000'))

# classify the data using all three estimates

# select the training set
picked_set = '20' # your code probably won't work for '200' or '2000' due to numerical limitations
# feel free to search for the source of the numerical problems and come up with a workaround
# (it is doable relatively easily for '200')
trn_set = trn_sets[picked_set]

# computing LR feature vectors (training set)
# NOTE: the right-hand side is left blank in this template. Presumably the same
# feature extractor that is applied to the test images below
# (mmb.compute_measurement_lr_cont) applied to the training images - confirm.
x_train = 
labels_train = trn_set['labels']

# Splitting the training data into classes
# (label 0 -> class A, label 1 -> class C)
x_A = x_train[labels_train == 0]
x_C = x_train[labels_train == 1]

# NIG prior settings
# NOTE: both right-hand sides are left blank in this template; each must
# unpack into the four hyper-parameters of the respective class prior.
mu0_A, nu_A, alpha_A, beta_A = 
mu0_C, nu_C, alpha_C, beta_C =

# Classify the test set with each of the three estimates and report the errors.
x_test = mmb.compute_measurement_lr_cont(tst['images'])
true_labels = tst['labels']

# --- classifier built on ML estimates ---
q_mle, labels_mle, DA_mle, DC_mle = mmb.mle_Bayes_classif(x_test, x_A, x_C)
error_mle = mmb.classification_error(labels_mle, true_labels)
print('MLE classification error: {:.2f} %'.format(error_mle * 100))
testing.assert_almost_equal(error_mle, 0.075, decimal=4)

# prior hyper-parameters of both classes, in the positional order the
# MAP and Bayes classifiers take them
prior_A = (mu0_A, nu_A, alpha_A, beta_A)
prior_C = (mu0_C, nu_C, alpha_C, beta_C)

# --- classifier built on MAP estimates ---
q_map, labels_map, DA_map, DC_map = mmb.map_Bayes_classif(x_test, x_A, x_C, *prior_A, *prior_C)
error_map = mmb.classification_error(labels_map, true_labels)
print('MAP classification error: {:.2f} %'.format(error_map * 100))
assert error_map < 0.075, "We expect MAP to produce better classification error than MLE, but depending on your prior this may not hold"

# --- classifier built on the Bayesian predictive densities ---
labels_Bayes = mmb.bayes_Bayes_classif(x_test, x_A, x_C, *prior_A, *prior_C)
error_bayes = mmb.classification_error(labels_Bayes, true_labels)
print('Bayes classification error: {:.2f} %'.format(error_bayes * 100))
assert error_bayes < 0.075, "We expect Bayesian predictive classifier to produce better classification error than MLE, but depending on your prior this may not hold"

# visualise the estimates
# Test-set histograms of both classes, overlaid with the class densities from
# the three estimates and with the resulting classification strategies.
# Colour convention throughout: class A (label 0) is blue, class C (label 1) is red.

plt.figure(figsize=(10, 5))
# measurements of the test images, split by their true label
x_tst_all = mmb.compute_measurement_lr_cont(tst['images'])
x_A_tst = x_tst_all[tst['labels'] == 0]
x_C_tst = x_tst_all[tst['labels'] == 1]
plt.hist(x_A_tst, 20, density=True)
plt.hist(x_C_tst, 20, density=True, alpha=0.8)
z = np.linspace(-4000, 3000, 1000)

# dotted: densities from the ML estimates
p_mle_A = norm.pdf(z, DA_mle['Mean'], DA_mle['Sigma'])
p_mle_C = norm.pdf(z, DC_mle['Mean'], DC_mle['Sigma'])
plt.plot(z, p_mle_A, 'b:', label='MLE pdf A')
plt.plot(z, p_mle_C, 'r:', label='MLE pdf C')

# dashed: densities from the MAP estimates
p_map_A = norm.pdf(z, DA_map['Mean'], DA_map['Sigma'])
p_map_C = norm.pdf(z, DC_map['Mean'], DC_map['Sigma'])
plt.plot(z, p_map_A, 'b--', label='MAP pdf A')
plt.plot(z, p_map_C, 'r--', label='MAP pdf C')

# solid: Bayesian predictive densities
p_bayes_A = mmb.bayes_estim_pdf_normal(z, x_A, mu0_A, nu_A, alpha_A, beta_A)
p_bayes_C = mmb.bayes_estim_pdf_normal(z, x_C, mu0_C, nu_C, alpha_C, beta_C)
plt.plot(z, p_bayes_A, 'b-', label='Bayes pdf A')
plt.plot(z, p_bayes_C, 'r-', label='Bayes pdf C')

# short vertical segments at the 't1' / 't2' values of the MLE and MAP
# strategies (presumably the decision thresholds - confirm against mmb)
plt.plot([q_mle['t1'], q_mle['t1']], [0, 0.001], 'k:', label='MLE strategy')
plt.plot([q_mle['t2'], q_mle['t2']], [0, 0.001], 'k:', label=None)
plt.plot([q_map['t1'], q_map['t1']], [0, 0.001], 'k--', label='MAP strategy')
plt.plot([q_map['t2'], q_map['t2']], [0, 0.001], 'k--', label=None)

# the Bayes strategy is shown by classifying every grid point of z and
# drawing the decisions as a row of coloured dots just below the x axis
classif_bayes = mmb.bayes_Bayes_classif(z, x_A, x_C,
                                        mu0_A, nu_A, alpha_A, beta_A,
                                        mu0_C, nu_C, alpha_C, beta_C)
y_val = np.ones_like(z) * (-0.00005)
# label 1 is class C (red) and label 0 is class A (blue), matching the split
# of the training and test data; the legend entries are named accordingly
plt.plot(z[classif_bayes == 1], y_val[classif_bayes == 1], 'r.', label='Bayes_strategy_C')
plt.plot(z[classif_bayes == 0], y_val[classif_bayes == 0], 'b.', label='Bayes_strategy_A')

plt.xlim([-4000, 3000])
plt.legend()
plt.xlabel('measurement')
plt.savefig('mle_map_bayes_Bayes_classifier.png')

RPZ Assignment: Maximum Likelihood, Maximum a Posteriori and Bayesian Estimates of Distribution Parameters

Introduction

Part 1: Normal distribution

Maximum Likelihood Estimation (MLE)

Maximum a Posteriori Estimation

Bayesian Inference

Predictive density

Take-away summary

Part 2: Categorical distribution

MLE

MAP

Bayesian Estimate

Take-away summary

Part 3: Building a classifier

Submission to the BRUTE Upload System

References