RPZ Assignment: Parzen Window Probability Density Estimation¶
Detailed BRUTE upload instructions at the end of this notebook.
Introduction¶
In this lab we continue building decision strategies, but once again the problem gets a bit more involved. Last time we encountered the situation where the prior probabilities were not known, but so far we were always given all the distributions needed for making the optimal decision. In this lab we assume the probability density $p_{X|k}(x|k)$ is unknown. We will explore the situation when we know at least its parametric form in the next lab; this time we are given only a set of measurements $\{x_1, \ldots, x_n\} \sim p_{X|k}(x|k)$, i.e. a set of independent identically distributed (i.i.d.) samples from this distribution.
Because we do not even know the form of the distribution, we will use a so-called non-parametric kernel density method. In particular, you will implement the Parzen window method. Because the densities are estimated independently for each class, we can drop the class conditioning and consider an arbitrary distribution $p(x)$ instead.
The kernel density estimate $\hat{p}_h(x)$ of an unknown distribution $p(x)$, given a set $\{x_1, \ldots, x_n\} \sim p(x)$ of i.i.d. measurements, is defined as $$ \hat{p}_h(x) = \frac{1}{n}\sum_{i=1}^n K_h(x - x_i) $$ where $K_h$ is a kernel (a symmetric function that integrates to one) and $h$ is the kernel size (bandwidth). Different kernel functions can be used, but we will use $K_h(x) \equiv N(0, h^2)$, i.e. the normal distribution with zero mean and standard deviation $h$.
Note that the kernel parameter $h$ does not make the method "parametric". The parameter defines the smoothness of the estimate, not the form of the estimated distribution. Nevertheless, the choice of $h$ still influences the quality of the estimate, as you will see below. Using cross-validation we will find a value of $h$ that ensures the estimate generalises well to previously unseen data.
Finally, using the above estimates of the probability density functions $p_{X|k}(x|k)$, we will once again build a Bayesian classifier for two letters and test it on an independent test set.
%load_ext autoreload
%autoreload 2
from parzen import *
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
%matplotlib inline
Part 1: Parzen window estimation¶
In this part we will implement the Parzen window density estimation method. We will inspect the result by comparing the estimate to a normalized histogram. We will experiment with the value of $h$ manually.
Load data¶
Load the training trn
and test tst
parts of the data, each containing images and their correct labels. For both, the label 0 denotes an image of letter 'A' and label 1 an image of letter 'C'.
data = np.load("data_33rpz_parzen.npz", allow_pickle=True)
tst = data["tst"].item()
trn = data["trn"].item()
Compute measurements¶
For measurements, we use the function compute_measurement_lr_cont
from the previous assignments.
x = compute_measurement_lr_cont(trn['images'])
# splitting the training data into classes
idxs = np.squeeze(trn['labels'])
xA = x[idxs == 0]
xC = x[idxs == 1]
Parzen estimate¶
Complete the template of the function p = parzen(x, trn_x, h)
so that for a given $x$ (possibly a vector of values) it computes the Parzen estimate of probability density $p_{X|k}(x|k)$. Here, trn_x
is a vector of training measurements for one of the classes (xA
or xC
) and $h$ is the kernel bandwidth. As the kernel function $K_h(x)$ use the normal distribution $N(0, h^2)$.
Hint: Use scipy.stats.norm.pdf()
to calculate $K_h$.
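A minimal sketch of such an estimator is shown below (assuming trn_x is a 1-D NumPy array and using scipy.stats.norm.pdf as the hint suggests); your parzen in parzen.py can of course be organised differently:
import numpy as np
from scipy.stats import norm

def parzen_sketch(x, trn_x, h):
    # Parzen window density estimate at the points x (illustrative sketch).
    # x     : (m,) points where the density is evaluated
    # trn_x : (n,) i.i.d. training measurements of one class
    # h     : kernel bandwidth (standard deviation of the Gaussian kernel)
    x = np.atleast_1d(np.asarray(x, dtype=float))
    trn_x = np.asarray(trn_x, dtype=float)
    diffs = x[:, None] - trn_x[None, :]                     # pairwise differences, shape (m, n)
    return norm.pdf(diffs, loc=0.0, scale=h).mean(axis=1)   # mean of N(0, h^2) kernels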
# Unit Test:
p = parzen(np.array([1, 2, 3]), np.array([-1, 0, 2, 1.5, 0]), 1.0)
np.testing.assert_array_almost_equal(p, [0.22639369, 0.17268428, 0.07609717], err_msg="Incorrect density estimate.")
Visualize the density estimate¶
We are free to choose the bandwidth $h$. Let's explore how the estimate differs for different values of $h$.
# computing the histograms of training data
hist_A, bins_A = np.histogram(xA, 20, density=True)
hist_C, bins_C = np.histogram(xC, 20, density=True)
# estimating conditional probability using Parzen window
x_range = np.arange(np.min(xA), np.max(xA), 100)
h = [100., 500., 1000., 2000.]
y = np.zeros([len(h), x_range.size], np.float64)
for i in range(len(h)):
y[i,:] = parzen(x_range, xA, h[i])
def plot_parzen(x, y, hist, bins, h=500., cls='A'):
centers = (bins[:-1] + bins[1:]) / 2
    width = bins[1:] - bins[:-1]
plt.bar(centers, hist, width=width, edgecolor='k')
plt.plot(x.T, y.T, 'r', linewidth=2)
plt.title('h = {:.0f}'.format(h))
plt.xlabel('x')
plt.ylabel('p(x|{})'.format(cls))
# plots of the estimates
plt.figure(figsize=(12,6))
for idx in range(4):
cur_h = h[idx]
plt.subplot(2,2,idx+1)
plot_parzen(x_range, y[idx:(idx+1),:], hist_A, bins_A, h=cur_h)
plt.savefig('parzen_estimates.png')
# Use the following piece of code for interactive density estimation visualization
# use this one in VSCode
%matplotlib inline
# and this one in PyCharm
# %matplotlib notebook
try:
from ipywidgets import interact, interactive, fixed
@interact(h=(10., 2000., 10.))
def plot_parzen_interactive(h=500.):
plt.figure(figsize=(6, 3))
x_range = np.arange(np.min(xA), np.max(xA), 100)
y = parzen(x_range, xA, h)
plot_parzen(x_range, y, hist_A, bins_A, h=h)
plt.ylim([0, 4.5e-4])
except ImportError:
print('Optional feature.')
Part 2: Finding the optimal kernel size¶
As we have just seen, different values of $h$ produce different estimates of $p_{X|k}(x|A)$. Obviously, for $h=100$ the estimate is influenced too much by our limited data sample and it will not generalise well to unseen examples. For $h=2000$, the estimate is clearly oversmoothed. So, what is the best value of $h$?
To assess the ability of the estimate to generalise well to unseen examples we will use 10-fold cross-validation. We will split the training set randomly ten times (10-fold) into two sets $X^i_{\text{trn}}$ and $X^i_{\text{tst}}$, $i=1,\ldots,10$, and use the set $X^i_{\text{trn}}$ for the distribution estimation and the set $X^i_{\text{tst}}$ for validation of the estimate. As a measure of the estimate quality we will use the log-likelihood of the validation set given the bandwidth $h$, averaged over the ten splits: \begin{align*} L^i(h) &= \sum_{x\in X^i_{\text{tst}}} \log p_{X|k}(x|A)\\ L(h) &= \frac{1}{10} \sum_{i=1}^{10} L^i(h) \end{align*} The value of $h$ which maximises $L(h)$ is the one which produces the estimate that generalises best. See [4] for a further description of the cross-validation technique.
# h_range = np.arange(100,1000+1e-8,50)
h_range = np.linspace(100,1000,19)
num_folds = 10 # 10-fold cross-validation
Class 'A' cross-validation¶
Implement the estimate of the optimal value of the bandwidth $h$ using the 10-fold cross-validation. The procedure (shown for the class A, the class C is analogous) is as follows.
Start by splitting the training set xA
into 10 subsets using the crossval
function. Notice that itrn[i]
and itst[i]
contain only data indexes, not the data themselves!
Then complete the template function Lh = compute_Lh(itrn, itst, x, h)
. The function needs to compute the log-likelihood $L^i(h)$ on each pair of training and test sets $X^i_{\text{trn}}$ (indexed by itrn[i]
) and $X^i_{\text{tst}}$ (indexed by itst[i]
) using the training part for the estimation of $p_{X|k}(x|A)$ and the test part for computing the log-likelihood. The output is the mean log-likelihood $L(h)$ over all 10 folds.
Hint: $p_{X|k}(x|A) = \text{parzen}(x, X^i_{\text{trn}}, h)$.
Store the values of $L(h)$ for the range of $h$ values specified by h_range
.
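A sketch of one possible implementation is given below. It assumes that itrn and itst are lists (one entry per fold) of integer index arrays, as produced by crossval in the next cell, and that parzen is the estimator from Part 1; the commented usage line shows how $L(h)$ could be collected over h_range:
def compute_Lh_sketch(itrn, itst, x, h):
    # Mean cross-validated log-likelihood L(h) (illustrative sketch).
    fold_lls = []
    for idx_trn, idx_tst in zip(itrn, itst):
        p = parzen(x[idx_tst], x[idx_trn], h)   # density estimated from the training fold,
        fold_lls.append(np.sum(np.log(p)))      # scored on the held-out fold: L^i(h)
    return np.mean(fold_lls)                    # average over the 10 folds

# assumed usage:
# Lh = np.array([compute_Lh_sketch(itrn, itst, xA, h) for h in h_range])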
np.random.seed(42) # to get the same example outputs
num_data = xA.size
itrn, itst = crossval(num_data, num_folds)
raise NotImplementedError("You have to implement the rest.")
Lh = None
Optimal value of parameter $h$¶
Find the optimal value of $h$ using the scipy.optimize.fminbound
function and show the density estimate of $p_{X|k}(x|A)$ with the optimal $h$.
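One possible way to use it is sketched below; fminbound minimises, so the averaged log-likelihood is negated, and the search interval is taken from h_range (the _sketch names are illustrative only):
neg_Lh = lambda h: -compute_Lh(itrn, itst, xA, h)               # fminbound minimises
h_bestA_sketch = opt.fminbound(neg_Lh, h_range[0], h_range[-1])
Lh_bestA_sketch = compute_Lh(itrn, itst, xA, h_bestA_sketch)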
raise NotImplementedError("You have to implement the rest.")
h_bestA = None
Lh_bestA = None
Visualization of optimal $h$¶
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(h_range, Lh)
plt.plot(h_bestA, Lh_bestA, 'or')
bottom, _ = plt.ylim()
plt.plot([h_bestA, h_bestA], [bottom, Lh_bestA], '--r');
plt.title('10-fold cross-validation')
plt.xlabel('h')
plt.ylabel('L(h)')
plt.grid('on')
y = parzen(x_range, xA, h_bestA)
plt.subplot(1, 2, 2)
plot_parzen(x_range, y, hist_A, bins_A, h=h_bestA)
plt.title(f'h*={h_bestA:.2f} for class A')
plt.tight_layout()
plt.savefig('optimal_h_classA.png')
Class 'C' cross-validation¶
Now, repeat the previous steps to estimate the distribution of $p_{X|k}(x|C)$.
x_range = np.arange(np.min(xC), np.max(xC), 100)
np.random.seed(42) # to get the same example outputs
num_data = xC.size
itrn, itst = crossval(num_data, num_folds)
raise NotImplementedError("You have to implement the rest.")
Lh = None
Optimal value of parameter $h$¶
raise NotImplementedError("You have to implement the rest.")
h_bestC = None
Lh_bestC = None
Visualization of optimal $h$¶
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(h_range, Lh)
plt.plot(h_bestC, Lh_bestC, 'or')
bottom, _ = plt.ylim()
plt.plot([h_bestC, h_bestC], [bottom, Lh_bestC], '--r');
plt.title('10-fold cross-validation')
plt.xlabel('h')
plt.ylabel('L(h)')
plt.grid('on')
y = parzen(x_range, xC, h_bestC)
plt.subplot(1, 2, 2)
plot_parzen(x_range, y, hist_C, bins_C, h=h_bestC, cls='C')
plt.title(f'h*={h_bestC:.2f} for class C')
plt.tight_layout()
plt.savefig('optimal_h_classC.png')
Part 3 - Bayesian classifier¶
Use the estimated distributions $p_{X|k}(x|A)$ and $p_{X|k}(x|C)$ to create a Bayesian classifier with zero-one loss function. Estimate the a priori probabilities $p_K(A)$ and $p_K(C)$ on the training set as $$p_K(A) = \frac{N_A}{N_A + N_C},$$ where $N_A$ and $N_C$ are the counts of training samples of the respective classes (this is a Maximum-Likelihood estimate as discussed in the MLE-MAP-Bayes lab).
Then complete the template of the function labels = classify_bayes_parzen(x_test, xA, xC, pA, pC, h_bestA, h_bestC)
which takes the measurements x_test
computed on set tst
and finds a label for each test example. Notice that parzen
returns a probability density for any $x$, including $x \in$ tst
. There is no need for approximation, interpolation or similar techniques here!
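A minimal sketch of such a classifier is shown below (assuming the zero-one loss, so each measurement gets the class with the larger unnormalised posterior, with label 0 for 'A' and 1 for 'C'):
def classify_bayes_parzen_sketch(x_test, xA, xC, pA, pC, h_bestA, h_bestC):
    # Bayesian classification with Parzen-estimated class-conditional densities (sketch).
    pxA = parzen(x_test, xA, h_bestA)           # p(x|A) at the test measurements
    pxC = parzen(x_test, xC, h_bestC)           # p(x|C) at the test measurements
    return (pxC * pC > pxA * pA).astype(int)    # 0-1 loss: pick the larger posterior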
x_test = compute_measurement_lr_cont(tst['images'])
# computing a priori probabilities from the training counts (ML estimate)
pA = xA.size / (xA.size + xC.size)
pC = 1.0 - pA
labels = classify_bayes_parzen(x_test, xA, xC, pA, pC, h_bestA, h_bestC)
Show the classification¶
Do not forget to compute the Bayes error!
show_classification(tst['images'], labels, 'AC')
plt.savefig('parzen_classif.png')
# classification error
raise NotImplementedError("You have to implement the rest.")
bayes_error = None
print(bayes_error)
0.075
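For reference, the error above could be obtained as the fraction of misclassified test images (assuming tst['labels'] stores the ground-truth labels in the same format as trn['labels']):
error_sketch = np.mean(labels != np.squeeze(tst['labels']))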
Submission to the BRUTE Upload System¶
To fulfill this assignment, you need to submit these files (all packed in one .zip file) into the upload system:
- parzen.ipynb - a script for data initialization, calling of the implemented functions and plotting of their results (for your convenience, will not be checked)
- parzen.py - containing the following implemented methods:
  - parzen - Parzen window estimator
  - compute_Lh - function for computing the cross-validated log-likelihood as a function of $h$
  - classify_bayes_parzen - classification using Parzen window density estimates
- parzen_estimates.png - plots of the estimates of $p_{X|k}(x|A)$ for varying $h$
- optimal_h_classA.png, optimal_h_classC.png - graphs with the optimal bandwidth $h$ for classes A and C respectively
- parzen_classif.png - image with the Parzen classification
When preparing a zip file for the upload system, do not include any directories, the files have to be in the zip file root.
Bonus task¶
Adapt your code to accept two-dimensional measurements:
- x = (sum of pixel intensities in the left half of the image) - (sum of pixel intensities in the right half of the image)
- y = (sum of pixel intensities in the top half of the image) - (sum of pixel intensities in the bottom half of the image)
The kernel function will now be a 2D normal distribution with diagonal covariance matrix cov = (h**2)*np.eye(2)
. Display the estimated distributions $p_{X|k}(x|k)$.
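A sketch of the 2-D estimator could look as follows (assuming the measurements are stored as (n, 2) arrays and using scipy.stats.multivariate_normal for the kernel):
import numpy as np
from scipy.stats import multivariate_normal

def parzen_2d_sketch(X, trn_X, h):
    # 2-D Parzen estimate with an isotropic Gaussian kernel (illustrative sketch).
    # X     : (m, 2) points where the density is evaluated
    # trn_X : (n, 2) training measurements of one class
    kernel = multivariate_normal(mean=np.zeros(2), cov=(h ** 2) * np.eye(2))
    diffs = X[:, None, :] - trn_X[None, :, :]   # pairwise differences, shape (m, n, 2)
    return kernel.pdf(diffs).mean(axis=1)       # mean of the 2-D Gaussian kernels
The estimated densities can then be evaluated on a regular grid (e.g. np.meshgrid) and displayed with plt.contourf or plt.imshow.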
References¶
[1] Parzen window
[2] Parzen Windows (Duda, Hart)
[3] Parzen Windows Method (V. Hlaváč, in Czech)
[4] Cross-validation