# Exercise - Importance Sampling

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import norm

%matplotlib inline
# Target density p (green): a two-component Gaussian mixture.
# Component 1: N(mu_1, sigma_square_1), weight prob_1.
mu_1 = -2.5
sigma_square_1 = 4.
sigma_1 = np.sqrt(sigma_square_1)

# Component 2: N(mu_2, sigma_square_2), weight 1 - prob_1.
mu_2 = 3.5
sigma_square_2 = 4.
sigma_2 = np.sqrt(sigma_square_2)

prob_1 = 0.4  # mixture weight of component 1
size = 40     # default sample size used below

rv_1 = norm(loc=mu_1, scale=sigma_1)
rv_2 = norm(loc=mu_2, scale=sigma_2)
x_ = np.arange(-14, 16, .1)  # grid on which the densities are plotted


def p_green(x):
    """Mixture density p(x) = prob_1*N(mu_1) + (1-prob_1)*N(mu_2) (green curve)."""
    return prob_1 * rv_1.pdf(x) + (1 - prob_1) * rv_2.pdf(x)


plt.plot(x_, p_green(x_), "g-")

# Proposal density q (red): a single broad Gaussian we CAN sample from.
sigma_red = 5
mu_red = 1.
rv_red = norm(loc=mu_red, scale=sigma_red)
p_red = rv_red.pdf
plt.plot(x_, p_red(x_), "r-")

_ = plt.xlabel("x")

Expectation of $f(x) = x^2$

$E_p[f(x)] = \int f(x) p(x) dx$

- $p(x)$ is the green curve.


def f(x):
    """Integrand whose expectation under p is estimated: f(x) = x**2.

    Works elementwise on NumPy arrays as well as on scalars.
    """
    # Indentation restored: the notebook export had `return` at column 0.
    return x ** 2
def sample_from_green(size=size):
    """Draw `size` i.i.d. samples from the green mixture density.

    The count of component-1 draws is itself random, Binomial(size, prob_1);
    the remaining points come from component 2. The returned array is
    grouped by component (all component-1 samples first), which does not
    matter for Monte Carlo averages.

    Relies on module-level mu_1/sigma_1, mu_2/sigma_2 and prob_1.
    """
    # Draw a scalar count. The original passed a third argument (1) to
    # np.random.binomial, which yields a length-1 ARRAY, making num_1 and
    # num_2 arrays instead of ints; omitting it returns a plain integer.
    num_1 = np.random.binomial(size, prob_1)
    num_2 = size - num_1
    sample_1 = np.random.normal(loc=mu_1, scale=sigma_1, size=num_1)
    sample_2 = np.random.normal(loc=mu_2, scale=sigma_2, size=num_2)
    return np.concatenate((sample_1, sample_2))

#### Compute the empirical expectation of f(x):
# Plain Monte Carlo estimate: E_p[f] ~= (1/s) * sum_i f(x_i) with x_i ~ p,
# using direct samples from the green mixture.
s = 1000000
green_samples = sample_from_green(s)
# Analytically, E_p[x^2] = prob_1*(mu_1^2 + sigma_1^2)
#                        + (1-prob_1)*(mu_2^2 + sigma_2^2) = 13.85
# for the constants above; the estimate below should be close to that.
# Bare expression: its value is shown by the notebook's display hook.
(f(green_samples)).mean()

### Importance sampling

Assume we could not directly sample from $p(x)$ (green curve), but we can sample from the (bad) approximation $q$ (red curve).

Exercise: Implement the computation of $\mathbb E_p[f(x)]$ by Importance Sampling