// ============================================================
// MATH UTILITIES: Normal distribution
// ============================================================
normalCDF = x => {
const a1 = 0.254829592, a2 = -0.284496736, a3 = 1.421413741
const a4 = -1.453152027, a5 = 1.061405429, p = 0.3275911
const sign = x < 0 ? -1 : 1
const z = Math.abs(x) / Math.sqrt(2)
const t = 1.0 / (1.0 + p * z)
const y = 1 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * Math.exp(-z * z)
return 0.5 * (1 + sign * y)
}Extreme Value Theory
Interactive exploration of Extreme Value Theory (EVT) for modelling distribution tails and computing VaR and ES at high confidence levels
Extreme Value Theory (EVT) provides a rigorous mathematical framework for modelling the tails of loss distributions. Standard statistical methods that work well for the centre of a distribution may perform poorly in the tails, where data are sparse. EVT offers principled methods for smoothing the empirical tail distribution and extrapolating beyond observed data to higher confidence levels (Hull 2023, chap. 12; Christoffersen 2012, chap. 6; McNeil et al. 2015, chap. 5).
The key result (Gnedenko’s theorem) states that for a wide class of distributions, the excess distribution over a high threshold converges to a Generalized Pareto Distribution (GPD). This is analogous to how the central limit theorem shows that normalized sums converge to the normal distribution, but EVT applies to extremes rather than averages.
// Inverse standard normal CDF (quantile function): maps p in (0,1) to the z
// with Phi(z) = p. Peter Acklam's rational approximation (relative error
// ~1.15e-9). The cell evaluates the coefficients once and returns p -> z.
// NOTE: coefficient values and Horner grouping are precision-critical.
qnorm = {
// Central-region rational approximation coefficients (numerator a_i,
// denominator b_i), used for pLow <= p <= pHigh.
const a1 = -3.969683028665376e+01, a2 = 2.209460984245205e+02
const a3 = -2.759285104469687e+02, a4 = 1.383577518672690e+02
const a5 = -3.066479806614716e+01, a6 = 2.506628277459239e+00
const b1 = -5.447609879822406e+01, b2 = 1.615858368580409e+02
const b3 = -1.556989798598866e+02, b4 = 6.680131188771972e+01
const b5 = -1.328068155288572e+01
// Tail-region coefficients (numerator c_i, denominator d_i), used in both
// tails via the symmetry qnorm(p) = -qnorm(1 - p).
const c1 = -7.784894002430293e-03, c2 = -3.223964580411365e-01
const c3 = -2.400758277161838e+00, c4 = -2.549732539343734e+00
const c5 = 4.374664141464968e+00, c6 = 2.938163982698783e+00
const d1 = 7.784695709041462e-03, d2 = 3.224671290700398e-01
const d3 = 2.445134137142996e+00, d4 = 3.754408661907416e+00
// Breakpoints separating lower tail, centre, and upper tail.
const pLow = 0.02425, pHigh = 1 - pLow
return p => {
// Degenerate probabilities map to the infinite endpoints.
if (p <= 0) return -Infinity
if (p >= 1) return Infinity
if (p < pLow) {
// Lower tail: rational function in q = sqrt(-2 ln p).
const q = Math.sqrt(-2 * Math.log(p))
return (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) / ((((d1*q+d2)*q+d3)*q+d4)*q+1)
}
if (p <= pHigh) {
// Central region: rational function in r = (p - 1/2)^2, scaled by q = p - 1/2.
const q = p - 0.5, r = q * q
return (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r+a6)*q / (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r+1)
}
// Upper tail: mirror image of the lower-tail formula with p -> 1 - p.
const q = Math.sqrt(-2 * Math.log(1 - p))
return -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) / ((((d1*q+d2)*q+d3)*q+d4)*q+1)
}
}// ============================================================
// MATH UTILITIES: Log-gamma and Student-t distribution
// ============================================================
// Log-gamma via the Lanczos approximation (g = 7, 9 coefficients).
// The cell returns the function _lgamma: x -> ln Gamma(x).
lgamma = {
const g = 7
const coef = [
0.99999999999980993, 676.5203681218851, -1259.1392167224028,
771.32342877765313, -176.61502916214059, 12.507343278686905,
-0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7
]
function _lgamma(x) {
// Non-positive arguments are treated as +Infinity (poles / undefined).
if (x <= 0) return Infinity
if (x < 0.5) {
// Reflection formula, in logs: ln Gamma(x) = ln(pi / sin(pi x)) - ln Gamma(1 - x).
return Math.log(Math.PI / Math.sin(Math.PI * x)) - _lgamma(1 - x)
}
x -= 1
// Lanczos series A(x) = c0 + sum_i c_i / (x + i), i = 1..g+1.
let a = coef[0]
const t = x + g + 0.5
for (let i = 1; i < g + 2; i++) a += coef[i] / (x + i)
return 0.5 * Math.log(2 * Math.PI) + (x + 0.5) * Math.log(t) - t + Math.log(a)
}
return _lgamma
}gpdCDF = (y, xi, beta) => {
// GPD CDF G(y; xi, beta): exponential limit when xi ~ 0. For xi < 0 the
// support has a finite endpoint at y = -beta/xi, so arg <= 0 means "past
// the endpoint" (CDF 1); for xi > 0 arg <= 0 can only mean y below support.
if (y < 0) return 0
if (Math.abs(xi) < 1e-8) return 1 - Math.exp(-y / beta)
const arg = 1 + xi * y / beta
if (arg <= 0) return xi < 0 ? 1 : 0
return 1 - Math.pow(arg, -1 / xi)
}
// GPD density: g(y; xi, beta) = (1/beta) * (1 + xi*y/beta)^(-1/xi - 1),
// with the exponential(beta) density as the xi -> 0 limit.
gpdPDF = (y, xi, beta) => {
  if (y < 0) return 0
  // xi numerically zero: exponential limit
  if (Math.abs(xi) < 1e-8) return (1 / beta) * Math.exp(-y / beta)
  const base = 1 + xi * y / beta
  // Outside the support (past the finite endpoint when xi < 0)
  if (base <= 0) return 0
  return (1 / beta) * Math.pow(base, -1 / xi - 1)
}
// GPD survival function: P(Y > y) = (1 + xi*y/beta)^(-1/xi),
// exponential limit exp(-y/beta) when xi ~ 0.
gpdSurvival = (y, xi, beta) => {
  if (y < 0) return 1
  if (Math.abs(xi) < 1e-8) return Math.exp(-y / beta)
  const base = 1 + xi * y / beta
  // xi < 0: past the finite endpoint -> survival 0; otherwise below support -> 1
  if (base <= 0) return xi < 0 ? 0 : 1
  return Math.pow(base, -1 / xi)
}
// GPD quantile (inverse CDF): Q(p; xi, beta) = (beta/xi)*((1-p)^(-xi) - 1),
// with the exponential limit -beta*ln(1-p) when xi ~ 0.
gpdQuantile = (p, xi, beta) => {
  if (Math.abs(xi) < 1e-8) return -beta * Math.log(1 - p)
  return (beta / xi) * (Math.pow(1 - p, -xi) - 1)
}
// Unconditional tail probability under the POT decomposition:
// P(X > x) = (n_u/n) * P(Y > x - u) above the threshold u, and the
// empirical exceedance fraction n_u/n at or below it.
evtTailProb = (x, u, xi, beta, nu, n) =>
  x <= u ? nu / n : (nu / n) * gpdSurvival(x - u, xi, beta)
// EVT VaR at confidence level q: inverts the POT tail estimate
// P(X > x) = (nu/n) * (1 + xi*(x - u)/beta)^(-1/xi) = 1 - q.
evtVaR = (q, u, xi, beta, nu, n) => {
  if (Math.abs(xi) < 1e-8) {
    // xi -> 0 (exponential tail) limit: u + beta * ln(nu / (n*(1-q))),
    // written as a subtraction instead of the previous `* (-1)`.
    return u - beta * Math.log(n * (1 - q) / nu)
  }
  return u + (beta / xi) * (Math.pow((n / nu) * (1 - q), -xi) - 1)
}
// EVT ES at confidence level q: ES = (VaR + beta - xi*u) / (1 - xi)
// (Hull 2023, ch. 12). The GPD mean — and hence the expected shortfall —
// is infinite when xi >= 1; return Infinity there instead of the
// meaningless negative value the bare formula would produce.
evtES = (q, u, xi, beta, nu, n) => {
  if (xi >= 1) return Infinity
  const v = evtVaR(q, u, xi, beta, nu, n)
  return (v + beta - xi * u) / (1 - xi)
}
// ============================================================
// PRNG UTILITIES
// ============================================================
rng_utils = {
function mulberry32(seed) {
return function() {
seed |= 0; seed = seed + 0x6D2B79F5 | 0
let t = Math.imul(seed ^ seed >>> 15, 1 | seed)
t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t
return ((t ^ t >>> 14) >>> 0) / 4294967296
}
}
function boxMuller(rng) {
const u1 = rng(), u2 = rng()
return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2)
}
function gammaSample(rng, shape) {
if (shape < 1) {
return gammaSample(rng, shape + 1) * Math.pow(rng(), 1 / shape)
}
const d = shape - 1 / 3
const c = 1 / Math.sqrt(9 * d)
while (true) {
let x, v
do {
x = boxMuller(rng)
v = 1 + c * x
} while (v <= 0)
v = v * v * v
const u = rng()
if (u < 1 - 0.0331 * (x * x) * (x * x)) return d * v
if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v
}
}
function chiSquaredSample(rng, d) {
return 2 * gammaSample(rng, d / 2)
}
function stdTSample(rng, d) {
const normal = boxMuller(rng)
const chi2 = chiSquaredSample(rng, d)
return normal / Math.sqrt(chi2 / d) * Math.sqrt((d - 2) / d)
}
// GPD sample via inverse transform
function gpdSample(rng, xi, beta) {
const u = rng()
if (Math.abs(xi) < 1e-8) return -beta * Math.log(1 - u)
return (beta / xi) * (Math.pow(1 - u, -xi) - 1)
}
return { mulberry32, boxMuller, stdTSample, gpdSample }
}fmt = (x, d) => x === undefined || isNaN(x) ? "N/A" : x.toFixed(d)
// Fraction -> percentage with one decimal, e.g. 0.025 -> "2.5%".
pctFmt = x => (x * 100).toFixed(1) + "%"
// Round away accumulated floating-point noise (10 decimal places).
fpRound = x => Math.round(x * 1e10) / 1e10
// Probability formatter for log-scale axes: label only at powers of 10
// to avoid overlapping ticks (Plot.js places ticks at 1,2,...,9 per decade)
probFmt = v => {
  const exponent = Math.log10(v)
  // Blank label for any tick that is not (numerically) a power of ten
  if (Math.abs(exponent - Math.round(exponent)) > 0.01) return ""
  // Progressively more decimal places as the probability shrinks
  if (v >= 0.01) return (v * 100).toFixed(0) + "%"
  if (v >= 0.001) return (v * 100).toFixed(1) + "%"
  if (v >= 1e-4) return (v * 100).toFixed(2) + "%"
  return v.toExponential(0)
}
// ============================================================
// NELDER-MEAD OPTIMIZER (for GPD MLE)
// ============================================================
// Nelder-Mead downhill simplex minimizer. The cell returns a function
// (f, start, maxIter) -> best point found, where f maps an array of n
// numbers to a scalar. Used here for the 2-parameter GPD negative
// log-likelihood. Derivative-free; no guarantee of a global minimum.
nelderMead = {
return function(f, start, maxIter = 500) {
const n = start.length
// Standard coefficients: reflection, expansion, contraction, shrink.
const alpha = 1, gamma = 2, rho = 0.5, sigma = 0.5
// Build initial simplex: start plus n perturbed copies (one per coordinate).
let simplex = [start.slice()]
for (let i = 0; i < n; i++) {
const p = start.slice()
p[i] += Math.max(Math.abs(p[i]) * 0.5, 0.2)
simplex.push(p)
}
let values = simplex.map(p => f(p))
for (let iter = 0; iter < maxIter; iter++) {
// Sort by function value (ascending: best first, worst at index n)
const order = values.map((v, i) => i).sort((a, b) => values[a] - values[b])
simplex = order.map(i => simplex[i])
values = order.map(i => values[i])
// Convergence check: best and worst values nearly equal
if (Math.abs(values[n] - values[0]) < 1e-12) break
// Centroid of all but worst
const centroid = new Array(n).fill(0)
for (let i = 0; i < n; i++) {
for (let j = 0; j < n; j++) centroid[j] += simplex[i][j]
}
for (let j = 0; j < n; j++) centroid[j] /= n
// Reflection: mirror the worst point through the centroid
const xr = centroid.map((c, j) => c + alpha * (c - simplex[n][j]))
const fr = f(xr)
if (fr < values[n - 1] && fr >= values[0]) {
simplex[n] = xr; values[n] = fr; continue
}
if (fr < values[0]) {
// Expansion: reflected point is the new best, try going further
const xe = centroid.map((c, j) => c + gamma * (xr[j] - c))
const fe = f(xe)
if (fe < fr) { simplex[n] = xe; values[n] = fe }
else { simplex[n] = xr; values[n] = fr }
continue
}
// Contraction: pull the worst point toward the centroid
const xc = centroid.map((c, j) => c + rho * (simplex[n][j] - c))
const fc = f(xc)
if (fc < values[n]) {
simplex[n] = xc; values[n] = fc; continue
}
// Shrink: move every point (except the best) toward the best point
for (let i = 1; i <= n; i++) {
for (let j = 0; j < n; j++) simplex[i][j] = simplex[0][j] + sigma * (simplex[i][j] - simplex[0][j])
values[i] = f(simplex[i])
}
}
const best = values.indexOf(Math.min(...values))
return simplex[best]
}
}// ============================================================
// LEGEND HELPER
// ============================================================
// Clickable legend widget. Returns a DOM element whose .value is a Set of
// the currently hidden series keys; clicking an entry toggles it, restyles
// the entry (dimmed + strikethrough), and dispatches an "input" event so
// downstream reactive cells re-render.
// items: array of { label, color, type?, key? }; key defaults to label.
legend = (items) => {
const el = document.createElement("div")
el.style.cssText = "display:flex; flex-wrap:wrap; margin-top:-4px; margin-bottom:6px;"
const hidden = new Set()
for (const d of items) {
const key = d.key || d.label
const span = document.createElement("span")
span.style.cssText = "display:inline-flex; align-items:center; gap:4px; margin-right:14px; cursor:pointer; user-select:none; transition:opacity 0.15s;"
// Swatch shape depends on the series mark type; default is a solid line.
let swatchHTML
if (d.type === "dot") {
swatchHTML = `<svg width="12" height="12"><circle cx="6" cy="6" r="5" fill="${d.color}" opacity="0.8"/></svg>`
} else if (d.type === "dashed") {
swatchHTML = `<svg width="22" height="12"><line x1="0" y1="6" x2="22" y2="6" stroke="${d.color}" stroke-width="2" stroke-dasharray="4 2"/></svg>`
} else if (d.type === "rect") {
swatchHTML = `<svg width="14" height="14"><rect width="14" height="14" fill="${d.color}"/></svg>`
} else {
swatchHTML = `<svg width="22" height="12"><line x1="0" y1="6" x2="22" y2="6" stroke="${d.color}" stroke-width="2"/></svg>`
}
span.innerHTML = `${swatchHTML}<span style="font-size:0.82rem;">${d.label}</span>`
span.addEventListener("click", () => {
const nowHidden = !hidden.has(key)
if (nowHidden) hidden.add(key); else hidden.delete(key)
span.style.opacity = nowHidden ? "0.35" : "1"
span.querySelector("span").style.textDecoration = nowHidden ? "line-through" : "none"
// Publish a fresh Set so the change is observed as a new value.
el.value = new Set(hidden)
el.dispatchEvent(new Event("input", {bubbles: true}))
})
el.appendChild(span)
}
el.value = new Set(hidden)
return el
}1. The Generalized Pareto Distribution
The GPD is the cornerstone of practical EVT applications. For losses exceeding a threshold \(u\), the excess distribution converges to:
\[ G_{\xi,\beta}(y) = \begin{cases} 1 - \left(1 + \xi \frac{y}{\beta}\right)^{-1/\xi}, & \text{if } \xi \neq 0 \\ 1 - \exp\left(-\frac{y}{\beta}\right), & \text{if } \xi = 0 \end{cases} \]
where \(x\) is the loss, \(u\) is the threshold, \(y = x - u \geq 0\) is the excess of the loss over the threshold, \(\xi\) is the shape parameter (determines tail heaviness), and \(\beta > 0\) is the scale parameter.
The shape parameter \(\xi\) determines the type of tail decay:
- \(\xi > 0\): Heavy tails (Fréchet class). Power-law decay. Higher moments may be infinite. This is the typical case for financial data, with \(\xi\) usually between 0.1 and 0.4.
- \(\xi = 0\): Exponential tails (Gumbel class). The normal distribution belongs here.
- \(\xi < 0\): Short tails (Weibull class). Finite right endpoint at \(y = -\beta/\xi\). Less relevant for financial losses.
The scale parameter \(\beta\) controls the spread of excesses over the threshold. Larger \(\beta\) means that excesses tend to be larger: the mean of the GPD is \(\beta/(1-\xi)\) (when \(\xi < 1\)), so \(\beta\) directly scales the expected overshoot above the threshold.
Tip
How to experiment
Increase \(\xi\) from 0 to 0.4 and watch the tail thicken in the PDF plot, the survival function decay more slowly (power law vs. exponential), and high-order moments become infinite. Compare positive \(\xi\) values to the \(\xi = 0\) exponential reference (dashed line).
// Generate GPD plot data
// GPD curves sampled on a grid of excesses y, driven by the gpdXi/gpdBeta
// sliders. Each point carries the GPD pdf/cdf/survival plus xi = 0
// (exponential) and normal reference curves for comparison.
gpdPlotData = {
const xi = gpdXi, beta = gpdBeta
// Plot domain: stop just short of the finite endpoint -beta/xi when xi < 0;
// otherwise extend slightly past the 99.9% quantile (at least to 8).
const maxY = xi < 0 ? -beta / xi * 0.98 : Math.max(8, gpdQuantile(0.999, xi, beta) * 1.1)
const nPts = 300
const data = []
for (let i = 0; i <= nPts; i++) {
const y = (i / nPts) * maxY
data.push({
y,
pdf: gpdPDF(y, xi, beta),
// Exponential (xi = 0) reference with the same scale beta
pdfRef: gpdPDF(y, 0, beta),
cdf: gpdCDF(y, xi, beta),
survival: gpdSurvival(y, xi, beta),
survivalRef: gpdSurvival(y, 0, beta),
// Normal survival for comparison (standard normal tail beyond y treated as z-score)
survivalNorm: 1 - normalCDF(y)
})
}
return data
}{
// Side-by-side PDF and CDF panels: GPD (red, solid) vs. the exponential
// xi = 0 reference (blue, dashed). gpdPdfLegend is a Set of hidden series
// keys from the legend helper; hidden series are simply omitted from marks.
const h = gpdPdfLegend
// Cap the density axis so a spike near y = 0 doesn't flatten the curves.
const maxPdf = Math.min(2, Math.max(...gpdPlotData.map(d => d.pdf)) * 1.1)
const pdfPlot = Plot.plot({
height: 280, marginLeft: 55,
x: { label: "Excess y", grid: false },
y: { label: "Density", grid: true, domain: [0, maxPdf] },
marks: [
...(!h.has("exp") ? [
Plot.line(gpdPlotData, { x: "y", y: "pdfRef", stroke: "#2f71d5", strokeWidth: 1.5, strokeDasharray: "6 3" })
] : []),
...(!h.has("gpd") ? [
Plot.line(gpdPlotData, { x: "y", y: "pdf", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
const cdfPlot = Plot.plot({
height: 280, marginLeft: 55,
x: { label: "Excess y", grid: false },
y: { label: "CDF", grid: true, domain: [0, 1] },
marks: [
...(!h.has("exp") ? [
Plot.line(gpdPlotData, { x: "y", y: d => gpdCDF(d.y, 0, gpdBeta), stroke: "#2f71d5", strokeWidth: 1.5, strokeDasharray: "6 3" })
] : []),
...(!h.has("gpd") ? [
Plot.line(gpdPlotData, { x: "y", y: "cdf", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
return html`<div style="display:flex;gap:16px;flex-wrap:wrap;">
<div style="flex:1;min-width:320px;"><h4 style="text-align:center;margin-bottom:4px;">Probability Density Function</h4>${pdfPlot}</div>
<div style="flex:1;min-width:320px;"><h4 style="text-align:center;margin-bottom:4px;">Cumulative Distribution Function</h4>${cdfPlot}</div>
</div>`
}{
// Survival functions on a log y-axis: GPD (power-law) vs. exponential vs.
// normal tails. Probabilities below 1e-7 are dropped so the log scale
// stays finite.
const h = gpdTailLegend
const filtered = gpdPlotData.filter(d => d.survival > 1e-7 && d.y > 0)
return Plot.plot({
height: 400, marginLeft: 60,
x: { label: "Excess y", grid: false },
y: { label: "Survival probability", type: "log", grid: true, tickFormat: probFmt },
marks: [
...(!h.has("norm") ? [
Plot.line(filtered.filter(d => d.survivalNorm > 1e-7), {
x: "y", y: "survivalNorm", stroke: "#999", strokeWidth: 1.5, strokeDasharray: "4 2"
})
] : []),
...(!h.has("exp") ? [
Plot.line(filtered.filter(d => d.survivalRef > 1e-7), {
x: "y", y: "survivalRef", stroke: "#2f71d5", strokeWidth: 1.5, strokeDasharray: "6 3"
})
] : []),
...(!h.has("gpd") ? [
Plot.line(filtered, { x: "y", y: "survival", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
}html`<p style="color:#666;font-size:0.85rem;">The <strong>survival probability</strong> is the probability that the excess over the threshold exceeds a given value: 1 − G<sub>ξ,β</sub>(y). On a log scale, the exponential tail (ξ = 0) appears as a straight line (exponential decay). A heavy tail (ξ > 0) curves <strong>upward</strong>, meaning far more probability mass in extreme events. The normal tail decays even faster than the exponential.</p>`{
// Moments table: the kth GPD moment is finite iff xi < 1/k; closed forms
// are shown for the mean and variance only.
const xi = gpdXi, beta = gpdBeta
// E[Y^k] exists iff xi < 1/k
const moments = [1, 2, 3, 4].map(k => {
const exists = xi < 1 / k
let value = null
// Raw-moment closed forms for k = 1, 2 (displayed indirectly below)
if (exists && k === 1) value = beta / (1 - xi)
if (exists && k === 2) value = 2 * beta * beta / ((1 - xi) * (1 - 2 * xi))
return { k, exists, value }
})
const mean = xi < 1 ? beta / (1 - xi) : null
const variance = xi < 0.5 ? (beta * beta) / ((1 - xi) * (1 - xi) * (1 - 2 * xi)) : null
return html`<table class="table" style="width:100%;max-width:600px;margin:0 auto;">
<thead><tr>
<th>Moment</th>
<th>Exists?</th>
<th>Condition</th>
<th>Value</th>
</tr></thead>
<tbody>
<tr><td>Mean E[Y]</td>
<td style="font-weight:700;color:${moments[0].exists ? '#2e7d32' : '#d62728'};">${moments[0].exists ? "Yes" : "No (infinite)"}</td>
<td>ξ < 1</td>
<td>${moments[0].exists ? fmt(mean, 4) : "∞"}</td></tr>
<tr><td>Variance Var[Y]</td>
<td style="font-weight:700;color:${moments[1].exists ? '#2e7d32' : '#d62728'};">${moments[1].exists ? "Yes" : "No (infinite)"}</td>
<td>ξ < 1/2</td>
<td>${moments[1].exists && variance !== null ? fmt(variance, 4) : "∞"}</td></tr>
<tr><td>3rd moment E[Y³]</td>
<td style="font-weight:700;color:${moments[2].exists ? '#2e7d32' : '#d62728'};">${moments[2].exists ? "Yes" : "No (infinite)"}</td>
<td>ξ < 1/3</td>
<td></td></tr>
<tr><td>4th moment E[Y⁴]</td>
<td style="font-weight:700;color:${moments[3].exists ? '#2e7d32' : '#d62728'};">${moments[3].exists ? "Yes" : "No (infinite)"}</td>
<td>ξ < 1/4</td>
<td></td></tr>
</tbody></table>
<p style="color:#666;font-size:0.85rem;text-align:center;">The <em>k</em>th moment of the GPD is finite only when ξ < 1/<em>k</em>. For financial data with ξ between 0.1 and 0.4, the variance may be finite but higher moments (skewness, kurtosis) may not exist. Current ξ = ${fmt(xi, 2)}: the tail index α = 1/ξ = ${xi > 0 ? fmt(1/xi, 2) : "∞"}, so moments up to order ${xi > 0 ? Math.floor(1/xi) : "all"} are finite.</p>`
}2. Threshold selection and the mean excess plot
The choice of threshold \(u\) involves a bias-variance tradeoff. A threshold that is too low gives many exceedances (low variance) but the GPD approximation may not hold (high bias). A threshold that is too high gives few exceedances (high variance) but better GPD approximation (low bias).
The mean excess plot is a key diagnostic. Given \(n\) loss observations \(v_1, \ldots, v_n\), let \(n_u\) denote the number of observations exceeding the threshold \(u\). The sample mean excess function
\[ e_n(u) = \frac{1}{n_u} \sum_{i: v_i > u} (v_i - u) \]
plotted against \(u\), should become approximately linear above a good threshold. An upward linear trend indicates \(\xi > 0\) (heavy tails).
The Hill estimator provides a simple estimate of \(\xi\) (for \(\xi > 0\)), using the \(n_u\) observations \(v_i\) that exceed the threshold \(u\):
\[ \hat{\xi} = \frac{1}{n_u} \sum_{i=1}^{n_u} \ln\left(\frac{v_i}{u}\right) \]
Note
Simulated data. We simulate \(n\) i.i.d. losses from a Pareto distribution, whose tail is exactly a Generalized Pareto Distribution with known parameters \(\xi\) and \(\beta\). This means the GPD approximation holds exactly above any threshold, and we can compare estimates against the true values without the complication of slow asymptotic convergence that affects distributions like the Student-\(t\).
Tip
How to experiment
Increase \(\xi\) to make the tail heavier. In the mean excess plot, look for where the plot becomes approximately linear, and move the threshold slider there. In the Hill plot, look for a stable plateau around the true \(\xi\). Compare the Hill and GPD MLE estimates in the parameter table. Click “New sample” repeatedly to see sampling variability.
// Generate Pareto/GPD sample: losses = minimum + GPD excess
// Pareto with shape xi and scale beta: P(X > x) = (1 + xi*x/beta)^(-1/xi)
// We add a location shift so losses start at some minimum > 0
// Simulate n i.i.d. losses whose excesses are exactly GPD(threshXi,
// threshBetaTrue), shifted so all losses start at minLoss. Returned sorted
// ascending so downstream cells can index percentiles directly.
threshSample = {
// Bare reference: presumably forces this cell to re-run when the seed
// changes ("New sample" button) — Observable reactive dependency.
threshSeed
const rng = rng_utils.mulberry32(123 + threshSeed)
const n = threshN
const xi = threshXi
const beta = threshBetaTrue
const losses = new Array(n)
const minLoss = 0.1 // location shift so all losses are positive
for (let i = 0; i < n; i++) {
// GPD inverse CDF: Q(p) = (beta/xi)*((1-p)^(-xi) - 1)
const u = rng()
const gpdExcess = (beta / xi) * (Math.pow(1 - u, -xi) - 1)
losses[i] = minLoss + gpdExcess
}
return losses.sort((a, b) => a - b)
}// Compute threshold and exceedances
// Everything derived from the sample at the chosen threshold percentile:
// the threshold u, exceedances, Hill estimate, GPD MLE (via Nelder-Mead
// with several starting points), mean-excess plot data, and Hill plot data.
threshResults = {
const sorted = threshSample
const n = sorted.length
// Threshold = empirical threshPct-th percentile of the sorted sample.
const idx = Math.floor(n * threshPct / 100)
const u = sorted[idx]
const exceedances = sorted.filter(v => v > u)
const nu = exceedances.length
const trueXi = threshXi
// Hill estimator
const hillXi = exceedances.reduce((s, v) => s + Math.log(v / u), 0) / nu
const hillC = (nu / n) * Math.pow(u, 1 / hillXi)
// GPD MLE via numerical optimization (Nelder-Mead simplex)
const excesses = exceedances.map(v => v - u)
// Log-likelihood of the excesses under GPD(xi, beta); -Infinity encodes
// an infeasible parameter pair (beta <= 0 or a point outside the support).
function gpdLogLik(xi, beta) {
if (beta <= 0) return -Infinity
let ll = 0
for (const y of excesses) {
if (Math.abs(xi) < 1e-8) {
ll += -Math.log(beta) - y / beta
} else {
const arg = 1 + xi * y / beta
if (arg <= 0) return -Infinity
ll += -Math.log(beta) - (1 / xi + 1) * Math.log(arg)
}
}
return ll
}
// Multiple starting points for robustness
// Method-of-moments start (clamped), plus Hill-based and fixed guesses.
const meanExcess = excesses.reduce((a, b) => a + b, 0) / excesses.length
const varExcess = excesses.reduce((a, b) => a + (b - meanExcess) ** 2, 0) / excesses.length
const momXi = Math.max(-0.4, Math.min(0.8, 0.5 * (1 - meanExcess * meanExcess / varExcess)))
const momBeta = Math.max(0.01, meanExcess * (1 - momXi))
const negLL = ([xi, beta]) => -gpdLogLik(xi, beta)
const starts = [
[momXi, momBeta],
[hillXi, meanExcess * (1 - hillXi)],
[0.1, meanExcess * 0.9],
[0.3, meanExcess * 0.7]
]
// Keep the best (lowest negative log-likelihood) feasible optimum.
let bestResult = null, bestNLL = Infinity
for (const s of starts) {
const s0 = [s[0], Math.max(0.01, s[1])]
const r = nelderMead(negLL, s0)
const nll = negLL(r)
if (nll < bestNLL && r[1] > 0) { bestNLL = nll; bestResult = r }
}
const mleXi = bestResult[0], mleBeta = Math.max(0.001, bestResult[1])
// Mean excess data
// e(u') for candidate thresholds from the 50th to 99th percentile; stop
// once fewer than 5 exceedances remain.
const meData = []
for (let pct = 50; pct <= 99; pct += 1) {
const i = Math.floor(n * pct / 100)
const thresh = sorted[i]
const exc = sorted.filter(v => v > thresh)
if (exc.length < 5) break
const me = exc.reduce((s, v) => s + (v - thresh), 0) / exc.length
meData.push({ threshold: thresh, meanExcess: me, pct })
}
// Hill plot data (varying number of order statistics)
const descSorted = sorted.slice().reverse()
const hillData = []
for (let k = 10; k <= Math.min(n * 0.3, 500); k += 5) {
const uK = descSorted[k]
let sumLog = 0
for (let i = 0; i < k; i++) sumLog += Math.log(descSorted[i] / uK)
const xiHat = sumLog / k
hillData.push({ k, xi: xiHat })
}
return { u, nu, n, exceedances, excesses, trueXi, hillXi, hillC, mleXi, mleBeta, meData, hillData, sorted }
}{
// Mean excess plot e(u') vs. threshold candidates, with a dashed rule at
// the currently selected threshold.
const r = threshResults
const uLine = r.u
return Plot.plot({
height: 380, marginLeft: 60,
x: { label: "Threshold u", grid: false },
y: { label: "Mean excess e(u)", grid: true },
marks: [
Plot.dot(r.meData, { x: "threshold", y: "meanExcess", r: 3, fill: "#333", fillOpacity: 0.5 }),
Plot.ruleX([uLine], { stroke: "#d62728", strokeWidth: 2, strokeDasharray: "6 3" }),
Plot.text([[uLine, Math.max(...r.meData.map(d => d.meanExcess)) * 0.9]], {
x: d => d[0], y: d => d[1],
text: d => `u = ${fmt(d[0], 3)}`,
dx: 8, textAnchor: "start", fill: "#d62728", fontWeight: 700, fontSize: 12
})
]
})
}html`<p style="color:#666;font-size:0.85rem;">The mean excess plot for Pareto-distributed losses with true ξ = ${fmt(threshXi, 2)} and β = ${fmt(threshBetaTrue, 1)}. An <strong>upward linear trend</strong> indicates heavy tails (ξ > 0). The <span style="color:#d62728;font-weight:700;">red dashed line</span> marks the current threshold at the ${threshPct}th percentile. Choose the threshold where the plot becomes approximately linear.</p>`{
// Hill plot: xi-hat as a function of k (number of largest order statistics
// used), with the true xi (green rule) and the current k = n_u (orange).
const r = threshResults
const trueXi = r.trueXi
return Plot.plot({
height: 380, marginLeft: 60,
x: { label: "Number of largest observations k", grid: false },
y: { label: "Hill estimate ξ̂", grid: true },
marks: [
Plot.ruleY([trueXi], { stroke: "#2e7d32", strokeWidth: 2, strokeDasharray: "6 3" }),
Plot.text([[r.hillData[r.hillData.length - 1].k, trueXi]], {
x: d => d[0], y: d => d[1],
text: [`True ξ = ${fmt(trueXi, 3)}`],
dy: -10, textAnchor: "end", fill: "#2e7d32", fontWeight: 700, fontSize: 12
}),
Plot.line(r.hillData, { x: "k", y: "xi", stroke: "#d62728", strokeWidth: 1.5 }),
Plot.ruleX([r.nu], { stroke: "#e67e22", strokeWidth: 1.5, strokeDasharray: "4 2" }),
Plot.text([[r.nu, Math.max(...r.hillData.map(d => d.xi)) * 0.95]], {
x: d => d[0], y: d => d[1],
text: [`k = ${r.nu} (current)`],
dx: 8, textAnchor: "start", fill: "#e67e22", fontWeight: 600, fontSize: 11
})
]
})
}html`<p style="color:#666;font-size:0.85rem;">The Hill plot shows the estimated ξ as a function of <em>k</em>, the number of largest losses used in the estimation. Increasing <em>k</em> is equivalent to lowering the threshold (including more data). A <strong>stable plateau</strong> near the <span style="color:#2e7d32;font-weight:700;">true ξ</span> indicates a good range for <em>k</em>. Too few (left) gives noisy estimates; too many (right) introduces bias as non-tail observations enter the estimation.</p>`{
// Fitted GPD tail (red line) vs. the empirical survival of the exceedances
// (dots) on a log scale.
const r = threshResults
// Empirical survival of exceedances
// For the i-th smallest exceedance, (nu - i)/n counts the observations at
// or above it — i.e. an empirical P(V >= v) convention.
const nu = r.exceedances.length
const empData = r.exceedances.map((v, i) => ({
x: v,
empSurv: (nu - i) / (r.n)
}))
// Fitted GPD tail
const xMax = r.exceedances[nu - 1] * 1.2
const fittedData = []
for (let i = 0; i <= 200; i++) {
const x = r.u + (i / 200) * (xMax - r.u)
const surv = (r.nu / r.n) * gpdSurvival(x - r.u, r.mleXi, r.mleBeta)
if (surv > 1e-6) fittedData.push({ x, surv })
}
return Plot.plot({
height: 380, marginLeft: 60,
x: { label: "Loss v", grid: false },
y: { label: "P(V > v)", type: "log", grid: true, tickFormat: probFmt },
marks: [
Plot.dot(empData, { x: "x", y: "empSurv", r: 3, fill: "#333", fillOpacity: 0.5 }),
Plot.line(fittedData, { x: "x", y: "surv", stroke: "#d62728", strokeWidth: 2 })
]
})
}{
// Parameter comparison table (true vs. GPD MLE vs. Hill) plus EVT VaR/ES
// at 99% and 99.9% computed from the MLE fit.
const r = threshResults
const var99 = evtVaR(0.99, r.u, r.mleXi, r.mleBeta, r.nu, r.n)
const var999 = evtVaR(0.999, r.u, r.mleXi, r.mleBeta, r.nu, r.n)
const es99 = evtES(0.99, r.u, r.mleXi, r.mleBeta, r.nu, r.n)
const es999 = evtES(0.999, r.u, r.mleXi, r.mleBeta, r.nu, r.n)
// True beta at the chosen threshold u:
// For GPD data starting at x0=0.1 with params (xi, beta0),
// excesses above u have GPD(xi, beta_u) where beta_u = beta0 + xi*(u - x0)
// (threshold-stability property of the GPD).
const x0 = 0.1
const trueBetaAtU = threshBetaTrue + threshXi * (r.u - x0)
return html`<table class="table" style="width:100%;max-width:700px;margin:0 auto;">
<thead><tr>
<th></th>
<th style="color:#2e7d32;">True</th>
<th style="color:#d62728;">GPD MLE</th>
<th style="color:#e67e22;">Hill</th>
</tr></thead>
<tbody>
<tr><td style="font-weight:500;">Shape ξ</td>
<td>${fmt(r.trueXi, 4)}</td>
<td>${fmt(r.mleXi, 4)}</td>
<td>${fmt(r.hillXi, 4)}</td></tr>
<tr><td style="font-weight:500;">Scale β (at threshold u)</td>
<td>${fmt(trueBetaAtU, 4)}</td>
<td>${fmt(r.mleBeta, 4)}</td>
<td></td></tr>
<tr><td style="font-weight:500;">Sample size (n)</td>
<td colspan="3">${r.n}</td></tr>
<tr><td style="font-weight:500;">Threshold u</td>
<td colspan="3">${fmt(r.u, 4)} (${threshPct}th percentile)</td></tr>
<tr><td style="font-weight:500;">Exceedances n<sub>u</sub></td>
<td colspan="3">${r.nu} out of ${r.n}</td></tr>
<tr style="border-top:2px solid #333;"><td style="font-weight:500;">99% VaR (GPD MLE)</td>
<td colspan="3" style="font-weight:700;">${fmt(var99, 4)}</td></tr>
<tr><td style="font-weight:500;">99.9% VaR (GPD MLE)</td>
<td colspan="3" style="font-weight:700;">${fmt(var999, 4)}</td></tr>
<tr><td style="font-weight:500;">99% ES (GPD MLE)</td>
<td colspan="3" style="font-weight:700;">${fmt(es99, 4)}</td></tr>
<tr><td style="font-weight:500;">99.9% ES (GPD MLE)</td>
<td colspan="3" style="font-weight:700;">${fmt(es999, 4)}</td></tr>
</tbody></table>
<p style="color:#666;font-size:0.85rem;">The GPD MLE method estimates both ξ and β jointly, while the Hill estimator only estimates ξ (assuming ξ > 0). Since the data are drawn from an exact GPD, the true ξ does not depend on the threshold, but the true β does: β<sub>u</sub> = β<sub>0</sub> + ξ(u − x<sub>0</sub>). With few exceedances (e.g., n<sub>u</sub> < 80), the log-likelihood surface is very flat in the ξ direction, so the MLE can differ substantially from the true value in individual samples. Increasing the sample size or lowering the threshold increases n<sub>u</sub> and improves estimation precision. Click "New sample" repeatedly to see the sampling variability.</p>
<p style="color:#666;font-size:0.85rem;"><strong>Why the Hill estimator tends to overestimate ξ.</strong> The Hill estimator is the MLE for the pure Pareto model P(X > x) ∝ x<sup>−1/ξ</sup>, which is a special case of the GPD where the scale parameter equals ξ · u. When the actual GPD scale β<sub>u</sub> exceeds ξ · u (as it does here, since β<sub>u</sub> ≈ ${fmt(trueBetaAtU, 2)} but ξ · u ≈ ${fmt(threshXi * r.u, 2)}), the excesses are more spread out than a pure Pareto would predict. The Hill estimator interprets this extra spread as a heavier tail, leading to a systematic upward bias. The GPD MLE avoids this by estimating both ξ and β jointly, allowing the scale parameter to absorb the spread without distorting the shape estimate.</p>`
}3. EVT VaR and ES across confidence levels
Given \(n\) total observations, of which \(n_u\) exceed the threshold \(u\), the EVT formulas for VaR and ES at confidence level \(q\) are:
\[ \text{VaR}_q = u + \frac{\beta}{\xi}\left\{\left[\frac{n}{n_u}(1-q)\right]^{-\xi} - 1\right\} \]
\[ \text{ES}_q = \frac{\text{VaR}_q + \beta - \xi u}{1-\xi} \]
A key result is the asymptotic ES/VaR ratio:
\[ \lim_{q \to 1} \frac{\text{ES}_q}{\text{VaR}_q} = \frac{1}{1-\xi} \]
For fat-tailed distributions (\(\xi > 0\)), this ratio exceeds the normal ratio of approximately 1.15 (at 99%), making ES particularly valuable as a risk measure.
The illustrations below use a threshold \(u = 200\) ($000s), scale parameter \(\beta = 80\) ($000s), and \(n_u = 5\%\) of \(n\) exceedances, following the example in Hull (2023, chap. 12). The key control is the shape parameter \(\xi\), which drives the divergence between EVT and Normal risk measures.
Tip
How to experiment
Increase \(\xi\) and watch the EVT VaR and ES diverge from the Normal at high confidence levels. In the ES/VaR ratio tab, observe convergence to \(1/(1-\xi)\). The “Extrapolation power” tab shows why EVT matters: at 99.9% confidence with 500 observations, standard historical simulation is impossible, but EVT gives a principled estimate.
// Compute VaR and ES across confidence levels
varSweep = {
const xi = varXi, beta = varBeta, u = varU, nu = varNu, n = varN
const data = []
for (let q = 0.95; q <= 0.9999; q += 0.0002) {
const evtVar = evtVaR(q, u, xi, beta, nu, n)
const evtEs = evtES(q, u, xi, beta, nu, n)
// Normal approximation: use same VaR_95 to calibrate sigma
// We'll use a simpler comparison: normal with same mean and variance as the EVT implied distribution
// For a fair comparison, calibrate normal to match EVT at 95%
const normSigma = evtVaR(0.95, u, xi, beta, nu, n) / Math.abs(qnorm(0.05))
const normVar = normSigma * Math.abs(qnorm(1 - q))
const normEs = normalES(1 - q, normSigma)
const ratio = evtEs / evtVar
const normRatio = normEs / normVar
data.push({ q, qPct: q * 100, evtVar, evtEs, normVar, normEs, ratio, normRatio })
}
return data
}{
// EVT vs. Normal VaR across confidence levels; series hidden via the
// varCmpLegend Set are omitted from marks.
const h = varCmpLegend
return Plot.plot({
height: 400, marginLeft: 70,
x: { label: "Confidence level q (%)", grid: false },
y: { label: "VaR ($000s)", grid: true },
marks: [
...(!h.has("norm") ? [
Plot.line(varSweep, { x: "qPct", y: "normVar", stroke: "#2f71d5", strokeWidth: 2, strokeDasharray: "6 3" })
] : []),
...(!h.has("evt") ? [
Plot.line(varSweep, { x: "qPct", y: "evtVar", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
}html`<p style="color:#666;font-size:0.85rem;">EVT VaR diverges sharply from the Normal at high confidence levels. The Normal distribution, calibrated to match EVT at 95%, systematically <strong>underestimates</strong> tail risk at higher confidence levels because its tail decays exponentially rather than as a power law.</p>`{
// ES/VaR ratio vs. confidence level, with the asymptote 1/(1 - xi)
// (green rule). Ratios outside (0, 5) are clipped out of the plot.
const h = ratioLegend
const asymp = 1 / (1 - varXi)
const filtered = varSweep.filter(d => d.ratio > 0 && d.ratio < 5)
return Plot.plot({
height: 380, marginLeft: 60,
x: { label: "Confidence level q (%)", grid: false },
y: { label: "ES / VaR ratio", grid: true, domain: [1, Math.min(3, Math.max(2, asymp * 1.3))] },
marks: [
...(!h.has("asymp") ? [
Plot.ruleY([asymp], { stroke: "#2e7d32", strokeWidth: 1.5, strokeDasharray: "4 2" })
] : []),
...(!h.has("norm") ? [
Plot.line(filtered, { x: "qPct", y: "normRatio", stroke: "#2f71d5", strokeWidth: 2, strokeDasharray: "6 3" })
] : []),
...(!h.has("evt") ? [
Plot.line(filtered, { x: "qPct", y: "ratio", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
}html`<p style="color:#666;font-size:0.85rem;">The EVT ES/VaR ratio converges to <strong>1/(1−ξ) = ${fmt(1/(1-varXi), 3)}</strong> as the confidence level increases. The Normal ratio converges to approximately 1 (its tail is so thin that ES barely exceeds VaR at extreme confidence levels). With ξ = ${fmt(varXi, 2)}, the ES is ${fmt((1/(1-varXi) - 1) * 100, 0)}% larger than VaR in the tail.</p>`{
// Feasibility table: at each confidence level, how many sample points plain
// Historical Simulation would have in the tail, next to the EVT VaR/ES that
// extrapolate the fitted GPD beyond the observed data.
const n = varN, nu = varNu
const xi = varXi, beta = varBeta, u = varU
const levels = [
{ q: 0.95, label: "95%" },
{ q: 0.99, label: "99%" },
{ q: 0.995, label: "99.5%" },
{ q: 0.999, label: "99.9%" },
{ q: 0.9997, label: "99.97%" },
{ q: 0.9999, label: "99.99%" }
]
const rows = levels.map(({ q, label }) => {
// Expected number of observations beyond the q-quantile for a sample of n
const tailObs = n * (1 - q)
// HS needs at least one tail observation to estimate a quantile at all
const feasible = tailObs >= 1
const var_ = evtVaR(q, u, xi, beta, nu, n)
const es_ = evtES(q, u, xi, beta, nu, n)
return { label, q, tailObs, feasible, var_, es_ }
})
return html`<table class="table" style="width:100%;max-width:750px;margin:0 auto;">
<thead><tr>
<th>Confidence</th>
<th>Tail obs for HS</th>
<th>HS feasible?</th>
<th>EVT VaR ($000s)</th>
<th>EVT ES ($000s)</th>
</tr></thead>
<tbody>
${rows.map(r => `<tr>
<td style="font-weight:500;">${r.label}</td>
<td>${fmt(r.tailObs, 1)}</td>
<td style="font-weight:700;color:${r.feasible ? '#2e7d32' : '#d62728'};">${r.feasible ? (r.tailObs < 5 ? "Marginal" : "Yes") : "No"}</td>
<td style="font-weight:700;">${fmt(r.var_, 1)}</td>
<td style="font-weight:700;">${fmt(r.es_, 1)}</td>
</tr>`).join("")}
</tbody></table>
<p style="color:#666;font-size:0.85rem;">With <strong>n = ${n}</strong> observations, standard Historical Simulation needs at least 1 observation in the tail. At 99.9% confidence, only ${fmt(n * 0.001, 1)} observations fall in the tail. EVT provides principled estimates at <strong>any</strong> confidence level by extrapolating from the ${nu} exceedances above the threshold, using the fitted GPD.</p>`
}4. Normal vs. EVT tail comparison
Even when two distributions have the same VaR, their tail behaviour beyond VaR can be dramatically different. This section calibrates a Normal distribution to have the same VaR as the GPD at the chosen tail probability (1% by default), then compares the tail shapes (Christoffersen 2012, sec. 6.8).
Tip
How to experiment
Increase \(\xi\) and watch the EVT tail extend far beyond the Normal tail. Even though both distributions have the same 1% VaR by construction, the EVT ES is much larger. The probability table shows orders-of-magnitude differences in the probability of extreme losses.
// Calibrate a Normal and a power-law (Hill/EVT) distribution to share the
// same VaR at the chosen tail probability, then sample both tails for the
// density and survival plots. All quantities are in standardized (z-score)
// loss units.
tailCalib = {
const p = tailP / 100
const xi = tailXi
// We set up the comparison in terms of standardized losses (z-scores).
// Normal VaR at probability p: z_p = -qnorm(p) (positive number)
const normVaR = -qnorm(p) // e.g., 2.326 for p=1%
// For the EVT (Hill/power-law) comparison as in Christoffersen:
// F(y) = 1 - c * y^(-1/xi), matched so that P(Y > normVaR) = p
// => c = p * normVaR^(1/xi)
const c = p * Math.pow(normVaR, 1 / xi)
// EVT survival: P(Y > y) = c * y^(-1/xi)
const evtSurvival = y => c * Math.pow(y, -1 / xi)
// For a pure power-law tail with xi < 1, ES/VaR equals 1/(1-xi)
const evtESratio = 1 / (1 - xi)
const evtES = normVaR * evtESratio
// Normal ES at tail probability p: phi(z_p) / p (standard closed form)
const normES = normalPDF(normVaR) / p
// EVT PDF: f(y) = c * (1/xi) * y^(-1/xi - 1)
const evtPDF = y => c * (1 / xi) * Math.pow(y, -1 / xi - 1)
// Normal survival P(X > y). For y < 5 the complementary CDF is accurate
// enough; beyond that, 1 - normalCDF(y) suffers catastrophic cancellation
// (the erf approximation above carries ~1e-7 absolute error), so we switch
// to the asymptotic Mills-ratio expansion:
// P(X > x) ≈ phi(x) * (1/x - 1/x^3 + 3/x^5 - ...)
const normSurvival = y => {
if (y < 5) return 1 - normalCDF(y)
// Mills ratio series for large y; coefficients are (2k-1)!! with
// alternating signs, generated by the recurrence below
const phi = normalPDF(y)
let sum = 0, term = 1 / y
for (let k = 0; k < 20; k++) {
sum += term
term *= -(2 * k + 1) / (y * y)
if (Math.abs(term) < 1e-16 * Math.abs(sum)) break
}
return phi * sum
}
// Survival-function samples from just below VaR out to 4x VaR; values are
// floored at 1e-30 so both series remain plottable on a log axis
const data = []
const startY = normVaR * 0.7
const endY = normVaR * 4
for (let i = 0; i <= 300; i++) {
const y = startY + (i / 300) * (endY - startY)
const ns = normSurvival(y)
const es = evtSurvival(y)
if (ns > 1e-30 || es > 1e-30) {
data.push({ y, normSurv: Math.max(ns, 1e-30), evtSurv: Math.max(es, 1e-30) })
}
}
// Density samples for the density plot (Christoffersen-style figure)
const pdfData = []
const pdfEnd = normVaR * 2.5
for (let i = 0; i <= 300; i++) {
const y = startY + (i / 300) * (pdfEnd - startY)
pdfData.push({ y, normPdf: normalPDF(y), evtPdf: evtPDF(y) })
}
return { normVaR, normES, evtES, evtESratio, c, data, pdfData, p, xi }
}{
const h = tailDensLegend
const r = tailCalib
const maxPdf = Math.max(...r.pdfData.map(d => Math.max(d.normPdf, d.evtPdf))) * 1.1
return Plot.plot({
height: 400, marginLeft: 60,
x: { label: "Loss (standard deviations)", grid: false, domain: [r.pdfData[0].y, r.pdfData[r.pdfData.length - 1].y] },
y: { label: "Density", grid: true, domain: [0, maxPdf] },
marks: [
...(!h.has("norm") ? [
Plot.line(r.pdfData, { x: "y", y: "normPdf", stroke: "#2f71d5", strokeWidth: 2 })
] : []),
...(!h.has("evt") ? [
Plot.line(r.pdfData, { x: "y", y: "evtPdf", stroke: "#d62728", strokeWidth: 2 })
] : []),
...(!h.has("var") ? [
Plot.ruleX([r.normVaR], { stroke: "#333", strokeWidth: 1.5, strokeDasharray: "6 3" }),
Plot.text([[r.normVaR, maxPdf * 0.9]], {
x: d => d[0], y: d => d[1],
text: [`${tailP}% VaR = ${fmt(r.normVaR, 2)}σ`],
dx: 8, textAnchor: "start", fontWeight: 700, fontSize: 12
})
] : [])
]
})
}html`<p style="color:#666;font-size:0.85rem;">Tail densities of the Normal (blue) and EVT (red) distributions, both calibrated to the same ${tailP}% VaR of ${fmt(tailCalib.normVaR, 2)}σ (compare with Christoffersen, 2012, Figure 6.8). The area under each curve to the left of VaR equals ${tailP}%. The Normal density peaks near VaR and drops quickly, concentrating most of the ${tailP}% probability close to VaR. The EVT density stays elevated for much larger losses, meaning a non-trivial probability of extreme events well beyond VaR.</p>`{
const h = tailShapeLegend
const r = tailCalib
return Plot.plot({
height: 400, marginLeft: 60,
x: { label: "Loss (standard deviations)", grid: false },
y: { label: "Tail probability P(L > y)", type: "log", grid: true, tickFormat: probFmt, domain: [1e-8, r.p * 5] },
marks: [
...(!h.has("var") ? [
Plot.ruleX([r.normVaR], { stroke: "#333", strokeWidth: 1.5, strokeDasharray: "6 3" }),
Plot.text([[r.normVaR, r.p]], {
x: d => d[0], y: d => d[1],
text: [`VaR = ${fmt(r.normVaR, 2)}σ`],
dx: 8, textAnchor: "start", fontWeight: 700, fontSize: 12
})
] : []),
...(!h.has("norm") ? [
Plot.line(r.data, { x: "y", y: "normSurv", stroke: "#2f71d5", strokeWidth: 2 })
] : []),
...(!h.has("evt") ? [
Plot.line(r.data, { x: "y", y: "evtSurv", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
}html`<p style="color:#666;font-size:0.85rem;">Both distributions are calibrated to have the <strong>same ${tailP}% VaR</strong> of ${fmt(tailCalib.normVaR, 2)}σ. Beyond VaR, the EVT tail (power-law decay) stays far above the Normal tail (exponential decay). This means ES, which depends on the <strong>entire</strong> tail beyond VaR, is much larger for the heavy-tailed distribution.</p>`{
// Side-by-side summary: a table of VaR/ES in sigma units plus a grouped bar
// chart. Both distributions share the same VaR by construction, so the
// entire difference shows up in ES and the ES/VaR ratio.
const r = tailCalib
return html`<div style="display:flex;gap:30px;flex-wrap:wrap;justify-content:center;align-items:start;">
<div style="min-width:280px;">
<table class="table" style="width:100%;">
<thead><tr>
<th>Measure</th>
<th style="color:#d62728;">EVT</th>
<th style="color:#2f71d5;">Normal</th>
</tr></thead>
<tbody>
<tr><td style="font-weight:500;">VaR (in σ units)</td>
<td>${fmt(r.normVaR, 3)}</td>
<td>${fmt(r.normVaR, 3)}</td></tr>
<tr><td style="font-weight:500;">ES (in σ units)</td>
<td style="font-weight:700;color:#d62728;">${fmt(r.evtES, 3)}</td>
<td style="font-weight:700;color:#2f71d5;">${fmt(r.normES, 3)}</td></tr>
<tr><td style="font-weight:500;">ES / VaR ratio</td>
<td style="font-weight:700;color:#d62728;">${fmt(r.evtESratio, 3)}</td>
<td style="font-weight:700;color:#2f71d5;">${fmt(r.normES / r.normVaR, 3)}</td></tr>
<tr><td style="font-weight:500;">ES excess over VaR</td>
<td style="font-weight:700;color:#d62728;">${fmt((r.evtESratio - 1) * 100, 1)}%</td>
<td style="font-weight:700;color:#2f71d5;">${fmt((r.normES / r.normVaR - 1) * 100, 1)}%</td></tr>
</tbody></table>
</div>
<div style="min-width:280px;">
${Plot.plot({
height: 280, width: 300, marginLeft: 50, marginBottom: 40,
x: { label: null, axis: null },
fx: { label: null, domain: ["VaR", "ES"], padding: 0.2 },
y: { label: "σ units", grid: true, domain: [0, Math.max(r.evtES, r.normES) * 1.15] },
color: { domain: ["EVT", "Normal"], range: ["#d62728", "#2f71d5"], legend: true },
marks: [
Plot.barY([
{ measure: "VaR", value: r.normVaR, method: "EVT" },
{ measure: "VaR", value: r.normVaR, method: "Normal" },
{ measure: "ES", value: r.evtES, method: "EVT" },
{ measure: "ES", value: r.normES, method: "Normal" }
], {
fx: "measure",
x: "method",
y: "value",
fill: "method",
fillOpacity: 0.7,
title: d => `${d.method} ${d.measure}: ${fmt(d.value, 3)}σ`
})
]
})}
</div>
</div>
<p style="color:#666;font-size:0.85rem;text-align:center;">With the same VaR, the EVT distribution's ES is <strong>${fmt(r.evtES - r.normES, 3)}σ</strong> higher than the Normal's. This demonstrates why ES is a more informative risk measure than VaR for heavy-tailed distributions: it reveals the hidden risk beyond VaR that the Normal assumption obscures.</p>`
}{
// Tail-probability table at fixed sigma multiples. Levels well below the
// common VaR point are filtered out, since both models agree there by
// construction.
const r = tailCalib
const levels = [2, 3, 4, 5, 6, 7, 8].filter(s => s >= r.normVaR * 0.8)
return html`<table class="table" style="width:100%;max-width:650px;margin:0 auto;">
<thead><tr>
<th>Loss level</th>
<th style="color:#d62728;">EVT probability</th>
<th style="color:#2f71d5;">Normal probability</th>
<th>Ratio (EVT/Normal)</th>
</tr></thead>
<tbody>
${levels.map(s => {
// Normal survival vs. power-law survival c * s^(-1/xi) at s sigma
const normP = 1 - normalCDF(s)
const evtP = r.c * Math.pow(s, -1 / r.xi)
const ratio = evtP / normP
return `<tr>
<td style="font-weight:500;">${s}σ</td>
<td>${evtP.toExponential(2)}</td>
<td>${normP.toExponential(2)}</td>
<td style="font-weight:700;">${ratio < 1000 ? fmt(ratio, 1) + "×" : Math.round(ratio).toLocaleString() + "×"}</td>
</tr>`
}).join("")}
</tbody></table>
<p style="color:#666;font-size:0.85rem;text-align:center;">Both distributions are calibrated to the same ${tailP}% VaR. But the EVT distribution assigns <strong>orders of magnitude</strong> more probability to extreme losses. A 6σ event that would be virtually impossible under normality has a non-trivial probability under the heavy-tailed EVT model.</p>`
}5. Conditional EVT (GARCH + GPD)
For time series with volatility clustering, EVT is applied to the standardized residuals from a GARCH model rather than to raw returns. This is called conditional EVT (Christoffersen 2012, sec. 6.8; Hull 2023, chap. 12):
- Fit a GARCH model to the return series \(R_t\) to obtain the conditional volatility \(\hat{\sigma}_t\)
- Extract standardized residuals: \(\hat{z}_t = R_t / \hat{\sigma}_t\), which should be approximately i.i.d.
- Apply EVT to the tail of the \(\hat{z}_t\) distribution (fit GPD)
- Compute conditional risk measures, scaling by the volatility forecast \(\hat{\sigma}_{t+1}\):
\[ \text{VaR}_q^t = \hat{\sigma}_{t+1} \cdot \text{VaR}_q(z), \quad \text{ES}_q^t = \hat{\sigma}_{t+1} \cdot \text{ES}_q(z) \]
This combines GARCH’s ability to capture volatility dynamics with EVT’s rigorous tail modelling.
Note
Simulated data. A GARCH(1,1) process is simulated with standardized Student-\(t\) shocks. The GARCH parameters and shock distribution are known (since we generated the data). In practice, both the GARCH model and the EVT parameters would need to be estimated. The FHS method simulates 5,000 next-day returns by drawing standardized residuals with replacement and scaling by \(\hat{\sigma}_{t+1}\); VaR and ES are then computed from this simulated distribution.
Tip
How to experiment
Lower the degrees of freedom \(d\) to make the shocks heavier-tailed, and observe that GARCH+EVT captures this while GARCH-Normal underestimates tail risk. Increase the volatility forecast \(\hat{\sigma}_{t+1}\) to see all conditional measures scale proportionally. Compare how the four methods behave at different confidence levels in the “Confidence sweep” tab.
// Simulate GARCH + apply EVT to residuals
cevtSim = {
cevtSeed
const rng = rng_utils.mulberry32(456 + cevtSeed)
const n = cevtN
const alpha = cevtAlpha, beta = cevtBeta
const vl = (1.5 / 100) ** 2 // long-run variance
const omega = vl * (1 - alpha - beta)
const df = cevtDf
const ret = new Array(n)
const sig2 = new Array(n)
const stdResid = new Array(n)
sig2[0] = vl
for (let t = 0; t < n; t++) {
const z = rng_utils.stdTSample(rng, df)
stdResid[t] = z
ret[t] = Math.sqrt(sig2[t]) * z
if (t < n - 1) {
sig2[t + 1] = omega + alpha * ret[t] * ret[t] + beta * sig2[t]
}
}
// Apply EVT to the losses (negative residuals) => work with abs of negative residuals
const losses = stdResid.map(z => -z).sort((a, b) => a - b)
// Keep only positive losses for tail analysis
const posLosses = losses.filter(l => l > 0).sort((a, b) => a - b)
const nPos = posLosses.length
// Threshold at 95th percentile of positive losses
const threshIdx = Math.floor(nPos * 0.95)
const u = posLosses[threshIdx]
const exceedances = posLosses.filter(l => l > u)
const nu = exceedances.length
const excesses = exceedances.map(l => l - u)
// GPD MLE
function gpdLL(xi, b) {
if (b <= 0) return -Infinity
let ll = 0
for (const y of excesses) {
if (Math.abs(xi) < 1e-8) {
ll += -Math.log(b) - y / b
} else {
const arg = 1 + xi * y / b
if (arg <= 0) return -Infinity
ll += -Math.log(b) - (1 / xi + 1) * Math.log(arg)
}
}
return ll
}
// Multiple starting points for robustness
const meExc = excesses.reduce((a, b) => a + b, 0) / excesses.length
const vrExc = excesses.reduce((a, b) => a + (b - meExc) ** 2, 0) / excesses.length
const xi0 = Math.max(-0.4, Math.min(0.8, 0.5 * (1 - meExc * meExc / vrExc)))
const b0 = Math.max(0.01, meExc * (1 - xi0))
// Hill estimator for starting point
const hillXiC = exceedances.reduce((s, v) => s + Math.log(v / u), 0) / nu
const negLLC = ([xi, b]) => -gpdLL(xi, b)
const startsC = [
[xi0, b0],
[hillXiC, meExc * (1 - hillXiC)],
[0.1, meExc * 0.9],
[0.3, meExc * 0.7]
]
let bestResC = null, bestNLLC = Infinity
for (const s of startsC) {
const s0 = [s[0], Math.max(0.01, s[1])]
const r = nelderMead(negLLC, s0)
const nll = negLLC(r)
if (nll < bestNLLC && r[1] > 0) { bestNLLC = nll; bestResC = r }
}
return { ret, sig2, stdResid, u, nu, n: nPos, mleXi: bestResC[0], mleBeta: Math.max(0.001, bestResC[1]), exceedances, totalN: n }
}// Compute all four methods
cevtResults = {
const s = cevtSim
const sigma = cevtSigma / 100
const q = cevtConf / 100
const p = 1 - q // tail probability
// Method 1: GARCH + Normal
const normQuantile = -qnorm(p)
const normVaR = sigma * normQuantile
const normES = sigma * normalPDF(qnorm(p)) / p
// Method 2: FHS (5000 simulated next-day returns)
// Draw z* from standardized residuals with replacement, compute R* = sigma * z*
const nResid = s.stdResid.length
const nBoot = 5000
const rngBoot = rng_utils.mulberry32(999 + cevtSeed)
const fhsSimReturns = new Array(nBoot)
for (let b = 0; b < nBoot; b++) {
const zStar = s.stdResid[Math.floor(rngBoot() * nResid)]
fhsSimReturns[b] = sigma * zStar
}
fhsSimReturns.sort((a, b) => a - b)
const fhsPos = Math.max(0, Math.floor(nBoot * p) - 1)
const fhsVaR = -fhsSimReturns[fhsPos]
const fhsTail = fhsSimReturns.slice(0, fhsPos + 1)
const fhsES = -fhsTail.reduce((a, b) => a + b, 0) / fhsTail.length
// Method 3: GARCH + EVT
const evtVaRz = evtVaR(q, s.u, s.mleXi, s.mleBeta, s.nu, s.n)
const evtESz = evtES(q, s.u, s.mleXi, s.mleBeta, s.nu, s.n)
const evtVaRcond = sigma * evtVaRz
const evtEScond = sigma * evtESz
// Method 4: Standard HS (from raw returns, ignoring volatility)
const rawSorted = s.ret.slice().sort((a, b) => a - b)
const hsPos = Math.max(0, Math.floor(s.totalN * p) - 1)
const hsVaR = -rawSorted[hsPos]
const hsTail = rawSorted.slice(0, hsPos + 1)
const hsES = -hsTail.reduce((a, b) => a + b, 0) / hsTail.length
return {
normVaR, normES, fhsVaR, fhsES, evtVaRcond, evtEScond, hsVaR, hsES,
evtVaRz, evtESz, normQuantile,
mleXi: s.mleXi, mleBeta: s.mleBeta, u: s.u, nu: s.nu,
sigma, q, p
}
}{
const s = cevtSim
// Empirical survival of exceedances
const nu = s.exceedances.length
const empData = s.exceedances.map((v, i) => ({
x: v,
empSurv: (nu - i) / s.n
}))
// Fitted GPD tail
const xMax = s.exceedances[nu - 1] * 1.2
const fittedData = []
for (let i = 0; i <= 200; i++) {
const x = s.u + (i / 200) * (xMax - s.u)
const surv = (s.nu / s.n) * gpdSurvival(x - s.u, s.mleXi, s.mleBeta)
if (surv > 1e-6) fittedData.push({ x, surv })
}
return Plot.plot({
height: 380, marginLeft: 60,
x: { label: "Standardized loss |ẑ|", grid: false },
y: { label: "P(|ẑ| > y)", type: "log", grid: true, tickFormat: probFmt },
marks: [
Plot.dot(empData, { x: "x", y: "empSurv", r: 3, fill: "#333", fillOpacity: 0.5 }),
Plot.line(fittedData, { x: "x", y: "surv", stroke: "#d62728", strokeWidth: 2 })
]
})
}html`<p style="color:#666;font-size:0.85rem;">Empirical tail of standardized residual losses (dots) vs. fitted GPD (red line). Even after GARCH filtering, residuals from Student-<em>t</em>(${cevtDf}) shocks are fat-tailed. GPD MLE estimates: ξ = ${fmt(cevtSim.mleXi, 3)}, β = ${fmt(cevtSim.mleBeta, 3)} (true ξ = 1/<em>d</em> = ${fmt(1/cevtDf, 3)}).</p>`{
// Results table for the four methods at the chosen confidence level; the
// GARCH+EVT row is highlighted as the headline method.
const r = cevtResults
return html`<table class="table" style="width:100%;">
<thead><tr>
<th>Method</th>
<th>VaR (% of portfolio)</th>
<th>ES (% of portfolio)</th>
<th>ES/VaR</th>
</tr></thead>
<tbody>
<tr style="background:#fff3e0;"><td style="font-weight:700;color:#d62728;">GARCH + EVT</td>
<td style="font-weight:700;">${fmt(r.evtVaRcond * 100, 3)}%</td>
<td style="font-weight:700;">${fmt(r.evtEScond * 100, 3)}%</td>
<td>${fmt(r.evtEScond / r.evtVaRcond, 3)}</td></tr>
<tr><td style="font-weight:500;color:#2e7d32;">GARCH + FHS</td>
<td>${fmt(r.fhsVaR * 100, 3)}%</td>
<td>${fmt(r.fhsES * 100, 3)}%</td>
<td>${fmt(r.fhsES / r.fhsVaR, 3)}</td></tr>
<tr><td style="font-weight:500;color:#2f71d5;">GARCH + Normal</td>
<td>${fmt(r.normVaR * 100, 3)}%</td>
<td>${fmt(r.normES * 100, 3)}%</td>
<td>${fmt(r.normES / r.normVaR, 3)}</td></tr>
<tr><td style="font-weight:500;color:#e67e22;">Standard HS</td>
<td>${fmt(r.hsVaR * 100, 3)}%</td>
<td>${fmt(r.hsES * 100, 3)}%</td>
<td>${fmt(r.hsES / r.hsVaR, 3)}</td></tr>
</tbody></table>
<p style="color:#666;font-size:0.85rem;">At ${cevtConf}% confidence with σ̂<sub>t+1</sub> = ${cevtSigma}%. GARCH+EVT captures both volatility dynamics (through σ̂) and fat tails (through the GPD fit). GARCH+Normal underestimates tail risk because it assumes Gaussian shocks. Standard HS ignores current volatility entirely. GARCH+FHS simulates 5,000 next-day returns by drawing standardized shocks with replacement and scaling by σ̂<sub>t+1</sub>, then computes VaR and ES from this simulated distribution. It cannot extrapolate beyond the largest observed shock.</p>`
}viewof cevtSweepLegend = legend([
// Legend entries; each key matches a series toggled in the sweep plots below.
{ key: "evt", label: "GARCH + EVT", color: "#d62728", type: "line" },
{ key: "fhs", label: "GARCH + FHS", color: "#2e7d32", type: "line" },
{ key: "norm", label: "GARCH + Normal", color: "#2f71d5", type: "dashed" },
{ key: "hs", label: "Standard HS", color: "#e67e22", type: "dashed" }
])cevtSweepData = {
const s = cevtSim
const sigma = cevtSigma / 100
const rawSorted = s.ret.slice().sort((a, b) => a - b)
const nResid = s.stdResid.length
// FHS: simulate 5000 next-day returns by resampling residuals
const nBoot = 5000
const rngSweep = rng_utils.mulberry32(888 + cevtSeed)
const fhsSweepReturns = new Array(nBoot)
for (let b = 0; b < nBoot; b++) {
const zStar = s.stdResid[Math.floor(rngSweep() * nResid)]
fhsSweepReturns[b] = sigma * zStar
}
fhsSweepReturns.sort((a, b) => a - b)
const data = []
for (let qPct = 95; qPct <= 99.9; qPct += 0.1) {
const q = qPct / 100
const p = 1 - q
// Normal
const nVar = sigma * (-qnorm(p))
const nEs = sigma * normalPDF(qnorm(p)) / p
// FHS (quantile from simulated returns)
const fhsPos = Math.max(0, Math.floor(nBoot * p) - 1)
const fVar = -fhsSweepReturns[fhsPos]
const fTail = fhsSweepReturns.slice(0, fhsPos + 1)
const fEs = -fTail.reduce((a, b) => a + b, 0) / fTail.length
// EVT
const eVarZ = evtVaR(q, s.u, s.mleXi, s.mleBeta, s.nu, s.n)
const eEsZ = evtES(q, s.u, s.mleXi, s.mleBeta, s.nu, s.n)
const eVar = sigma * eVarZ
const eEs = sigma * eEsZ
// HS
const hsPos = Math.max(0, Math.floor(s.totalN * p) - 1)
const hVar = -rawSorted[hsPos]
const hTail = rawSorted.slice(0, hsPos + 1)
const hEs = -hTail.reduce((a, b) => a + b, 0) / hTail.length
data.push({
qPct, q,
evtVaR: eVar * 100, evtES: eEs * 100,
fhsVaR: fVar * 100, fhsES: fEs * 100,
normVaR: nVar * 100, normES: nEs * 100,
hsVaR: hVar * 100, hsES: hEs * 100
})
}
return data
}{
const h = cevtSweepLegend
const data = cevtSweepData
const varPlot = Plot.plot({
height: 300, marginLeft: 60,
x: { label: "Confidence level q (%)", grid: false },
y: { label: "VaR (% of portfolio)", grid: true },
marks: [
...(!h.has("hs") ? [
Plot.line(data, { x: "qPct", y: "hsVaR", stroke: "#e67e22", strokeWidth: 1.5, strokeDasharray: "4 2" })
] : []),
...(!h.has("norm") ? [
Plot.line(data, { x: "qPct", y: "normVaR", stroke: "#2f71d5", strokeWidth: 2, strokeDasharray: "6 3" })
] : []),
...(!h.has("fhs") ? [
Plot.line(data, { x: "qPct", y: "fhsVaR", stroke: "#2e7d32", strokeWidth: 2 })
] : []),
...(!h.has("evt") ? [
Plot.line(data, { x: "qPct", y: "evtVaR", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
const esPlot = Plot.plot({
height: 300, marginLeft: 60,
x: { label: "Confidence level q (%)", grid: false },
y: { label: "ES (% of portfolio)", grid: true },
marks: [
...(!h.has("hs") ? [
Plot.line(data, { x: "qPct", y: "hsES", stroke: "#e67e22", strokeWidth: 1.5, strokeDasharray: "4 2" })
] : []),
...(!h.has("norm") ? [
Plot.line(data, { x: "qPct", y: "normES", stroke: "#2f71d5", strokeWidth: 2, strokeDasharray: "6 3" })
] : []),
...(!h.has("fhs") ? [
Plot.line(data, { x: "qPct", y: "fhsES", stroke: "#2e7d32", strokeWidth: 2 })
] : []),
...(!h.has("evt") ? [
Plot.line(data, { x: "qPct", y: "evtES", stroke: "#d62728", strokeWidth: 2 })
] : [])
]
})
return html`<div>
<h4 style="text-align:center;margin-bottom:4px;">VaR across confidence levels</h4>
${varPlot}
<h4 style="text-align:center;margin-bottom:4px;margin-top:16px;">ES across confidence levels</h4>
${esPlot}
</div>`
}html`<p style="color:#666;font-size:0.85rem;">GARCH+EVT (red) produces higher VaR and ES than GARCH+Normal (blue) across all confidence levels because the GPD captures the heavy tail that the Normal assumption misses. The gap widens at higher confidence levels. GARCH+FHS (green, 5,000 simulated returns) tracks EVT closely but shows step-like jumps at extreme levels where few observations determine the quantile. Standard HS (orange) produces lower estimates because it uses the unconditional return distribution, which mixes high- and low-volatility periods.</p>`{
// Diagnostics: fitted GPD parameters next to the known true tail index of
// the simulated Student-t shocks (ξ = 1/d), plus the GARCH configuration.
const s = cevtSim
const trueXi = 1 / cevtDf
return html`<table class="table" style="width:100%;max-width:550px;margin:0 auto;">
<thead><tr>
<th>Parameter</th>
<th>Value</th>
</tr></thead>
<tbody>
<tr><td style="font-weight:500;">True ξ = 1/d</td>
<td style="color:#2e7d32;font-weight:700;">${fmt(trueXi, 4)}</td></tr>
<tr><td style="font-weight:500;">GPD MLE ξ̂</td>
<td style="font-weight:700;">${fmt(s.mleXi, 4)}</td></tr>
<tr><td style="font-weight:500;">GPD MLE β̂</td>
<td>${fmt(s.mleBeta, 4)}</td></tr>
<tr><td style="font-weight:500;">Threshold u (95th pctile of losses)</td>
<td>${fmt(s.u, 4)}</td></tr>
<tr><td style="font-weight:500;">Exceedances n<sub>u</sub></td>
<td>${s.nu}</td></tr>
<tr><td style="font-weight:500;">Asymptotic ES/VaR = 1/(1−ξ̂)</td>
<td style="font-weight:700;">${fmt(1/(1-s.mleXi), 4)}</td></tr>
<tr><td style="font-weight:500;">GARCH α, β</td>
<td>${fmt(cevtAlpha, 2)}, ${fmt(cevtBeta, 2)}</td></tr>
<tr><td style="font-weight:500;">Current σ̂<sub>t+1</sub></td>
<td>${cevtSigma}%</td></tr>
</tbody></table>
<p style="color:#666;font-size:0.85rem;text-align:center;">The GPD is fitted to the tail of standardized residual losses (absolute value of negative residuals exceeding the 95th percentile threshold). The conditional VaR and ES are then computed by scaling the residual-level risk measures by the GARCH volatility forecast σ̂<sub>t+1</sub>.</p>`
}References
Christoffersen, Peter F. 2012. Elements of Financial Risk Management. 2nd ed. Academic Press.
Hull, John. 2023. Risk Management and Financial Institutions. 6th ed. John Wiley & Sons.
McNeil, Alexander J., Rüdiger Frey, and Paul Embrechts. 2015. Quantitative Risk Management: Concepts, Techniques and Tools. Revised. Princeton University Press.