Backtesting VaR

Interactive exploration of VaR backtesting with the Christoffersen tests (unconditional coverage, independence, conditional coverage) and binomial coverage analysis with the Kupiec test.

Risk models promise specific probabilistic guarantees: a 1% VaR should be exceeded only 1% of the time. Backtesting checks whether the model delivers on that promise by comparing forecasts against realized outcomes (Christoffersen 2012, chap. 13; Hull 2023, sec. 11.10).

// ============================================================
// SHARED UTILITIES
// ============================================================

// Seeded PRNG (Mulberry32)
prng = {
  function mulberry32(seed) {
    return function() {
      seed |= 0; seed = seed + 0x6D2B79F5 | 0
      let t = Math.imul(seed ^ seed >>> 15, 1 | seed)
      t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t
      return ((t ^ t >>> 14) >>> 0) / 4294967296
    }
  }
  function boxMuller(rng) {
    const u1 = rng(), u2 = rng()
    return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2)
  }
  return { mulberry32, boxMuller }
}

// Standard normal CDF, built on the Abramowitz & Stegun polynomial
// approximation of erf(|x| / sqrt(2)).
normalCDF = x => {
  // Polynomial coefficients from the highest power of t down to the linear term.
  const erfCoefs = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592]
  const scale = 0.3275911
  const z = Math.abs(x) / Math.sqrt(2)
  const t = 1.0 / (1.0 + scale * z)
  // Horner evaluation keeps the same multiply/add order as the nested form.
  const poly = erfCoefs.reduce((acc, c) => acc * t + c)
  const erfAbs = 1 - poly * t * Math.exp(-z * z)
  // erf is odd, so negative arguments mirror the positive result.
  if (x < 0) return 0.5 * (1 - erfAbs)
  return 0.5 * (1 + erfAbs)
}

// Inverse normal CDF (Acklam's algorithm, max |error| ~ 1.15e-9)
qnorm = {
  const a1 = -3.969683028665376e+01, a2 =  2.209460984245205e+02
  const a3 = -2.759285104469687e+02, a4 =  1.383577518672690e+02
  const a5 = -3.066479806614716e+01, a6 =  2.506628277459239e+00
  const b1 = -5.447609879822406e+01, b2 =  1.615858368580409e+02
  const b3 = -1.556989798598866e+02, b4 =  6.680131188771972e+01
  const b5 = -1.328068155288572e+01
  const c1 = -7.784894002430293e-03, c2 = -3.223964580411365e-01
  const c3 = -2.400758277161838e+00, c4 = -2.549732539343734e+00
  const c5 =  4.374664141464968e+00, c6 =  2.938163982698783e+00
  const d1 =  7.784695709041462e-03, d2 =  3.224671290700398e-01
  const d3 =  2.445134137142996e+00, d4 =  3.754408661907416e+00
  const pLow = 0.02425, pHigh = 1 - pLow
  return p => {
    if (p <= 0) return -Infinity
    if (p >= 1) return Infinity
    if (p < pLow) {
      const q = Math.sqrt(-2 * Math.log(p))
      return (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) / ((((d1*q+d2)*q+d3)*q+d4)*q+1)
    }
    if (p <= pHigh) {
      const q = p - 0.5, r = q * q
      return (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r+a6)*q / (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r+1)
    }
    const q = Math.sqrt(-2 * Math.log(1 - p))
    return -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) / ((((d1*q+d2)*q+d3)*q+d4)*q+1)
  }
}

// Log-gamma function (Lanczos approximation, g=7)
lgamma = {
  const g = 7
  const coef = [
    0.99999999999980993, 676.5203681218851, -1259.1392167224028,
    771.32342877765313, -176.61502916214059, 12.507343278686905,
    -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7
  ]
  function _lgamma(x) {
    if (x <= 0) return Infinity
    if (x < 0.5) {
      return Math.log(Math.PI / Math.sin(Math.PI * x)) - _lgamma(1 - x)
    }
    x -= 1
    let a = coef[0]
    const t = x + g + 0.5
    for (let i = 1; i < g + 2; i++) a += coef[i] / (x + i)
    return 0.5 * Math.log(2 * Math.PI) + (x + 0.5) * Math.log(t) - t + Math.log(a)
  }
  return _lgamma
}

// Fixed-decimal formatter for table cells: returns x.toFixed(d), or "N/A"
// when x is not a usable number (undefined, null, NaN, or any non-number,
// which would otherwise throw because it has no toFixed method).
fmt = (x, d) => typeof x !== "number" || Number.isNaN(x) ? "N/A" : x.toFixed(d)

// Formats a fraction as a percentage with one decimal place (0.25 -> "25.0%").
pctFmt = x => {
  const percent = x * 100
  return `${percent.toFixed(1)}%`
}

// Clickable legend: builds a <div> with one toggleable entry per item.
// The returned element's `value` is a Set of the currently hidden keys, and
// an "input" event is dispatched on the element after every toggle.
legend = (items) => {
  // Swatch markup per item type; any other type falls back to a solid line.
  const swatchByType = new Map([
    ["dot", color => `<svg width="12" height="12"><circle cx="6" cy="6" r="5" fill="${color}" opacity="0.8"/></svg>`],
    ["dashed", color => `<svg width="22" height="12"><line x1="0" y1="6" x2="22" y2="6" stroke="${color}" stroke-width="2" stroke-dasharray="4 2"/></svg>`],
    ["rect", color => `<svg width="14" height="14"><rect width="14" height="14" fill="${color}"/></svg>`]
  ])
  const solidLine = color => `<svg width="22" height="12"><line x1="0" y1="6" x2="22" y2="6" stroke="${color}" stroke-width="2"/></svg>`

  const container = document.createElement("div")
  container.style.cssText = "display:flex; flex-wrap:wrap; margin-top:-4px; margin-bottom:6px;"
  const hiddenKeys = new Set()

  for (const item of items) {
    const key = item.key || item.label
    const makeSwatch = swatchByType.get(item.type) || solidLine

    const entry = document.createElement("span")
    entry.style.cssText = "display:inline-flex; align-items:center; gap:4px; margin-right:14px; cursor:pointer; user-select:none; transition:opacity 0.15s;"
    entry.innerHTML = `${makeSwatch(item.color)}<span style="font-size:0.82rem;">${item.label}</span>`

    entry.addEventListener("click", () => {
      const wasHidden = hiddenKeys.has(key)
      if (wasHidden) {
        hiddenKeys.delete(key)
      } else {
        hiddenKeys.add(key)
      }
      entry.style.opacity = wasHidden ? "1" : "0.35"
      entry.querySelector("span").style.textDecoration = wasHidden ? "none" : "line-through"
      // Publish a fresh copy so consumers see a new Set on each toggle.
      container.value = new Set(hiddenKeys)
      container.dispatchEvent(new Event("input", {bubbles: true}))
    })
    container.appendChild(entry)
  }

  container.value = new Set(hiddenKeys)
  return container
}

// Natural log that returns 0 for any argument that is not strictly positive,
// so count * safeLog(prob) follows the 0 * ln(0) = 0 convention.
safeLog = x => {
  if (x > 0) return Math.log(x)
  return 0
}

// Log of the Binomial(n, p) probability mass at k; -Infinity where the mass is zero.
lnBinomPMF = (k, n, p) => {
  const outsideSupport = k < 0 || k > n
  if (outsideSupport) return -Infinity
  // Degenerate cases: all mass sits on k = 0 (p = 0) or on k = n (p = 1).
  if (p === 0 || p === 1) {
    const massPoint = p === 0 ? 0 : n
    return k === massPoint ? 0 : -Infinity
  }
  const logChoose = lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)
  return logChoose + k * Math.log(p) + (n - k) * Math.log(1 - p)
}

// Binomial(n, p) probability mass at k, obtained by exponentiating the log-mass.
binomPMF = (k, n, p) => {
  const logMass = lnBinomPMF(k, n, p)
  return Math.exp(logMass)
}

// Regularized lower incomplete gamma function P(a, x).
// Returns 0 for x <= 0, uses a power series for x < a + 1 and a continued
// fraction otherwise; both branches normalise with lgamma(a) in log space.
regularizedGammaP = {
  // Series branch: term_n = x^n / (a (a+1) ... (a+n)); stops once the newest
  // term is below 1e-14 relative to the running sum, or when n reaches 300.
  function _series(a, x) {
    let sum = 1 / a, term = 1 / a
    for (let n = 1; n < 300; n++) {
      term *= x / (a + n)
      sum += term
      if (Math.abs(term) < 1e-14 * Math.abs(sum)) break
    }
    // Multiply by x^a e^{-x} / Gamma(a), evaluated as exp of a log-sum.
    return sum * Math.exp(-x + a * Math.log(x) - lgamma(a))
  }
  // Continued-fraction branch: iterates the c/d/h recurrences (Lentz-style,
  // with TINY replacing near-zero denominators) for up to 300 steps, then
  // returns 1 minus the scaled upper-tail value.
  function _cf(a, x) {
    const TINY = 1e-30, EPS = 1e-14
    let b = x + 1 - a, c = 1 / TINY, d = 1 / b, h = d
    for (let i = 1; i <= 300; i++) {
      const an = -i * (i - a)
      b += 2
      d = an * d + b; if (Math.abs(d) < TINY) d = TINY
      c = b + an / c; if (Math.abs(c) < TINY) c = TINY
      d = 1 / d
      const del = d * c
      h *= del
      // Converged once the multiplicative update is within EPS of 1.
      if (Math.abs(del - 1) < EPS) break
    }
    return 1 - Math.exp(-x + a * Math.log(x) - lgamma(a)) * h
  }
  return (a, x) => {
    if (x <= 0) return 0
    if (x < a + 1) return _series(a, x)
    return _cf(a, x)
  }
}

// Chi-squared CDF with df degrees of freedom, expressed through the
// regularized lower incomplete gamma function P(df / 2, x / 2).
chi2CDF = (x, df) => {
  if (x <= 0) return 0
  return regularizedGammaP(df / 2, x / 2)
}

// Kupiec likelihood-ratio statistic for m violations in n days against the
// promised violation probability p (expected in (0, 1)):
//   LR = -2 ln[(1-p)^(n-m) p^m] + 2 ln[(1-m/n)^(n-m) (m/n)^m]
// Both boundary counts are handled by the general formula: m = 0 gives
// -2 n ln(1-p) and m = n gives -2 n ln(p). (Previously m = n returned 0, so an
// all-violations sample could never be rejected.)
kupiecLR = (n, m, p) => {
  // Counts outside 0..n are not valid outcomes; keep returning 0 for them.
  if (m < 0 || m > n) return 0
  // count * ln(prob) with the convention that a zero count contributes 0,
  // which avoids 0 * ln(0) = NaN at the boundaries.
  const countLog = (count, prob) => count > 0 ? count * Math.log(prob) : 0
  const piHat = m / n
  const logLikNull = countLog(n - m, 1 - p) + countLog(m, p)
  const logLikMLE = countLog(n - m, 1 - piHat) + countLog(m, piHat)
  return -2 * logLikNull + 2 * logLikMLE
}

VaR Backtesting

Backtesting compares ex ante VaR forecasts with ex post realized returns. Whenever the loss on a given day exceeds the VaR, we record a violation (or hit):

\[ I_{t+1} = \begin{cases} 1, & \text{if}\; R_{PF,t+1} < -VaR_{t+1}^p \\ 0, & \text{otherwise} \end{cases} \]

We construct the hit sequence \(\{I_{t+1}\}_{t=1}^T\) across \(T\) days. If the VaR model is correctly specified, this sequence should be unpredictable:

\[ H_0: I_{t+1} \sim \text{i.i.d. Bernoulli}(p) \]

This null hypothesis implies two properties: (1) the average violation rate equals \(p\) (unconditional coverage), and (2) violations are randomly scattered over time (independence).

The unconditional coverage test checks whether the observed violation rate \(\hat{\pi} = T_1/T\), where \(T_1\) is the number of violations in the \(T\)-day sample, differs from \(p\):

\[ LR_{uc} = -2\ln\left[\frac{L(p)}{L(\hat{\pi})}\right] \sim \chi_1^2 \]

The independence test models the hit sequence as a first-order Markov chain and tests whether the probability of a violation depends on yesterday’s outcome. Define \(\pi_{01} = \Pr(I_{t+1}=1 \mid I_t=0)\) and \(\pi_{11} = \Pr(I_{t+1}=1 \mid I_t=1)\). Under independence, \(\pi_{01} = \pi_{11}\):

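\[ LR_{ind} = -2\ln\left[\frac{L(\hat{\Pi})}{L(\hat{\Pi}_1)}\right] \sim \chi_1^2 \]

Here \(\hat{\Pi}_1\) is the unrestricted first-order Markov transition matrix (with separate estimates \(\hat{\pi}_{01}\) and \(\hat{\pi}_{11}\)), and \(\hat{\Pi}\) is the restricted matrix that imposes \(\pi_{01} = \pi_{11}\).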

The conditional coverage test combines both:

\[ LR_{cc} = LR_{uc} + LR_{ind} \sim \chi_2^2 \]

Note

Why clustering matters. Even with correct average coverage, clustered violations are dangerous. If all losses concentrate in a short period, the risk of bankruptcy is much higher than if violations are scattered randomly. Historical evidence shows that commercial bank VaRs, particularly those based on Historical Simulation, tend to produce exactly this pattern.

Note

Simulation setup. Returns are simulated from a GARCH(1,1) data-generating process: \(R_t = \sigma_t z_t\) with \(z_t \sim N(0,1)\) and \(\sigma^2_{t+1} = \omega + \alpha R_t^2 + \beta \sigma^2_t\). The three VaR methods differ in what they know about this process:

Normal (constant): estimates a single standard deviation from the full sample and assumes constant volatility. This is misspecified because the true volatility varies over time.
Historical Simulation: uses a rolling window of past raw returns to compute the VaR percentile. Also misspecified, as it adapts slowly to volatility changes.
GARCH(1,1): uses the true conditional volatility \(\sigma_t\) from the simulation. This is correctly specified and should produce well-behaved violations.

Tip

How to experiment

Try the Normal (constant) method first: it assumes constant volatility and will produce clustered violations when the true volatility spikes. Then switch to GARCH(1,1): because it tracks the true volatility dynamics, violations should be scattered randomly. Compare the test statistics across methods. Increase \(\alpha\) to create more volatile data and observe how the Normal and HS methods deteriorate.

// Promised VaR coverage rate, chosen as a percent string.
viewof btP = Inputs.radio(["1%", "2.5%", "5%"], {
  label: "VaR coverage rate p",
  value: "1%"
})

// Which VaR forecasting method the backtest evaluates.
viewof btMethod = Inputs.radio(["Normal (constant)", "Historical Simulation", "GARCH(1,1)"], {
  label: "VaR method",
  value: "Normal (constant)"
})

// Rolling-window slider, shown only for Historical Simulation; for the other
// methods a hidden placeholder element carrying the value 250 keeps btWindow defined.
viewof btWindow = btMethod === "Historical Simulation" ? Inputs.range([100, 500], {
  label: "HS rolling window",
  step: 50,
  value: 250
}) : Object.assign(html`<span style="display:none"></span>`, { value: 250 })

// GARCH(1,1) alpha of the data-generating process.
viewof btAlpha = Inputs.range([0.05, 0.20], {
  label: "α (GARCH reaction, DGP)",
  step: 0.01,
  value: 0.10
})

// GARCH(1,1) beta of the data-generating process.
viewof btBeta = Inputs.range([0.70, 0.95], {
  label: "β (GARCH persistence, DGP)",
  step: 0.01,
  value: 0.85
})

// Long-run daily volatility of the data-generating process, in percent.
viewof btLRVol = Inputs.range([0.5, 3.0], {
  label: "Long-run daily volatility (%)",
  step: 0.1,
  value: 1.5
})

// Number of simulated days.
viewof btN = Inputs.range([500, 3000], {
  label: "Sample size T",
  step: 100,
  value: 1500
})

// Seed offset for the simulation; incremented by the "New sample" button below.
mutable btSeed = 0

// Button that bumps btSeed on each click.
viewof btReseed = {
  const btn = html`<button style="background:#2f71d5;color:white;border:none;border-radius:6px;padding:0.35rem 1rem;font-weight:500;font-size:0.9rem;cursor:pointer;">New sample</button>`
  btn.onclick = () => { mutable btSeed++ }
  return btn
}

// Convert string VaR coverage rate to number
btPNum = parseFloat(btP) / 100

// Long-run daily variance implied by the volatility slider (given in percent)
btVL = (btLRVol / 100) ** 2

// GARCH intercept: omega = VL * (1 - alpha - beta).
// The alpha and beta sliders allow alpha + beta >= 1 (e.g. 0.20 + 0.95), which
// would make omega zero or negative; the simulated variance could then turn
// negative and produce NaN returns. Flooring the factor at 0.001 keeps omega
// strictly positive. All slider combinations with alpha + beta < 1 are
// unaffected, because the smallest positive gap on the 0.01 grid is 0.01.
btOmega = btVL * Math.max(1 - btAlpha - btBeta, 0.001)

// Simulate returns from GARCH(1,1) DGP
btSim = {
  btSeed  // reactivity trigger
  const rng = prng.mulberry32(42 + btSeed)
  const n = btN
  const alpha = btAlpha, beta = btBeta, omega = btOmega, vl = btVL

  const ret = new Array(n)
  const sig2 = new Array(n)
  sig2[0] = vl

  for (let t = 0; t < n; t++) {
    const z = prng.boxMuller(rng)
    ret[t] = Math.sqrt(sig2[t]) * z
    if (t < n - 1) {
      sig2[t + 1] = omega + alpha * ret[t] * ret[t] + beta * sig2[t]
    }
  }
  return { ret, sig2 }
}

// Compute VaR series based on selected method
btVarSeries = {
  const { ret, sig2 } = btSim
  const n = ret.length
  const p = btPNum
  const method = btMethod
  const W = btWindow
  const zp = -qnorm(p)  // positive quantile (e.g., 2.326 for 1%)

  const varArr = new Array(n).fill(NaN)
  const startIdx = method === "Historical Simulation" ? W : 1

  if (method === "Normal (constant)") {
    // Use full-sample standard deviation (realistically misspecified)
    const mean = ret.reduce((a, b) => a + b, 0) / n
    const sd = Math.sqrt(ret.reduce((a, b) => a + (b - mean) ** 2, 0) / (n - 1))
    for (let t = 1; t < n; t++) varArr[t] = zp * sd
  } else if (method === "Historical Simulation") {
    // Rolling window: sort past W returns, take p-th percentile
    for (let t = W; t < n; t++) {
      const window = ret.slice(t - W, t).sort((a, b) => a - b)
      const idx = Math.floor(p * W)
      varArr[t] = -window[idx]
    }
  } else {
    // GARCH(1,1): use true DGP conditional volatility
    for (let t = 1; t < n; t++) varArr[t] = zp * Math.sqrt(sig2[t])
  }

  return { varArr, startIdx }
}

// Build hit sequence and compute Christoffersen tests
btResults = {
  const { ret } = btSim
  const { varArr, startIdx } = btVarSeries
  const p = btPNum
  const n = ret.length

  // Build hit data
  const hitData = []
  for (let t = startIdx; t < n; t++) {
    if (!isNaN(varArr[t])) {
      hitData.push({
        t,
        ret: ret[t],
        var: varArr[t],
        hit: ret[t] < -varArr[t] ? 1 : 0
      })
    }
  }

  const T = hitData.length
  const T1 = hitData.reduce((a, d) => a + d.hit, 0)
  const T0 = T - T1
  const piHat = T > 0 ? T1 / T : 0

  // Transition counts
  let n00 = 0, n01 = 0, n10 = 0, n11 = 0
  for (let i = 1; i < hitData.length; i++) {
    const prev = hitData[i - 1].hit, curr = hitData[i].hit
    if (prev === 0 && curr === 0) n00++
    else if (prev === 0 && curr === 1) n01++
    else if (prev === 1 && curr === 0) n10++
    else n11++
  }

  const pi01 = (n00 + n01) > 0 ? n01 / (n00 + n01) : 0
  const pi11 = (n10 + n11) > 0 ? n11 / (n10 + n11) : 0

  // LR_uc: unconditional coverage
  const LR_uc = -2 * (T0 * safeLog(1 - p) + T1 * safeLog(p)) +
                 2 * (T0 * safeLog(1 - piHat) + T1 * safeLog(piHat))

  // LR_ind: independence
  const L_ind = T0 * safeLog(1 - piHat) + T1 * safeLog(piHat)
  const L_markov = n00 * safeLog(1 - pi01) + n01 * safeLog(pi01) +
                   n10 * safeLog(1 - pi11) + n11 * safeLog(pi11)
  const LR_ind = -2 * L_ind + 2 * L_markov

  const LR_cc = LR_uc + LR_ind

  // p-values
  const pval_uc = 1 - chi2CDF(LR_uc, 1)
  const pval_ind = 1 - chi2CDF(LR_ind, 1)
  const pval_cc = 1 - chi2CDF(LR_cc, 2)

  return { hitData, T, T0, T1, piHat, n00, n01, n10, n11, pi01, pi11,
           LR_uc, LR_ind, LR_cc, pval_uc, pval_ind, pval_cc }
}

// Legend for the backtest chart. Its value is the Set of hidden keys
// ("ret", "var", "viol") produced by legend(); the violations label embeds
// the observed count and rate from btResults.
viewof btVarLegend = legend([
  { key: "ret", label: "Returns", color: "#999", type: "line" },
  { key: "var", label: `−VaR (${btP})`, color: "#d62728", type: "dashed" },
  { key: "viol", label: `Violations (${btResults.T1} of ${btResults.T}, ${fmt(btResults.piHat * 100, 2)}%)`, color: "#d62728", type: "dot" }
])

{
  const h = btVarLegend
  return Plot.plot({
    height: 380,
    marginLeft: 60,
    x: { label: "Day", grid: false },
    y: { label: "Return", grid: true, tickFormat: pctFmt },
    marks: [
      Plot.ruleY([0], { stroke: "#ccc" }),
      ...(!h.has("ret") ? [Plot.line(btResults.hitData, { x: "t", y: "ret", stroke: "#999", strokeWidth: 0.5 })] : []),
      ...(!h.has("var") ? [Plot.line(btResults.hitData, { x: "t", y: d => -d.var, stroke: "#d62728", strokeWidth: 1.5, strokeDasharray: "6 3" })] : []),
      ...(!h.has("viol") ? [Plot.dot(btResults.hitData.filter(d => d.hit === 1), { x: "t", y: "ret", fill: "#d62728", r: 3 })] : [])
    ]
  })
}

// Caption: expected (T × p) versus observed violation counts for the selected method.
html`<p style="color:#666;font-size:0.85rem;">Method: ${btMethod}. Expected violations: ${fmt(btResults.T * btPNum, 1)} (${btP}). Observed: ${btResults.T1} (${fmt(btResults.piHat * 100, 2)}%).</p>`

// Strip chart: one vertical red rule per violation day; the y axis is hidden.
{
  const violations = btResults.hitData.filter(d => d.hit === 1)
  return Plot.plot({
    height: 120,
    marginLeft: 60,
    marginBottom: 30,
    x: { label: "Day", grid: false },
    y: { axis: null },
    marks: [
      Plot.ruleY([0], { stroke: "#eee" }),
      Plot.ruleX(violations, { x: "t", stroke: "#d62728", strokeWidth: 1.5, strokeOpacity: 0.7 })
    ]
  })
}

// Caption explaining how to read the strip chart.
html`<p style="color:#666;font-size:0.85rem;">Each red tick marks a VaR violation. Under a correct model, violations should be scattered randomly. Clustering indicates the model fails when risk is elevated.</p>`

{
  // Summary table of the three Christoffersen tests: statistic, degrees of
  // freedom, chi-squared critical values, p-value and the 5% decision.
  const r = btResults
  // Chi-squared critical values at 5% and 10% for 1 and 2 degrees of freedom.
  const crit5_1 = 3.841, crit10_1 = 2.706
  const crit5_2 = 5.991, crit10_2 = 4.605

  // Decision cell markup: "N/A" when fewer than 2 violations were observed,
  // otherwise "Reject" if pval < alpha and "Cannot reject" if not.
  function decisionCell(pval, alpha) {
    if (r.T1 < 2) return `<span style="color:#999;">N/A</span>`
    return pval < alpha
      ? `<span style="color:#d62728;font-weight:600;">Reject</span>`
      : `<span style="color:#2e8b57;font-weight:600;">Cannot reject</span>`
  }

  return html`<table style="font-size:0.9rem;border-collapse:collapse;width:100%;">
    <thead>
      <tr style="border-bottom:2px solid #333;">
        <th style="text-align:left;padding:4px 8px;">Test</th>
        <th style="text-align:right;padding:4px 8px;">Statistic</th>
        <th style="text-align:center;padding:4px 8px;">df</th>
        <th style="text-align:right;padding:4px 8px;">Crit. 5%</th>
        <th style="text-align:right;padding:4px 8px;">Crit. 10%</th>
        <th style="text-align:right;padding:4px 8px;">p-value</th>
        <th style="text-align:center;padding:4px 8px;">Decision (5%)</th>
      </tr>
    </thead>
    <tbody>
      <tr style="border-bottom:1px solid #eee;">
        <td style="padding:4px 8px;">Unconditional coverage (LR<sub>uc</sub>)</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(r.LR_uc, 3)}</td>
        <td style="text-align:center;padding:4px 8px;">1</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit5_1, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit10_1, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(r.pval_uc, 4)}</td>
        <td style="text-align:center;padding:4px 8px;">${decisionCell(r.pval_uc, 0.05)}</td>
      </tr>
      <tr style="border-bottom:1px solid #eee;">
        <td style="padding:4px 8px;">Independence (LR<sub>ind</sub>)</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(r.LR_ind, 3)}</td>
        <td style="text-align:center;padding:4px 8px;">1</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit5_1, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit10_1, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(r.pval_ind, 4)}</td>
        <td style="text-align:center;padding:4px 8px;">${decisionCell(r.pval_ind, 0.05)}</td>
      </tr>
      <tr>
        <td style="padding:4px 8px;font-weight:600;">Conditional coverage (LR<sub>cc</sub>)</td>
        <td style="text-align:right;padding:4px 8px;font-weight:600;">${fmt(r.LR_cc, 3)}</td>
        <td style="text-align:center;padding:4px 8px;">2</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit5_2, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(crit10_2, 3)}</td>
        <td style="text-align:right;padding:4px 8px;">${fmt(r.pval_cc, 4)}</td>
        <td style="text-align:center;padding:4px 8px;">${decisionCell(r.pval_cc, 0.05)}</td>
      </tr>
    </tbody>
  </table>
  ${r.T1 < 2 ? html`<p style="color:#d62728;font-size:0.85rem;margin-top:8px;"><strong>Warning:</strong> Fewer than 2 violations observed. Test statistics may be unreliable.</p>` : html``}`
}

{
  // Estimated first-order transition matrix of the hit sequence (rows: today,
  // columns: tomorrow) with the underlying transition counts, followed by a
  // note that flags clustering when pi11 exceeds pi01 by more than 0.01.
  const r = btResults
  return html`<table style="font-size:0.9rem;border-collapse:collapse;width:100%;max-width:500px;">
    <thead>
      <tr style="border-bottom:2px solid #333;">
        <th style="padding:4px 8px;"></th>
        <th style="text-align:center;padding:4px 8px;" colspan="2">Tomorrow</th>
      </tr>
      <tr style="border-bottom:1px solid #ccc;">
        <th style="text-align:left;padding:4px 8px;">Today</th>
        <th style="text-align:center;padding:4px 8px;">No violation (0)</th>
        <th style="text-align:center;padding:4px 8px;">Violation (1)</th>
      </tr>
    </thead>
    <tbody>
      <tr style="border-bottom:1px solid #eee;">
        <td style="padding:4px 8px;font-weight:500;">No violation (0)</td>
        <td style="text-align:center;padding:4px 8px;">${fmt(1 - r.pi01, 4)} <span style="color:#999;font-size:0.8rem;">(n=${r.n00})</span></td>
        <td style="text-align:center;padding:4px 8px;">${fmt(r.pi01, 4)} <span style="color:#999;font-size:0.8rem;">(n=${r.n01})</span></td>
      </tr>
      <tr>
        <td style="padding:4px 8px;font-weight:500;">Violation (1)</td>
        <td style="text-align:center;padding:4px 8px;">${fmt(1 - r.pi11, 4)} <span style="color:#999;font-size:0.8rem;">(n=${r.n10})</span></td>
        <td style="text-align:center;padding:4px 8px;">${fmt(r.pi11, 4)} <span style="color:#999;font-size:0.8rem;">(n=${r.n11})</span></td>
      </tr>
    </tbody>
  </table>
  <p style="color:#666;font-size:0.85rem;margin-top:8px;">
    Unconditional violation rate: π̂ = ${fmt(r.piHat, 4)} | Promised rate: p = ${btP}<br>
    π̂<sub>01</sub> = ${fmt(r.pi01, 4)} (violation after no violation) |
    π̂<sub>11</sub> = ${fmt(r.pi11, 4)} (violation after violation)<br>
    ${r.pi11 > r.pi01 + 0.01 ?
      '<span style="color:#d62728;font-weight:500;">Clustering detected: π̂₁₁ > π̂₀₁</span>' :
      '<span style="color:#2e8b57;">No strong evidence of clustering</span>'}
  </p>`
}

Binomial Coverage Test

Under a correctly specified VaR model, the number of violations in \(n\) days follows a binomial distribution: \(M \sim \text{Binomial}(n, p)\). The Kupiec likelihood ratio test checks whether the observed number of violations \(m\) is consistent with the promised coverage rate \(p\):

\[ LR_{Kupiec} = -2\ln\left[(1-p)^{n-m}\,p^m\right] + 2\ln\left[\left(1-\frac{m}{n}\right)^{n-m}\left(\frac{m}{n}\right)^m\right] \sim \chi_1^2 \]

A critical challenge is the low power of backtests at high confidence levels with limited data. At a 99% VaR with 250 trading days, we expect only 2.5 violations, making it difficult to distinguish a correct model from an incorrect one.

Tip

How to experiment

Compare the power curve for \(n = 250\) versus \(n = 1000\) at the 99% confidence level. With fewer observations the power curve is much flatter, meaning the test struggles to distinguish between models with very different true violation rates. A model with a true violation rate of 3% (three times the promised 1%) may still not be rejected.

// Number of days in the backtest sample.
viewof cvN = Inputs.range([100, 2000], {
  label: "Sample size n",
  step: 50,
  value: 600
})

// VaR confidence level as a percent string; cvP is derived as 1 minus this level.
viewof cvConf = Inputs.radio(["90%", "95%", "97.5%", "99%"], {
  label: "VaR confidence level",
  value: "99%"
})

// Significance level of the Kupiec test as a percent string.
viewof cvAlpha = Inputs.radio(["1%", "5%", "10%"], {
  label: "Significance level α",
  value: "5%"
})

viewof cvM = Inputs.range([0, Math.min(cvN, Math.ceil(cvN * cvP * 5 + 10))], {
  label: "Observed violations m",
  step: 1,
  value: Math.round(cvN * cvP)
})

cvP = Math.round((1 - parseFloat(cvConf) / 100) * 1e10) / 1e10
cvExpected = cvN * cvP
cvMaxK = Math.min(cvN, Math.ceil(cvN * cvP * 5 + 10))

// Chi-squared critical value lookup for df=1
cvAlphaNum = parseFloat(cvAlpha) / 100

cvCritical = cvAlphaNum <= 0.01 ? 6.635 : cvAlphaNum <= 0.05 ? 3.841 : 2.706

// Compute binomial PMF and rejection region
cvData = {
  const data = []
  const rejSet = new Set()
  for (let k = 0; k <= cvMaxK; k++) {
    const pmf = binomPMF(k, cvN, cvP)
    const lr = kupiecLR(cvN, k, cvP)
    const rejected = lr > cvCritical
    if (rejected) rejSet.add(k)
    data.push({ k, pmf, rejected, lr })
  }
  // Find acceptance range
  let minAccept = 0, maxAccept = cvMaxK
  for (let k = 0; k <= cvMaxK; k++) { if (!rejSet.has(k)) { minAccept = k; break } }
  for (let k = cvMaxK; k >= 0; k--) { if (!rejSet.has(k)) { maxAccept = k; break } }
  return { data, rejSet, minAccept, maxAccept }
}

// Power curve: P(reject | piTrue) for different true violation rates
cvPowerData = {
  const { rejSet, maxAccept } = cvData
  const result = []
  const maxPi = Math.min(1, cvP * 5)
  const step = maxPi / 200
  for (let pi = step; pi <= maxPi; pi += step) {
    // Sum PMF over rejection region (k <= cvMaxK)
    let power = 0
    for (const k of rejSet) {
      power += binomPMF(k, cvN, pi)
    }
    // All k > maxAccept are also in the rejection region
    // Add P(X > cvMaxK) = 1 - sum_{k=0}^{cvMaxK} PMF(k)
    let cumBelow = 0
    for (let k = 0; k <= cvMaxK; k++) {
      cumBelow += binomPMF(k, cvN, pi)
    }
    power += (1 - cumBelow)
    result.push({ piTrue: pi, power: Math.min(power, 1) })
  }
  return result
}

// Bar chart of the Binomial(n, p) PMF under H0: rejected k in red, accepted k
// in grey, with the observed count m marked by a blue rule and dot.
Plot.plot({
  height: 350,
  marginLeft: 60,
  x: { label: "Number of violations (k)", grid: false },
  y: { label: "P(X = k)", grid: true },
  marks: [
    Plot.rectY(cvData.data, {
      x1: d => d.k - 0.4,
      x2: d => d.k + 0.4,
      y: "pmf",
      fill: d => d.rejected ? "#d62728" : "#ccc"
    }),
    Plot.ruleX([cvM], { stroke: "#2f71d5", strokeWidth: 2 }),
    Plot.dot([{ x: cvM, y: binomPMF(cvM, cvN, cvP) }], { x: "x", y: "y", fill: "#2f71d5", r: 5 })
  ]
})

// Static legend and caption for the PMF chart.
html`<div style="display:flex;gap:18px;font-size:0.85rem;margin-top:-6px;flex-wrap:wrap;">
  <span><svg width="14" height="14"><rect width="14" height="14" fill="#ccc"/></svg> Acceptance region</span>
  <span><svg width="14" height="14"><rect width="14" height="14" fill="#d62728"/></svg> Rejection region</span>
  <span><svg width="8" height="8"><circle cx="4" cy="4" r="4" fill="#2f71d5"/></svg> Observed m = ${cvM}</span>
</div>
<p style="color:#666;font-size:0.85rem;">Binomial(${cvN}, ${fmt(cvP, 4)}) distribution under H₀. Expected violations: ${fmt(cvExpected, 1)}.</p>`

{
  // Kupiec test summary for the observed count m: LR statistic, chi-squared
  // (df = 1) p-value, and the decision from comparing LR with cvCritical.
  const lr = kupiecLR(cvN, cvM, cvP)
  const pval = 1 - chi2CDF(lr, 1)
  const reject = lr > cvCritical
  const { minAccept, maxAccept } = cvData

  return html`<table style="font-size:0.9rem;border-collapse:collapse;width:100%;">
    <thead>
      <tr style="border-bottom:2px solid #333;">
        <th style="text-align:left;padding:4px 8px;">Parameter</th>
        <th style="text-align:right;padding:4px 8px;">Value</th>
      </tr>
    </thead>
    <tbody>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">Sample size n</td><td style="text-align:right;padding:4px 8px;">${cvN}</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">VaR coverage rate p</td><td style="text-align:right;padding:4px 8px;">${cvConf} (p = ${fmt(cvP, 4)})</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">Expected violations (n × p)</td><td style="text-align:right;padding:4px 8px;">${fmt(cvExpected, 1)}</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">Significance level α</td><td style="text-align:right;padding:4px 8px;">${cvAlpha}</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">χ² critical value (df=1)</td><td style="text-align:right;padding:4px 8px;">${fmt(cvCritical, 3)}</td></tr>
      <tr style="border-bottom:1px solid #eee;background:#f8f8f8;"><td style="padding:4px 8px;font-weight:600;">Acceptance range</td><td style="text-align:right;padding:4px 8px;font-weight:600;">[${minAccept}, ${maxAccept}]</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">Observed violations m</td><td style="text-align:right;padding:4px 8px;">${cvM}</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">Kupiec LR statistic</td><td style="text-align:right;padding:4px 8px;">${fmt(lr, 3)}</td></tr>
      <tr style="border-bottom:1px solid #eee;"><td style="padding:4px 8px;">p-value</td><td style="text-align:right;padding:4px 8px;">${fmt(pval, 4)}</td></tr>
      <tr><td style="padding:4px 8px;font-weight:600;">Decision</td><td style="text-align:right;padding:4px 8px;">${reject ?
        '<span style="color:#d62728;font-weight:600;">Reject H₀</span>' :
        '<span style="color:#2e8b57;font-weight:600;">Cannot reject H₀</span>'}</td></tr>
    </tbody>
  </table>`
}

// Power curve chart: rejection probability against the true violation rate,
// with a vertical rule at the promised rate p and a horizontal rule at the
// significance level.
Plot.plot({
  height: 350,
  marginLeft: 60,
  x: { label: "True violation rate (π)", grid: true, tickFormat: d => (d * 100).toFixed(1) + "%" },
  y: { label: "P(reject H₀)", grid: true, domain: [0, 1] },
  marks: [
    Plot.ruleX([cvP], { stroke: "#2f71d5", strokeDasharray: "6 3", strokeWidth: 1.5 }),
    Plot.ruleY([cvAlphaNum], { stroke: "#d62728", strokeDasharray: "4 2", strokeWidth: 1 }),
    Plot.line(cvPowerData, { x: "piTrue", y: "power", stroke: "#333", strokeWidth: 2 })
  ]
})

// Static legend and explanatory caption for the power curve.
html`<div style="display:flex;gap:18px;font-size:0.85rem;margin-top:-6px;flex-wrap:wrap;">
  <span><svg width="24" height="10"><line x1="0" y1="5" x2="24" y2="5" stroke="#2f71d5" stroke-width="2" stroke-dasharray="4 2"/></svg> Promised rate p = ${(cvP*100).toFixed(1)}%</span>
  <span><svg width="24" height="10"><line x1="0" y1="5" x2="24" y2="5" stroke="#d62728" stroke-width="1" stroke-dasharray="4 2"/></svg> Significance level α = ${cvAlpha}</span>
</div>
<p style="color:#666;font-size:0.85rem;">The power curve shows the probability of rejecting H₀ as a function of the true violation rate π. Because the Kupiec test is <strong>two-tailed</strong> (rejecting both too few and too many violations), the curve has a characteristic U-shape: power is high on the left (too few violations, e.g., an overly conservative model) and on the right (too many violations, e.g., a model that underestimates risk). The minimum near π = p is where the test has the least ability to detect misspecification. For risk management, the right side is most relevant: how well can the test detect a model that produces more violations than promised?</p>`

References

Christoffersen, Peter F. 2012. Elements of Financial Risk Management. 2nd ed. Academic Press.

Hull, John. 2023. Risk Management and Financial Institutions. 6th ed. John Wiley & Sons.