\[ \boldsymbol{Y} = \boldsymbol{X}\boldsymbol{\beta} + \boldsymbol{\epsilon} \]
\[ \begin{bmatrix} Y_1 \\ Y_2 \\ Y_3 \\ \vdots \\ Y_n \end{bmatrix} = \begin{bmatrix} 1 & X_{11} & X_{12} & \dots & X_{1p} \\ 1 & X_{21} & X_{22} & \dots & X_{2p} \\ 1 & X_{31} & X_{32} & \dots & X_{3p} \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & X_{n1} & X_{n2} & \dots & X_{np} \end{bmatrix} \begin{bmatrix} \beta_0 \\ \beta_1 \\ \beta_2 \\ \vdots \\ \beta_p \end{bmatrix} + \begin{bmatrix} \epsilon_1 \\ \epsilon_2 \\ \epsilon_3 \\ \vdots \\ \epsilon_n \end{bmatrix} \]
\[ \boldsymbol{\hat{\beta}} = (\boldsymbol{X}^T\boldsymbol{X})^{-1}\boldsymbol{X}^T\boldsymbol{Y} \]
Weakly Informative Priors
Penalized Regression

\[ \text{P}(A|B) = \frac{\text{P}(B|A)\text{P}(A)}{\text{P}(B)} \]
\[ \text{P}(AB) = \text{P}(A)\text{P}(B|A) = \text{P}(B)\text{P}(A|B) \]
\[ \text{P}(A|B) = \frac{\text{P}(B|A)\text{P}(A)}{\text{P}(B)} \]
\[ \pi(\theta | x) = \frac{f(x|\theta)\pi(\theta)}{m(x)} \]
\[ \pi(\theta|x) = \frac{f(x|\theta)\pi(\theta)}{\int_\theta f(x|\theta)\pi(\theta)d\theta} \]
\[ \color{red}{\pi(\theta|x)} = \frac{\color{blue}{f(x|\theta)}\color{green}{\pi(\theta)}}{\color{gray}{\int_\theta f(x|\theta)\pi(\theta)d\theta}} \]
\[ \color{red}{\text{Posterior}} = \frac{\color{blue}{\text{Likelihood}} * \color{green}{\text{Prior}}}{\color{gray}{\text{Normalizing Constant}}} \]
\[ \color{red}{\text{Posterior}} \propto \color{blue}{\text{Likelihood}} * \color{green}{\text{Prior}} \]
\[ Y \sim \text{N}(\boldsymbol{X}\boldsymbol{\beta}, \sigma) \]
\[ \beta \sim \text{cauchy}(l, s) \]
\[ \pi(\theta | x) \propto \frac{1}{\sigma}e^{\frac{-(x-\theta)^2}{2\sigma^2}} \dfrac{s}{\big(s^2 + (\theta - l)^2\big)} \]
data { int<lower=0> N; ... } parameters { real alpha_std; ... } model { alpha ~ normal(0, 10); ... }
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 \right] \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda \sum_{j=1}^p |\beta_j|^q \right] \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[ \frac{1}{2N} \sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda P_{\alpha} \left(\beta \right) \right] \] where \[ P_{\alpha} \left(\beta \right) = \left(1 - \alpha \right) \frac{1}{2}\lVert\Gamma\beta\rVert_{\ell_2}^2 + \alpha \lVert\Gamma\beta\rVert_{\ell_1} \]
\[ \text{Posterior Mode: } \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda \sum_{j=1}^p |\beta_j|^q \right] \]
\[ \text{Log-prior: } \lambda \sum_{j=1}^p |\beta_j|^q \]
 | Ridge | Lasso |
---|---|---|
Penalty | \(\beta_j^2\) | \(\lvert\beta_j\rvert\) |
Prior | \(\text{N}(\boldsymbol{0}, \frac{1}{2p} \boldsymbol{I}_p)\) | \(\frac{\lambda}{2}e^{-\lambda \lvert\beta\rvert}\) |