# Student t copula

Thanks that worked well.

Here is my is simulation in R

library(copula)

# Set the copula parameter
rho <- 0.70
nu <- 3.84  # Degrees of freedom for the Student's t copula

# Create the Student's t copula object

u <- rCopula(500,tCopula(dim=2,rho,df=nu))
plot(u[,1],u[,2],pch='.',col='blue')
cor(u,method='spearman')

# Plot the simulated data
plot(x, y, pch = 16, xlab = "x", ylab = "y", main = "Simulated Data")

# Compute and print the sample correlation
correlation <- cor(x, y)
print(paste("Sample Correlation:", correlation))

# Plot the simulated data
plot(x, y, pch = 16, xlab = "x", ylab = "y", main = "Simulated Data")

data <- list(
N = length(x),
x = (u[, 1]),
y = u[, 2]
)


This is the Stan code for student t copula:

 functions {
real t_copula(vector u, vector v, real rho, real nu) {
int N = num_elements(u);
vector[N] t1 = inv_Phi(u);  // Inverse CDF of the first marginal (standard normal)
vector[N] t2 = inv_Phi(v);  // Inverse CDF of the second marginal (standard normal)
real log_det_J = sum(log1p(square(t1)) + log1p(square(t2)));  // Calculate the log determinant of the Jacobian matrix
real log_pdf = lgamma((nu + 2.0) / 2.0) - lgamma(nu / 2.0) - (N / 2.0) * log(nu * pi())
- 0.5 * (nu + 2.0) * log1p(rho * rho / (nu - 2.0)) - log_det_J;
return log_pdf;
}

}

data {
int<lower=0 N;              // Number of observations
vector[N] x;                // First variable
vector[N] y;                // Second variable
}

parameters {
real<lower=-1, upper=1 rho;  // Copula parameter (correlation)
real<lower=2 nu;             // Degrees of freedom parameter
}

model {
// Priors
rho ~ uniform(-1, 1);
nu ~ gamma(2, 0.5);  // Adjust the shape and scale parameters as needed

// Likelihood

target += t_copula(x, y, rho, nu);

}


Once again I am trying to recover rho, but its way off, any suggestion @spinkney .

Antony.

edit: Admin formatting. Code formatting can be enabled by starting the code block with stan or r and closing the code block with 

1 Like

@ants007 I moved this over to a new topic so it’s easier to find if one searches for student t copula.

For the student t copula you want to use the quantile function of the student t rather than the normal qf (called inv_Phi). It is possible to write this function in Stan with the inv_inc_beta function (see below) and it matches the R function qf. The issue is that this function seems to exhibit numerical issues in the derivative so when nu is a parameter and sampling calculates the derivative wrt to nu I find that sampling is slow and the estimates do not match the simulation (though it seems to work fine with optimization).

  array[] real student_t_qf(vector u, real nu) {
int N = num_elements(u);
array[N] real x;

for (i in 1 : N) {
real y = u[i] < 0.5 ? 2 * u[i] : 2 * (1 - u[i]);
int z = u[i] - 0.5 < 0 ? -1 : 1;
x[i] = z * sqrt(nu / inv_inc_beta(0.5 * nu, 0.5, y) - nu);
}

return x;
}


I wrote my own student-t quantile function which is not as accurate as the above and is much, much worse for 0 < nu < 2. I believe it matches Hill’s algorithm in https://dl.acm.org/doi/10.1145/355598.355600 though I haven’t tested it extensively. With this and the correct student-t lpdf I am able to recover the parameters. In fact, it’s a better fit than the MLE fit from the copula R package for this example.

functions {
real student_t_qf(real p, real v) {
real q, t, z;
real a, b, c, d, x, y;
int negate; // 1 = TRUE, 0 = FALSE

if (p > 0.5) {
negate = 0;
z = 2.0 * (1.0 - p);
} else {
negate = 1;
z = 2.0 * p;
}

a = 1.0 / (v - 0.5);
b = 48.0 / (a * a);
c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi() / 2.0) * v;
x = z * d;
y = pow(x, 2.0 / v);

if (y > 0.05 + a) {
x = inv_Phi(z * 0.5);
y = square(x);
if (v < 5.0) {
c = c + 0.3 * (v - 4.5) * (x + 0.6);
}
c = c + (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b;
y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b
+ 1.0)
* x;
y = expm1(a * y * y);
} else {
y = ((1.0
/ (((v + 6.0) / (v * y) - 0.089 * d - 0.822) * (v + 2.0) * 3.0)
+ 0.5 / (v + 4.0))
* y - 1.0)
* (v + 1.0) / (v + 2.0) + 1.0 / y;
}

q = sqrt(v * y);

if (negate == 1) {
q = -q;
}

return q;
}

vector student_t_qf(vector u, real nu) {
int N = num_elements(u);
vector[N] x;

for (i in 1 : N) {
x[i] = student_t_qf(u[i], nu);
}

return x;
}

real t_copula_lpdf(vector u, vector v, real rho, real nu) {
int N = num_elements(u);
vector[N] t1 = student_t_qf(u, nu);
vector[N] t2 = student_t_qf(v, nu);
vector[N] t1_sq = square(t1);
vector[N] t2_sq = square(t2);

real lpdf = N
* (-0.5 * log1m(rho ^ 2) + lgamma(0.5 * (nu + 2))
+ lgamma(0.5 * nu) - 2 * lgamma(0.5 * (nu + 1)));
lpdf += 0.5 * (nu + 1)
* sum(log1p(t1_sq / nu) + log1p(t2_sq / nu));
lpdf += -0.5 * (nu + 2)
* sum(log1p((t1_sq - 2 * t1 .* t2 * rho + t2_sq)
/ (nu * (1 - rho ^ 2))));
return lpdf;
}
}
data {
int<lower=0> N; // Number of observations
vector[N] x; // First variable
vector[N] y; // Second variable
}
parameters {
real<lower=-1, upper=1> rho;
real<lower=2> nu; // Degrees of freedom parameter
}
model {
// Priors
nu ~ exponential(1); // Adjust the shape and scale parameters as needed
// Likelihood
target += t_copula_lpdf(x | y, rho, nu);
}



The R code to run

library(copula)
library(cmdstanr)
library(QRM)

# Set the copula parameter
set.seed(12312)
rho <- 0.70
nu <- 3.84  # Degrees of freedom for the Student's t copula

# Create the Student's t copula object
n <- 100
u <- rCopula(n, tCopula(dim=2, rho, df=nu))

mle_fit <- fit.tcopula(u)

mod <- cmdstan_model("student_t_copula.stan")

mod_out <- mod\$sample(
data = list(N = n,
x = u[, 1],
y = u[, 2]),
parallel_chains = 4
)
`
2 Likes

Thank you @spinkney. appreciate your suggestion. I will work on it little bit more and if there is any more progress, I will post here.
On another note, I ultimately like to use copula to address endogeneity in regression model. Here is a thread https://discourse.mc-stan.org/t/using-copula-to-account-for-endogeneity-in-regression/31554 are you able to assist just with some simple guidance? Thanks very much!