Student t copula

Thanks that worked well.

Now If I may asked about student t copula please :

Here is my is simulation in R

library(copula)

# Set the copula parameter
rho <- 0.70
nu <- 3.84  # Degrees of freedom for the Student's t copula

# Create the Student's t copula object

u <- rCopula(500,tCopula(dim=2,rho,df=nu))
plot(u[,1],u[,2],pch='.',col='blue')
cor(u,method='spearman')


# Plot the simulated data
plot(x, y, pch = 16, xlab = "x", ylab = "y", main = "Simulated Data")

# Compute and print the sample correlation
correlation <- cor(x, y)
print(paste("Sample Correlation:", correlation))


# Plot the simulated data
plot(x, y, pch = 16, xlab = "x", ylab = "y", main = "Simulated Data")


data <- list(
  N = length(x),
  x = (u[, 1]),
  y = u[, 2]
)

This is the Stan code for student t copula:

 functions {
   real t_copula(vector u, vector v, real rho, real nu) {
       int N = num_elements(u);
       vector[N] t1 = inv_Phi(u);  // Inverse CDF of the first marginal (standard normal)
       vector[N] t2 = inv_Phi(v);  // Inverse CDF of the second marginal (standard normal)
       real log_det_J = sum(log1p(square(t1)) + log1p(square(t2)));  // Calculate the log determinant of the Jacobian matrix
       real log_pdf = lgamma((nu + 2.0) / 2.0) - lgamma(nu / 2.0) - (N / 2.0) * log(nu * pi())
         - 0.5 * (nu + 2.0) * log1p(rho * rho / (nu - 2.0)) - log_det_J;
       return log_pdf;
     }
   
     }
 
   data {
       int<lower=0 N;              // Number of observations
       vector[N] x;                // First variable
       vector[N] y;                // Second variable
     }
 
   parameters {
       real<lower=-1, upper=1 rho;  // Copula parameter (correlation)
       real<lower=2 nu;             // Degrees of freedom parameter
     }
 
   model {
       // Priors
       rho ~ uniform(-1, 1);
       nu ~ gamma(2, 0.5);  // Adjust the shape and scale parameters as needed
     
         // Likelihood
     
           target += t_copula(x, y, rho, nu);
         
         }

Once again I am trying to recover rho, but its way off, any suggestion @spinkney .

Thanks in advance for your suggestion.
Antony.

edit: Admin formatting. Code formatting can be enabled by starting the code block with ```stan or ```r and closing the code block with ```

1 Like

@ants007 I moved this over to a new topic so it’s easier to find if one searches for student t copula.

For the student t copula you want to use the quantile function of the student t rather than the normal qf (called inv_Phi). It is possible to write this function in Stan with the inv_inc_beta function (see below) and it matches the R function qf. The issue is that this function seems to exhibit numerical issues in the derivative so when nu is a parameter and sampling calculates the derivative wrt to nu I find that sampling is slow and the estimates do not match the simulation (though it seems to work fine with optimization).

  array[] real student_t_qf(vector u, real nu) {
    int N = num_elements(u);
    array[N] real x;
    
    for (i in 1 : N) {
      real y = u[i] < 0.5 ? 2 * u[i] : 2 * (1 - u[i]);
      int z = u[i] - 0.5 < 0 ? -1 : 1;
      x[i] = z * sqrt(nu / inv_inc_beta(0.5 * nu, 0.5, y) - nu);
    }
  
    return x;
  }

I wrote my own student-t quantile function which is not as accurate as the above and is much, much worse for 0 < nu < 2. I believe it matches Hill’s algorithm in https://dl.acm.org/doi/10.1145/355598.355600 though I haven’t tested it extensively. With this and the correct student-t lpdf I am able to recover the parameters. In fact, it’s a better fit than the MLE fit from the copula R package for this example.

functions {
  real student_t_qf(real p, real v) {
    real q, t, z;
    real a, b, c, d, x, y;
    int negate; // 1 = TRUE, 0 = FALSE
    
    if (p > 0.5) {
      negate = 0;
      z = 2.0 * (1.0 - p);
    } else {
      negate = 1;
      z = 2.0 * p;
    }
    
    a = 1.0 / (v - 0.5);
    b = 48.0 / (a * a);
    c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
    d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * pi() / 2.0) * v;
    x = z * d;
    y = pow(x, 2.0 / v);
    
    if (y > 0.05 + a) {
      x = inv_Phi(z * 0.5);
      y = square(x);
      if (v < 5.0) {
        c = c + 0.3 * (v - 4.5) * (x + 0.6);
      }
      c = c + (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b;
      y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b
           + 1.0)
          * x;
      y = expm1(a * y * y);
    } else {
      y = ((1.0
            / (((v + 6.0) / (v * y) - 0.089 * d - 0.822) * (v + 2.0) * 3.0)
            + 0.5 / (v + 4.0))
           * y - 1.0)
          * (v + 1.0) / (v + 2.0) + 1.0 / y;
    }
    
    q = sqrt(v * y);
    
    if (negate == 1) {
      q = -q;
    }
    
    return q;
  }
  
  vector student_t_qf(vector u, real nu) {
    int N = num_elements(u);
    vector[N] x;
    
    for (i in 1 : N) {
      x[i] = student_t_qf(u[i], nu);
    }
    
    return x;
  }
  
  real t_copula_lpdf(vector u, vector v, real rho, real nu) {
    int N = num_elements(u);
    vector[N] t1 = student_t_qf(u, nu);
    vector[N] t2 = student_t_qf(v, nu);
    vector[N] t1_sq = square(t1);
    vector[N] t2_sq = square(t2);
    
    real lpdf = N
                * (-0.5 * log1m(rho ^ 2) + lgamma(0.5 * (nu + 2))
                   + lgamma(0.5 * nu) - 2 * lgamma(0.5 * (nu + 1)));
    lpdf += 0.5 * (nu + 1)
            * sum(log1p(t1_sq / nu) + log1p(t2_sq / nu));
    lpdf += -0.5 * (nu + 2)
            * sum(log1p((t1_sq - 2 * t1 .* t2 * rho + t2_sq)
                        / (nu * (1 - rho ^ 2))));
    return lpdf;
  }
}
data {
  int<lower=0> N; // Number of observations
  vector[N] x; // First variable
  vector[N] y; // Second variable
}
parameters {
  real<lower=-1, upper=1> rho;
  real<lower=2> nu; // Degrees of freedom parameter
}
model {
  // Priors
  nu ~ exponential(1); // Adjust the shape and scale parameters as needed
  // Likelihood
  target += t_copula_lpdf(x | y, rho, nu);
}

The R code to run

library(copula)
library(cmdstanr)
library(QRM)

# Set the copula parameter
set.seed(12312)
rho <- 0.70
nu <- 3.84  # Degrees of freedom for the Student's t copula

# Create the Student's t copula object
n <- 100
u <- rCopula(n, tCopula(dim=2, rho, df=nu))

mle_fit <- fit.tcopula(u)

mod <- cmdstan_model("student_t_copula.stan")

mod_out <- mod$sample(
  data = list(N = n,
              x = u[, 1],
              y = u[, 2]),
  parallel_chains = 4
)
2 Likes

Thank you @spinkney. appreciate your suggestion. I will work on it little bit more and if there is any more progress, I will post here.
On another note, I ultimately like to use copula to address endogeneity in regression model. Here is a thread https://discourse.mc-stan.org/t/using-copula-to-account-for-endogeneity-in-regression/31554 are you able to assist just with some simple guidance? Thanks very much!