I am trying to proceed and I report results while asking for some technical suggestions.
Mine could be a simple multinomial regression where categories are genes. However, the overall changes in genes can be asymmetric, where some genes increase in rate while other remain stable.
- gene0…70 : slope == 0
- gene71…100 : slope == 5
With a naive modelling we obtain slopes for the non changing genes that appear negative
A normalization that works is to assume there are some NON-changing genes, and force the slopes == 0 for them.
However I would like to exploit the sparsity assumption instead for “normalizing” the data: using something like Horseshoe prior of the slope
With that I improve my normalization, and the slope == 0 is actually centered on 0
I have two questions related to my model (attached at the end)
- with the tuning of these parameters I could not decrease my false positives to 0, although intuitively I should be able to make the attraction to 0 arbitrarily stronger
- data_sigma$hs_df = 3
- data_sigma$hs_df_global = 1
- data_sigma$hs_df_slab = 2
- data_sigma$hs_par_ratio = 0.1
- data_sigma$hs_scale_slab = 1
- Maybe I am converting hs_par_ratio to hs_scale_global in a wrong way
in the formula
real hs_scale_global = hs_par_ratio / sqrt( AMOUNT_OF_DATA_POINTS);
I am setting AMOUNT_OF_DATA_POINTS as number of genes (e.g, 100) * number of replcates (e.g., 13 per gene regression), because I have my data organised as 100 regression with 13 point each
e.g.,
But I’m thinking that if I am out in such calculation of many order of magnitude then the % of non 0 matters little.
My model
functions{
/* Efficient computation of the horseshoe prior
* Args:
* zb: standardized population-level coefficients
* global: global horseshoe parameters
* local: local horseshoe parameters
* scale_global: global scale of the horseshoe prior
* c2: positive real number for regularization
* Returns:
* population-level coefficients following the horseshoe prior
*/
vector horseshoe(vector zb, vector[] local, real[] global,
real scale_global, real c2) {
int K = rows(zb);
vector[K] lambda = local[1] .* sqrt(local[2]);
vector[K] lambda2 = square(lambda);
real tau = global[1] * sqrt(global[2]) * scale_global;
vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
return zb .* lambda_tilde * tau;
}
}
data {
int<lower = 0> G; // all genes
int<lower = 0> T; // tube
int<lower=0> R;
int<lower = 0> y[T, G]; // RNA-seq counts
matrix[T,R] X;
// Horseshoe
real<lower=0> hs_df;
real<lower=0> hs_df_global;
real<lower=0> hs_df_slab;
real<lower=0> hs_par_ratio;
real<lower=0> hs_scale_slab;
}
transformed data{
real hs_scale_global = hs_par_ratio / sqrt(G*(T-R));
}
parameters {
// Linear model
row_vector[G] beta0;
// Horseshoe
vector[G] zb;
vector<lower=0>[G] hs_local[2];
real<lower=0> hs_global[2];
real<lower=0> hs_c2;
}
transformed parameters{
matrix[R, G] beta;
matrix[R-1, G] beta1;
// population-level effects
beta1[1] = to_row_vector( horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2) );
beta = append_row(beta0, beta1);
}
model {
// Horseshoe
target += normal_lpdf(zb | 0, 1);
target += normal_lpdf(hs_local[1] | 0, 1) - 3 * log(0.5);
target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
target += normal_lpdf(hs_global[1] | 0, 1) - 1 * log(0.5);
target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
target += student_t_lpdf(beta0 | 3, 6, 10);
// Lnear system
beta0 ~ normal(0,5);
for (t in 1:T)
y[t] ~ multinomial( softmax(to_vector(X[t] * beta) ) );
}