functions{ matrix set_temp_bounds(int Nrow, int[] bounded, row_vector bounds, matrix theta, vector pop_mu){ matrix[Nrow,2] temp_bounds; // If lower bound applies: if(bounded[1] < 0 || bounded[1] % 2 == 0){ if(bounded[2] == 0){ // fixed bounds temp_bounds[,1] = rep_vector(bounds[1], Nrow); }else if(bounded[1] == -1 || bounded[1] == 2){ // -1 or 2 means temp_bounds[,1] conditioned on current value of pop_mu/theta[bounded[2]] if(bounded[2] < 0){ // negative index used to indicate pop_mu instead of theta temp_bounds[,1] = rep_vector(pop_mu[-bounded[2]],Nrow); }else{ temp_bounds[,1] = theta[,bounded[2]]; } }else if(bounded[1] == -3 || bounded[1] == 4){ // -3 or 4 means temp_bounds[,1] conditioned on current value of pop_mu/theta[bounded[2]] + bounds[1] if(bounded[2] < 0){ // negative index used to indicate pop_mu instead of theta temp_bounds[,1] = pop_mu[-bounded[2]] + rep_vector(bounds[1], Nrow); }else{ temp_bounds[,1] = theta[,bounded[2]] + rep_vector(bounds[1], Nrow); } } // No lower bound: }else{ temp_bounds[,1] = rep_vector(negative_infinity(), Nrow); } // If upper bound applies: if(bounded[1] > 0){ if(bounded[3] == 0){ // fixed bounds temp_bounds[,2] = rep_vector(bounds[2], Nrow); }else if(bounded[1] == 1 || bounded[1] == 2){ // 1 or 2 means temp_bounds[,1] conditioned on current value of pop_mu/theta[bounded[3]] if(bounded[3] < 0){ // negative index used to indicate pop_mu instead of theta temp_bounds[,2] = rep_vector(pop_mu[-bounded[3]],Nrow); }else{ temp_bounds[,2] = theta[,bounded[3]]; } }else if(bounded[1] == 3 || bounded[1] == 4){ // 3 or 4 means temp_bounds[,1] conditioned on current value of pop_mu/theta[bounded[3]] + bounds[2] if(bounded[3] < 0){ // negative index used to indicate pop_mu instead of theta temp_bounds[,2] = pop_mu[-bounded[3]] + rep_vector(bounds[2], Nrow); }else{ temp_bounds[,2] = theta[,bounded[3]] + rep_vector(bounds[2], Nrow); } } // No upper bound: }else{ temp_bounds[,2] = rep_vector(positive_infinity(), Nrow); } return temp_bounds; } vector[] mv_trunc_norm(real mu, matrix L, int s, matrix b, vector u) { //Based on code documented here: https://docs.google.com/viewer?a=v&pid=forums&srcid=MDAzMzUyMDcyMTIzNzE2NjI3MjkBMDU5MTAzNzQ5MjU2OTMwMjk1MTMBTHZ4amxVQm5Cd0FKATAuMQEBdjI&authuser=0 int K = rows(u); int s_tmp = s; int n_b; // matrix[K,K] L = cholesky_decompose(S); vector[K] d; vector[K] z; vector[K] out[2]; real v; vector[2] z_star; vector[2] u_star; if(s == 0){ n_b = 0; }else if(s == 2 || s == 4){ n_b = 2; }else{ n_b = 1; } for (k in 1:K) { int km1 = k - 1; // Transform original bounds into z-space if(n_b > 0){ // If upper and lower bound: if(s == 2 || s == 4){ z_star[1] = (b[k,1] - (mu + ((k > 1) ? L[k,1:km1] * head(z, km1) : 0))) / L[k,k]; z_star[2] = (b[k,2] - (mu + ((k > 1) ? L[k,1:km1] * head(z, km1) : 0))) / L[k,k]; // If lower bound: }else if(s < 0){ z_star[1] = (b[k,1] - (mu + ((k > 1) ? L[k,1:km1] * head(z, km1) : 0))) / L[k,k]; // If upper bound: }else if(s > 0){ z_star[1] = (b[k,2] - (mu + ((k > 1) ? L[k,1:km1] * head(z, km1) : 0))) / L[k,k]; } // Convert z-space of bounds into value in (0,1) if(n_b == 1){ if((z_star[1] < -37.5 && z_star[1] > 8.25) && ((s < 0 && b[k,1] < mu) || (s > 0 && b[k,2] > mu)) ){ s_tmp = 0; }else{ s_tmp = s; u_star[1] = Phi(z_star[1]); } }else if(n_b == 2){ if((z_star[1] < -37.5 || z_star[1] > 8.25) && (z_star[2] < -37.5 || z_star[2] > 8.25) && b[k,1] < mu && b[k,2] > mu){ s_tmp = 0; }else if(z_star[2] > -37.5 || z_star[2] < 8.25){ //((z_star[1] < -37.5 || z_star[1] > 8.25) && b[k,1] < mu) || s_tmp = 1; u_star[1] = Phi(z_star[2]); }else if(z_star[1] > -37.5 || z_star[1] < 8.25){ //(z_star[2] < -37.5 || z_star[2] > 8.25) && b[k,2] > mu s_tmp = -1; u_star[1] = Phi(z_star[1]); }else{ s_tmp = s; u_star[1:n_b] = Phi(z_star[1:n_b]); } } } if(s_tmp == 0){ // no bounds v = u[k]; d[k] = 1; }else if(s_tmp == 2 || s_tmp == 4){ // upper and lower bounds // convert sample from (0,1) into sample from (u_star[1], u_star[2]) v = u_star[1] + (u_star[2] - u_star[1])*u[k]; // adjust density for the reduced possible u-space d[k] = (u_star[2] - u_star[1]); }else if(s_tmp > 0){ // upper bound only // convert sample from (0,1) into sample from (0, u_star[2]) v = u_star[1]*u[k]; // adjust density for the reduced possible u-space d[k] = u_star[1]; }else if(s_tmp < 0){ // lower bound only // convert sample from (0,1) into sample from (u_star[1], 1) v = u_star[1] + (1 - u_star[1])*u[k]; // adjust density for the reduced possible u-space d[k] = 1 - u_star[1]; } // convert sample in u-space to z-space z[k] = inv_Phi(v); } out[1] = mu + L * z; out[2] = log(d); return out; } vector trunc_norm(real mu, real sigma, int s, row_vector b, real u){ vector[1] out_tmp[2]; matrix[1,1] sigma_tmp = [ [ sigma ] ]; vector[1] u_tmp = [ u ]'; vector[2] out; out_tmp = mv_trunc_norm(mu, sigma_tmp, s, [ b ], u_tmp); out[1] = out_tmp[1][1]; out[2] = out_tmp[2][1]; return out; } // Indices and xi_offset for dimensions packed into x_i int get_Nsim(int[] x_i){ return x_i[1]; } int get_Nobs(int[] x_i){ return x_i[2]; } int get_Nw(int[] x_i){ return x_i[3]; } int get_Nwv(int[] x_i){ return x_i[4]; } int get_Np(int[] x_i){ return x_i[5]; } int get_Ng(int[] x_i){ return x_i[6]; } int get_Nv(int[] x_i){ return x_i[7]; } int x_i_offset(){ return 7; } int[,] unpack_w_ind(int[] x_i){ int xi_offset = x_i_offset(); int Nsim = get_Nsim(x_i); int w_ind[Nsim,2]; for(i in 1:Nsim){ w_ind[i,] = x_i[(xi_offset+(i-1)*2+1):(xi_offset+i*2)]; } return w_ind; } int[,] unpack_obs_ind(int[] x_i){ int Nsim = get_Nsim(x_i); int xi_offset = x_i_offset()+Nsim*3; int obs_ind[Nsim,2]; for(i in 1:Nsim){ obs_ind[i,] = x_i[(xi_offset+(i-1)*2+1):(xi_offset+i*2)]; } return obs_ind; } int[] unpack_g_ind(int[] x_i){ int Nsim = get_Nsim(x_i); int xi_offset = x_i_offset() + Nsim*2 ; int g_ind[Nsim]; g_ind = x_i[(xi_offset + 1):(xi_offset + Nsim)]; return g_ind; } int[] unpack_t_ind(int[] x_i){ int Nsim = get_Nsim(x_i); int Nobs = get_Nobs(x_i); int xi_offset = x_i_offset() + Nsim*5; int t_ind[Nobs]; t_ind = x_i[(xi_offset + 1):(xi_offset + Nobs)]; return t_ind; } int[] unpack_v_ind(int[] x_i){ int Nsim = get_Nsim(x_i); int Nobs = get_Nobs(x_i); int xi_offset = x_i_offset() + Nsim*5 + Nobs; int v_ind[Nobs]; v_ind = x_i[(xi_offset + 1):(xi_offset + Nobs)]; return v_ind; } int[] unpack_v_type(int[] x_i){ int Nv = get_Nv(x_i); int Nsim = get_Nsim(x_i); int Nobs = get_Nobs(x_i); int xi_offset = x_i_offset()+Nsim*5+Nobs*2; int v_type[Nv]; v_type = x_i[(xi_offset+1):(xi_offset+Nv)]; return v_type; } real[,] unpack_wth(int[] x_i, real[] x_r){ int Nw = get_Nw(x_i); int Nwv = get_Nwv(x_i); real wth[Nw,Nwv]; for(i in 1:Nw){ wth[i,1:Nwv] = x_r[((i-1)*Nwv+1):(i*Nwv)]; } return wth; } real[] unpack_obs(int[] x_i, real[] x_r){ int Nw = get_Nw(x_i); int Nwv = get_Nwv(x_i); int Nobs = get_Nobs(x_i); real obs[Nobs]; int xi_offset = Nw*Nwv; obs = x_r[(xi_offset+1):(xi_offset+Nobs)]; return obs; } real dof_fun(real To, real rn, real rd); real A_fun(real To, real rn, real rd); // { // real x = rn*(dof*rd-log(rn)+log(rd-rn))/rd; // real A = rd*exp(x)/(rd - rn); // return A; // } real exp_tt(real T, real A, real rn, real rdenom, real dof); // { // real Dp = A*exp(-rn*T)/(1+exp((rdenom)*(dof-T))); // return Dp; // } real sigmoid(real x, real sen, real mid_pt, real rate); // { // // real fac; // // fac = 1 - sen + sen/(1+exp(-rate*(x-mid_pt))); // // return fac; // // } vector continuous_dydt(vector y, vector theta, data real[] wth){ vector[num_elements(y)] dydt; real raw_tt; real vrn_fac; real ppd_fac; // theta[1] = dpj // theta[2] = dph // theta[3] = At (derived from trten, trted, Topt) // theta[4] = trten // theta[5] = trten + trted // theta[6] = doft (derived from trten, trted, Topt) // theta[7] = vreq // theta[8] = vsens // theta[9] = Av (derived from vrten, vrted, vopt) // theta[10] = vrten // theta[11] = vrten + vrted // theta[12] = dofv (derived from vrten, vrted, vopt) // theta[13] = ppdsens // theta[14] = ppdmid // theta[15] = ppdrte // Cumulative Vernalization Days // wth[1] = TAVG // theta[9] = Av (derived from vrten, vrted, vopt) // theta[10] = vrten // theta[11] = vrten+vrted // theta[12] = dofv (derived from vrten, vrted, vopt) dydt[1] = exp_tt(wth[1], theta[9], theta[10], theta[11], theta[12]); // Photothermal (Physiological) Days // raw_tt = raw thermal time // wth[1] = TAVG // theta[3] = At (derived from trten, trted, Topt) // theta[4] = trten // theta[5] = trten+trted // theta[6] = doft (derived from trten, trted, Topt) raw_tt = exp_tt(wth[1], theta[3], theta[4], theta[5], theta[6]); // vrn_fac = vernalization factor // y[1] = cumulative vernalization days // theta[7] = vsens (vernalization sensitivity) // theta[6] = vernalization requirement vrn_fac = sigmoid(y[1],theta[7], theta[6]/2, 10/theta[6]); // ppd_fac = photoperiod factor // wth[2] = daylength // theta[13] = photoperiod sensitivity // theta[14] = midpoint photoperiod // theta[15] = rate parameter for photoperiod ppd_fac = sigmoid(wth[2], theta[13], theta[14], theta[15]); // calculate change in photothermal days dydt[2] = vrn_fac*ppd_fac*raw_tt; return dydt; } real [ , ] euler_integrate_ode(vector yini, vector theta, int[] t, real[,] forcings, real dt){ int Nv = num_elements(yini); int Nt = num_elements(t); real output[Nt,Nv]; vector[Nv] y; vector[Nv] dy_dt; int j = 1; int i = 1; int n = t[Nt]; y = yini; while(j <= Nt){ dy_dt = continuous_dydt(y, theta, forcings[i,]); for(v in 1:Nv){ y[v] += dy_dt[v]*dt; } if(i == t[j]){ for(v in 1:Nv){ output[j,v] = y[v]; } j += 1; } i += 1; } return output; } vector run_model(vector theta, vector dummy, real[] x_r, int[] x_i){ int Nsim = get_Nsim(x_i); int Nobs = get_Nobs(x_i); int Nw = get_Nw(x_i); int Nwv = get_Nwv(x_i); int Np = get_Np(x_i) + 2; int Ng = get_Ng(x_i); int Nv = get_Nv(x_i); vector[Nobs] sim; int w_ind[Nsim, 2] = unpack_w_ind(x_i); real wth[Nw, Nwv] = unpack_wth(x_i, x_r); int g_ind[Nsim] = unpack_g_ind(x_i); int obs_ind[Nsim, 2] = unpack_obs_ind(x_i); int t_ind[Nobs] = unpack_t_ind(x_i); int v_ind[Nobs] = unpack_v_ind(x_i); int v_type[Nv] = unpack_v_type(x_i); for(s in 1:Nsim){ int Nt = max(t_ind[obs_ind[s,1]:obs_ind[s,2]]); int p[2] = { (g_ind[s] - 1)*Np + 1 , g_ind[s]*Np }; int t[Nt]; int Ncol = 2; vector[Ncol] yini = rep_vector(0,Ncol); real output[Nt, Ncol]; for(i in 1:Nt){ t[i] = i; } output = euler_integrate_ode(yini, theta[p[1]:p[2]], t, wth[w_ind[s,1]:w_ind[s,2],], 1.); for(i in obs_ind[s,1]:obs_ind[s,2]){ if(v_type[v_ind[i]] <= Ncol){ sim[i] = output[t_ind[i],v_type[v_ind[i]]]; }else if(v_type[v_ind[i]] == 3){ // Convert photothermal (physiological) days to relative expression first hollow stem sim[i] = output[t_ind[i],2]/theta[p[1]]; }else if(v_type[v_ind[i]] == 4){ // Convert photothermal (physiological) days to relative expression heading sim[i] = output[t_ind[i],2]/(theta[p[1]+1]); } } } return sim; } vector parallel_log_likelihood(vector theta, vector dummy, data real[] x_r, data int[] x_i){ int Nobs = get_Nobs(x_i); int Ng = get_Ng(x_i); int Np = get_Np(x_i) + 2; int v_ind[Nobs] = unpack_v_ind(x_i); real obs[Nobs] = unpack_obs(x_i, x_r); vector[Nobs] sim = run_model(theta, dummy, x_r, x_i); vector[Nobs] log_lik; for(i in 1:Nobs){ log_lik[i] = normal_lpdf(obs[i] | sim[i], theta[Ng*Np + v_ind[i]]); } return log_lik; } } data{ int Np; // number of parameters int Ng; // number of genotypes int Nshards; // number of shards of data // Training data: int N_row_training; // number of rows in x_i_training and x_r_training int Nxi_col_training; // number of columns in x_i_training int Nxr_col_training; // number of columns in x_r_training int x_i_training[N_row_training,Nxi_col_training]; // packed integer data real x_r_training[N_row_training,Nxr_col_training]; // packed real data // Holdout data: int N_row_holdout; // number of rows in x_i_holdout and x_r_holdout int Nxi_col_holdout; // number of columns in x_i_holdout int Nxr_col_holdout; // number of columns in x_r_holdout int x_i_holdout[N_row_holdout,Nxi_col_holdout]; // packed integer data real x_r_holdout[N_row_holdout,Nxr_col_holdout]; // packed real data // genetic relationship matrix matrix[Ng,Ng] A_mat; // indicator of whether parameter is bounded; int b_index[Np,6]; // -1 = lower bound; // 0 = no bound; // 1 = upper bound; // 2 = upper and lower bound; // if column two != 0 use that parameter number for lower bound; // if column three != 0 use that parameter number for upper bound; // prior information for each parameter matrix[Np,4] prior; } transformed data{ vector[Np] mu_prior = prior[,1]; vector[Np] sigma_prior = prior[,2]; matrix[Np,2] bounds = prior[,3:4]; int pop_b_ind[Np,3] = b_index[,1:3]; int b_ind[Np,3] = b_index[,4:6]; // dummy argument for map_rect() and run_model_parallel() vector[0] dummy[Nshards]; // matrix[Ng,Ng] L = cholesky_decompose(S); int Nv = get_Nv(x_i_training[1,]); int Nobs_training = 0; int Nobs_holdout = 0; if(Nxi_col_training > 0){ for(i in 1:N_row_training){ Nobs_training += get_Nobs(x_i_training[i,]); } } if(Nxi_col_holdout > 0){ for(i in 1:N_row_holdout){ Nobs_holdout += get_Nobs(x_i_holdout[i,]); } } } parameters { // Population raw mu drawn from uniform(0,1) vector[Np] pop_mu_raw; // Population sigmas vector[Np] pop_sigma_raw; // Genotype-specific draws from [0,1] for theta matrix[Ng,Np] theta_raw; // Residual variance vector[Nv] v_resid; } transformed parameters{ real lp_adj; // Population-level mu vector[Np] pop_mu; vector[Np] pop_sigma; // Genotype-level parameter values matrix[Ng,Np] theta_mat; vector[Ng*(Np+2)+Nv] theta_vec; // Temporary variables matrix[Ng,2] temp_bounds; vector[Ng] mvtn_temp[2]; matrix[1,2] tn_bounds; vector[2] tn_temp; matrix[Ng,Ng] Lp; lp_adj = 0; // Apply non-centered parameterization equivalent to: // pop_mu ~ normal(mu_prior,sigma_prior) // pop_sigma ~ normal(0,sigma_prior) for(p in 1:Np){ tn_bounds = set_temp_bounds(1, pop_b_ind[p,], bounds[p,], theta_mat , pop_mu); tn_temp = trunc_norm(mu_prior[p], sigma_prior[p], pop_b_ind[p,1], tn_bounds[1,], pop_mu_raw[p]); pop_mu[p] = tn_temp[1]; pop_sigma[p] = pop_sigma_raw[p]*sigma_prior[p]; lp_adj += tn_temp[2]; } for(p in 1:Np){ // Set bounds for theta[,p] based on absolute thresholds from bounds[] or // relative thresholds based on theta[] temp_bounds = set_temp_bounds(Ng, b_ind[p,], bounds[p,], theta_mat, pop_mu); // Convert relationship matrix (A_mat) into covariance matrix using // pop_sigma[p] and apply cholesky decomposition Lp = cholesky_decompose(A_mat*pow(pop_sigma[p],2)); // Convert theta_raw drawn from uniform(0,1) into multivariate normal // Equivalent to theta[,p] ~ multi_normal(pop_mu[p],S*pop_sigma[p]) mvtn_temp = mv_trunc_norm(pop_mu[p], Lp, b_ind[p,1], temp_bounds, theta_raw[,p]); // Extract sampled values for theta[,p] theta_mat[,p] = mvtn_temp[1]; lp_adj += sum(mvtn_temp[2]); } // Pack all parameters into theta_vec for(g in 1:Ng){ theta_vec[(g-1)*(Np+2)+1] = theta_mat[g,1]; theta_vec[(g-1)*(Np+2)+2] = theta_mat[g,2]; theta_vec[(g-1)*(Np+2)+3] = A_fun(theta_mat[g,3], theta_mat[g,4], sum(theta_mat[g,4:5])); theta_vec[(g-1)*(Np+2)+4] = theta_mat[g,4]; theta_vec[(g-1)*(Np+2)+5] = sum(theta_mat[g,4:5]); theta_vec[(g-1)*(Np+2)+6] = dof_fun(theta_mat[g,3], theta_mat[g,4], sum(theta_mat[g,4:5])); theta_vec[(g-1)*(Np+2)+7] = theta_mat[g,6]; theta_vec[(g-1)*(Np+2)+8] = theta_mat[g,7]; theta_vec[(g-1)*(Np+2)+9] = A_fun(theta_mat[g,8], theta_mat[g,9], sum(theta_mat[g,9:10])); theta_vec[(g-1)*(Np+2)+10] = theta_mat[g,9]; theta_vec[(g-1)*(Np+2)+11] = sum(theta_mat[g,9:10]); theta_vec[(g-1)*(Np+2)+12] = dof_fun(theta_mat[g,8], theta_mat[g,9], sum(theta_mat[g,9:10])); theta_vec[(g-1)*(Np+2)+13] = theta_mat[g,11]; theta_vec[(g-1)*(Np+2)+14] = theta_mat[g,12]; theta_vec[(g-1)*(Np+2)+15] = theta_mat[g,13]; } for(v in 1:Nv){ theta_vec[Ng*(Np+2) + v] = v_resid[v]; } } model { //////////// // Priors // //////////// pop_mu_raw ~ uniform(0,1); pop_sigma_raw ~ std_normal(); v_resid ~ std_normal(); for(p in 1:Np){ theta_raw[,p] ~ uniform(0,1); } target += lp_adj; //////////////// // Likelihood // //////////////// for(i in 1:Nshards){ target += parallel_log_likelihood(theta_vec, dummy[i], x_r_training[i,], x_i_training[i,]); } // target += sum( map_rect(parallel_log_likelihood, theta_vec, dummy, x_r_training, x_i_training) ); } // generated quantities{ // // vector[Nobs_training] yhat_training; // vector[Nobs_holdout] yhat_holdout; // vector[Nobs_holdout] log_lik; // // yhat_training = map_rect(run_model, theta_vec, dummy, x_r_training, x_i_training); // // if(Nobs_holdout > 0){ // yhat_holdout = map_rect(run_model, theta_vec, dummy, x_r_holdout, x_i_holdout); // log_lik = map_rect(parallel_log_likelihood, theta_vec, dummy, x_r_holdout, x_i_holdout); // } // // }