setwd("~/misc/") library(rstan) library(ggplot2) library(psych); standardize = function(x) { mean = mean(x); sd = sd(x); dt_out = (x - mean) / sd; return(list(dt_out, mean, sd)) } set.seed(1); dt2 = read.csv("data_species.csv"); x_tmp2 = as.data.frame(matrix(NA, nrow = nrow(dt2), ncol = ncol(dt2) - 1)); colnames(x_tmp2) = colnames(dt2)[2:ncol(dt2)]; ## encode species as 1,2,3 x_tmp2$ID = as.vector(dt2$ID); x_tmp2$Long = dt2$Long; x_tmp2$Lat = dt2$Lat; x_tmp2$Species = as.numeric(as.factor(dt2$Species)); x_tmp2$Treat = as.numeric(as.factor(dt2$Treat)); x_tmp2$ThinYear0 = dt2$ThinYear0; x_tmp2$ThinYear1 = dt2$ThinYear1; x_tmp2$ThinYear2 = dt2$ThinYear2; x_tmp2$ThinYear3 = dt2$ThinYear3; x_tmp2$ThinYear4 = dt2$ThinYear4; x_tmp2$ThinYear5 = dt2$ThinYear5; x_tmp2_long = x_tmp2[,1:6]; ## year 0, then we add everything on at the end colnames(x_tmp2_long)[6] = "Thin"; x_tmp2_long = cbind(x_tmp2_long, 0); colnames(x_tmp2_long)[7] = "Year"; ## format data into long for(i in 1:nrow(x_tmp2)) { if(!is.na(x_tmp2$ThinYear1[i])) { temp = x_tmp2[i,c(1:5,7)]; temp = c(unlist(temp), 1); names(temp) = colnames(x_tmp2_long); x_tmp2_long = rbind(x_tmp2_long, temp); } } for(i in 1:nrow(x_tmp2)) { if(!is.na(x_tmp2$ThinYear2[i])) { temp = x_tmp2[i,c(1:5,8)]; temp = c(unlist(temp), 2); names(temp) = colnames(x_tmp2_long); x_tmp2_long = rbind(x_tmp2_long, temp); } } for(i in 1:nrow(x_tmp2)) { if(!is.na(x_tmp2$ThinYear3[i])) { temp = x_tmp2[i,c(1:5,9)]; temp = c(unlist(temp), 3); names(temp) = colnames(x_tmp2_long); x_tmp2_long = rbind(x_tmp2_long, temp); } } for(i in 1:nrow(x_tmp2)) { if(!is.na(x_tmp2$ThinYear4[i])) { temp = x_tmp2[i,c(1:5,10)]; temp = c(unlist(temp), 4); names(temp) = colnames(x_tmp2_long); x_tmp2_long = rbind(x_tmp2_long, temp); } } for(i in 1:nrow(x_tmp2)) { if(!is.na(x_tmp2$ThinYear5[i])) { temp = x_tmp2[i,c(1:5,11)]; temp = c(unlist(temp), 5); names(temp) = colnames(x_tmp2_long); x_tmp2_long = rbind(x_tmp2_long, temp); } } ## standarize lat/long for scale x_tmp2_long_std = x_tmp2_long; x_tmp2_long_std$Long = standardize(x_tmp2_long_std$Long)[[1]]; x_tmp2_long_std$Lat = standardize(x_tmp2_long_std$Lat)[[1]]; x_tmp2_long_std$Thin = standardize(x_tmp2_long_std$Thin)[[1]]; x_tmp2_long_std$Year = x_tmp2_long_std$Year / max(x_tmp2_long_std$Year) ## remove 10 trees test_trees = sample(unique(x_tmp2_long_std$ID), floor(length(unique(x_tmp2_long_std$ID)) / 4)); x_tmp2_long_in = x_tmp2_long_std[-which(x_tmp2_long_std$ID %in% test_trees),]; x = cbind(x_tmp2_long_in$Year, x_tmp2_long_in$Long, x_tmp2_long_in$Lat, dummy.code(x_tmp2_long_in$Species), dummy.code(x_tmp2_long_in$Treat)); y = x_tmp2_long_in$Thin; N = nrow(x); D = ncol(x); ## Generating Counterfactual Predictions from a Gaussian Process ## scenario 1: Generate Thin time-series, when all trees untreated (U): ## tree 1, at -81.70730 44.05264; ## standardized lat/long: 0.31149771 -0.59839259 ## Species: 3 x_pred1 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred1[, 1] = seq(0, 1, by = .01); ## for the time series x_pred1[, 2] = 0.31149771; ## lat x_pred1[, 3] = -0.59839259; x_pred1[, 4] = 0; x_pred1[, 5] = 0; x_pred1[, 6] = 1; x_pred1[, 7] = 0; x_pred1[, 8] = 1; x_pred1[, 9] = 0; N_pred1 = nrow(x_pred1); ## scenario 2: Generate Thin time-series, when all trees receive treatment (X) x_pred2 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred2[, 1] = seq(0, 1, by = .01); ## for the time series x_pred2[, 2] = 0.31149771; ## lat x_pred2[, 3] = -0.59839259; x_pred2[, 4] = 0; x_pred2[, 5] = 0; x_pred2[, 6] = 1; x_pred2[, 7] = 0; x_pred2[, 8] = 0; x_pred2[, 9] = 1; N_pred2 = nrow(x_pred2); ## Scenario 3: Generate thin, all trees receive treatment (T) x_pred3 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred3[, 1] = seq(0, 1, by = .01); ## for the time series x_pred3[, 2] = 0.31149771; ## lat x_pred3[, 3] = -0.59839259; x_pred3[, 4] = 0; x_pred3[, 5] = 0; x_pred3[, 6] = 1; x_pred3[, 7] = 1; x_pred3[, 8] = 0; x_pred3[, 9] = 0; N_pred3 = nrow(x_pred3); #### predict time series for one test tree example_test = x_tmp2_long_std[which(x_tmp2_long_std$ID == test_trees[1]),]; x_pred4 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred4[, 1] = seq(0, 1, by = .01); ## for the time series x_pred4[, 2] = example_test$Long[1]; ## lat x_pred4[, 3] = example_test$Lat[1]; x_pred4[, 4] = 0; x_pred4[, 5] = 0; x_pred4[, 6] = 1; x_pred4[, 7] = 0; x_pred4[, 8] = 1; x_pred4[, 9] = 0; N_pred4 = nrow(x_pred3); stan_rdump(list = c("x", "y", "N", "D", "x_pred1", "N_pred1", "x_pred2", "N_pred2", "x_pred3", "N_pred3", "x_pred4", "N_pred4"), "gp_trees_demo.input.R") ## massively simplify dataset ```{r} x = cbind(x_tmp2_long_in$Year, x_tmp2_long_in$Long, x_tmp2_long_in$Lat, dummy.code(x_tmp2_long_in$Treat, group = 2)); ## group 2 is untreated trees y = x_tmp2_long_in$Thin; N = nrow(x); D = ncol(x); x_pred1 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred1[, 1] = seq(0, 1, by = .01); ## for the time series x_pred1[, 2] = 0.31149771; ## lat x_pred1[, 3] = -0.59839259; x_pred1[, 4] = 1; N_pred1 = nrow(x_pred1); ## scenario 2: Generate Thin time-series, when all trees receive treatment (X) x_pred2 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred2[, 1] = seq(0, 1, by = .01); ## for the time series x_pred2[, 2] = 0.31149771; ## lat x_pred2[, 3] = -0.59839259; x_pred2[, 4] = 0; N_pred2 = nrow(x_pred2); example_test = x_tmp2_long_std[which(x_tmp2_long_std$ID == test_trees[1]),]; x_pred4 = matrix(NA, nrow = length(seq(0, 1, by = .01)), ncol = ncol(x)); x_pred4[, 1] = seq(0, 1, by = .01); ## for the time series x_pred4[, 2] = example_test$Long[1]; ## lat x_pred4[, 3] = example_test$Lat[1]; x_pred4[, 4] = 1; N_pred4 = nrow(x_pred4); stan_rdump(list = c("x", "y", "N", "D", "x_pred1", "N_pred1", "x_pred2", "N_pred2", "x_pred3", "N_pred3", "x_pred4", "N_pred4"), "gp_trees_demo_reduced.input.R");