Sure. Here is my model.

```
data {
int<lower=0> N;
vector[N] BIlmt_model2;
vector[N] multi_policy_count_model;
vector[N] unit_value_model2;
vector[N] yrs_owned_model2;
vector[N] class_model2_Sport;
vector[N] class_model2_Street;
vector[N] class_model2_other;
vector[N] Single_Driver_Age_model;
vector[N] Driver_Age_model;
int<lower=0> y1[N];
}
parameters {
real BIlmt_coeff;
real unit_value_model2_coeff;
real yrs_owned_model2_coeff;
real class_model2_Sport_coeff;
real class_model2_Street_coeff;
real class_model2_other_coeff;
real Single_Driver_Age_model_coeff;
real Driver_Age_model_coeff;
real intercept; // parameter for mean or intercept
}
model {
BIlmt_coeff ~ normal(0, 1);
unit_value_model2_coeff ~ normal(0, 1);
yrs_owned_model2_coeff ~ normal(0, 1);
class_model2_Sport_coeff ~ normal(0, 1);
class_model2_Street_coeff ~ normal(0, 1);
class_model2_other_coeff ~ normal(0, 1);
Single_Driver_Age_model_coeff ~ normal(0, 1);
Driver_Age_model_coeff ~ normal(0, 1);
intercept ~ normal(0,5);
y1 ~ poisson_log(BIlmt_model2*BIlmt_coeff +
yrs_owned_model2*yrs_owned_model2_coeff +
class_model2_Sport*class_model2_Sport_coeff +
class_model2_Street*class_model2_Street_coeff +
class_model2_other*class_model2_other_coeff +
Single_Driver_Age_model*Single_Driver_Age_model_coeff +
Driver_Age_model*Driver_Age_model_coeff +
intercept);
}
```

Then I go through with my new data like so.

```
sm_fit = sm_liab.sampling(data=data1, warmup=500, iter = 1000, verbose=True, n_jobs = 1)
my_samples = sm_fit.to_dataframe()
#make arrays of coeffs
BIlmt_coeff = my_samples['BIlmt_coeff']
unit_value_model2_coeff = my_samples['unit_value_model2_coeff']
yrs_owned_model2_coeff = my_samples['yrs_owned_model2_coeff']
class_model2_Sport_coeff = my_samples['class_model2_Sport_coeff']
class_model2_Street_coeff = my_samples['class_model2_Street_coeff']
class_model2_other_coeff = my_samples['class_model2_other_coeff']
Single_Driver_Age_model_coeff = my_samples['Single_Driver_Age_model_coeff']
Driver_Age_model_coeff = my_samples['Driver_Age_model_coeff']
intercept = my_samples['intercept']
#make array of new 'test' values
BIlmt_model2_new = np.asarray(x_test['BIlmt_model2'])
multi_policy_count_model_new = np.asarray(x_test['multi_policy_count_model'])
unit_value_model2_new = np.asarray(x_test['unit_value_model2'])
yrs_owned_model2_new = np.asarray(x_test['yrs_owned_model2'])
class_model2_Sport_new = np.asarray(x_test['class_model2_Sport'])
class_model2_Street_new = np.asarray(x_test['class_model2_Street'])
class_model2_other_new = np.asarray(x_test['class_model2_other'])
Single_Driver_Age_model_new = np.asarray(x_test['marital_status_model_S:Driver_Age_model'])
Driver_Age_model_new = np.asarray(x_test['Driver_Age_model'])
posterior = []
for i in range(num_samples):
y_new = np.exp(intercept[i] + BIlmt_model2_new*BIlmt_coeff[i] +
unit_value_model2_new*unit_value_model2_coeff[i] +
yrs_owned_model2_new*yrs_owned_model2_coeff[i] +
class_model2_Sport_new*class_model2_Sport_coeff[i] +
class_model2_Street_new*class_model2_Street_coeff[i] +
class_model2_other_new*class_model2_other_coeff[i] +
Single_Driver_Age_model_new*Single_Driver_Age_model_coeff[i] +
Driver_Age_model_new*Driver_Age_model_coeff[i])
posterior.append(y_new)
#join to actual results (y_test)
final = y_test.join(pd.DataFrame(posterior, columns=y_test.index).T)
#find lowest SSE
sse = []
for column in columns:
final['error'] = (final['frequency'] - final[column])**2
error_squared = final['error'].sum()
#print(final[column].name, final['error_sq
uared'])
sse.append((column, error_squared))
min(sse, key= lambda t: t[1])
#merge to test data and compare
test_4 = test_4.merge(final[713], left_index = True, right_index = True, how = 'left')
```

That script goes through and finds sample 713 has the lowest SSE out of 2000 samples. Then i compare it to the frequentist model and the actual by sorting the test data by the ratio (old model/new model) of the two models, breaking up the data into deciles and see how the actual lines up. That looks like the below:

As you can see above, the actual frequency lines up with my GLM model (model3_preds) and not the bayesian results `713`

.

I’m new to running this on my real world data so I’m looking to better my model ‘debugging’.