For map_rect, how much more inefficient is it to pass reals rather than ints?

when using map_rect with non rectangular data/parameter structure, I have two options

  1. pass a redundant array of 4000 means (parameter, same length of data)
  2. pass a non-redundant array of ~30 means, and pass a redundant array of 4000 indices which are integers. And within the map_rect block do means[indices]

(1) is much simpler to code, but more memory inefficient. Should I bother with (2)? Or does it really not matter much?

At the moment with 8 cores and 8 shards I get 400% peak CPU usage and a 2-fold speedup. So not that great.

Thanks.

Could you draft a demo code for illustration?

I can give a few functions that I use, but really the implementation of data packaging and un-packaging is so dependent on needs that any example I give is going to be pretty useless.

Nonetheless, just to give an idea, here are some functions that take a unidimensional array and return a sharded 2D structure with fill values for the gaps:


int get_real_buffer_size(vector v, real threshold){
		// Counts how many trailing buffer (fake) values, equal to `threshold`,
		// were appended to v to make the map_rect shards rectangular.
		//
		// v         : packed vector, possibly padded at the end with `threshold`
		// threshold : sentinel value used as buffer filler (e.g. -999)
		// returns   : number of trailing elements equal to `threshold`

		int s = rows(v); // Size of the whole vector
		int n = 0;       // Length of the trailing buffer found so far

		// Scan backwards from the last element. The `n < s` guard prevents an
		// out-of-bounds read when the whole vector consists of buffer values
		// (the original loop would eventually index v[0] in that case).
		while(n < s && v[s-n] == threshold) n += 1;

		return n;
	}

	vector clear_real_from_buffer(vector v, real buffer_value){
		// Strips the trailing buffer values (padding added for map_rect)
		// from a packed vector and returns the meaningful prefix.
		int n_buffer = get_real_buffer_size(v, buffer_value);
		return v[1:(rows(v) - n_buffer)];
	}

	int[] clear_int_from_buffer(int[] v, int buffer_value){
		// Strips the trailing buffer values (padding added for map_rect)
		// from a packed integer array and returns the meaningful prefix.
		int n_buffer = get_int_buffer_size(v, buffer_value);
		return v[1:(num_elements(v) - n_buffer)];
	}

	vector rep_vector_by_array(vector v, int[] reps){
		// Repeats each element v[k] exactly reps[k] times, concatenated
		// in order. E.g. v = [a, b]', reps = {2, 3} -> [a, a, b, b, b]'.
		vector[sum(reps)] expanded;
		int pos = 0; // 0-based offset of the next free slot in `expanded`

		for(k in 1:num_elements(reps)){
			expanded[pos+1:pos+reps[k]] = rep_vector(v[k], reps[k]);
			pos += reps[k];
		}

		return expanded;
	}

	// Packs all integer data needed by each map_rect shard into one
	// rectangular 2D int array (shards x max_col). Each row holds: 10 scalar
	// indices/sizes, then the per-shard slices of the data arrays, then a tail
	// of `threshold` sentinels so that every row has the same length.
	// The unpacking code inside the map_rect functor must read fields in
	// exactly this order — keep the two in sync.
	int[,] package_int(
		int C,
		int GM,
		int Q,
		int shards,
		int[] counts,

		int[] size_counts_idx_lv_1_MPI,
		int[] size_counts_G_lv_1_MPI,
		int[] size_counts_G_lv_1_MPI_non_redundant,
		int[] size_counts_S_lv_1_MPI,
		int[] size_G1_linear_MPI,
		int[] size_y_linear_S_1_MPI,
		int[] size_y_linear_1_MPI,
		int[,] counts_idx_lv_1_MPI,
		int[,] counts_G_lv_1_MPI_non_redundant_reps,

		int[,]	y_linear_1_MPI

	){
		// Sentinel used to pad short rows; must match the value the
		// unpacking code strips (see get_int_buffer_size / clear_*_from_buffer).
		int threshold = -999;
		// Number of scalar header fields at the start of every row (the 10
		// singleton entries concatenated below).
		int dim_indices = 10;

		// Widest possible row across all shards: header + the maxima of the
		// variable-length sections. Rows narrower than this get padded.
		int max_col = dim_indices + max(size_counts_idx_lv_1_MPI)+ max(size_counts_G_lv_1_MPI_non_redundant) + max(size_y_linear_1_MPI) ;

		int int_pack[shards,max_col];

		for(i in 1:shards){

			// Actual (unpadded) width of this shard's row.
			int real_col = dim_indices + size_counts_idx_lv_1_MPI[i]+  size_counts_G_lv_1_MPI_non_redundant[i] + size_y_linear_1_MPI[i] ;

			// concatenate_int_array is a user-defined helper (defined
			// elsewhere in this file) that flattens an array of int arrays.
			int_pack[i] =
				concatenate_int_array({
					// indexes
					{C},
					{GM},
					{Q},
					{size_counts_idx_lv_1_MPI[i]},
	 				{size_counts_G_lv_1_MPI[i]},
	 				{size_counts_G_lv_1_MPI_non_redundant[i]},
		 			{size_counts_S_lv_1_MPI[i]},
		 			{size_G1_linear_MPI[i]},
		 			{size_y_linear_S_1_MPI[i]},
		 			{size_y_linear_1_MPI[i]},

					// data
					counts[counts_idx_lv_1_MPI[i, 1:size_counts_idx_lv_1_MPI[i]]],

					// Indexed for optimised MPI
					counts_G_lv_1_MPI_non_redundant_reps[i, 1:size_counts_G_lv_1_MPI_non_redundant[i]],

					// counts mix
					y_linear_1_MPI[i, 1:size_y_linear_1_MPI[i]],


					// Buffer
					rep_int(threshold, max_col-real_col)
				});

			//print("|| int ", num_elements(int_pack[i]));
		}

		return int_pack;
	}

	// Packs all real-valued (parameter) data needed by each map_rect shard
	// into one rectangular array of vectors (shards x max_col), padded at the
	// tail with `threshold` sentinels. The section order must mirror the
	// unpacking code inside the map_rect functor.
	// NOTE(review): parameters size_counts_G_lv_1_MPI and counts_G_lv_1_MPI
	// appear unused in this body — possibly leftovers; verify against callers.
	vector[] package_real(
		int C,
		int Q,
		int shards,

		int[] size_counts_G_lv_1_MPI,
		int[] size_counts_G_lv_1_MPI_non_redundant,
		int[] size_counts_S_lv_1_MPI,
		int[] size_G1_linear_MPI,
		int[] size_y_linear_S_1_MPI,

		int[,] counts_G_lv_1_MPI,
		int[,] counts_G_lv_1_MPI_non_redundant,
		int[,] counts_S_lv_1_MPI,

		int[,] G1_linear_MPI,
		int[,] y_linear_S_1_MPI,

		vector lambda_log,
		vector exposure_rate,
		vector sigma_inv_log,
		vector[] prop_1
	){
		// Sentinel used to pad short rows; matches the int-packing side.
		real threshold = -999;

		// Widest possible row across all shards.
		// size_counts_G_lv_1_MPI_non_redundant appears twice on purpose:
		// both lambda_log and sigma_inv_log are sliced with the same
		// non-redundant index set below.
		// (Q*C) is assumed to be num_elements of the flattened prop_1 —
		// TODO confirm against prop_1's declared dimensions.
		int max_col =
			max(size_counts_G_lv_1_MPI_non_redundant) +
			max(size_counts_S_lv_1_MPI) +
			max(size_counts_G_lv_1_MPI_non_redundant) +
			(Q*C) +
			max(size_G1_linear_MPI) +
			max(size_y_linear_S_1_MPI);

		vector[max_col] real_pack[shards];

		for(i in 1:shards){

			// Actual (unpadded) width of this shard's row; mirrors the
			// term-by-term structure of max_col above.
			int real_col =
				size_counts_G_lv_1_MPI_non_redundant[i] +
				size_counts_S_lv_1_MPI[i] +
				size_counts_G_lv_1_MPI_non_redundant[i] +
				(Q*C) +
				size_G1_linear_MPI[i] +
				size_y_linear_S_1_MPI[i];

			// concatenate_vector_array / vector_array_to_matrix are
			// user-defined helpers defined elsewhere in this file.
			real_pack[i] =
				concatenate_vector_array({

					// The gene sample specific redundant estimates of the counts for the reference
					lambda_log[counts_G_lv_1_MPI_non_redundant[i, 1:size_counts_G_lv_1_MPI_non_redundant[i]]],

					// The exposure rate of all samples
					exposure_rate[counts_S_lv_1_MPI[i,1:size_counts_S_lv_1_MPI[i]]],

					// The gene sample specific redundant overdispersions of the counts for the reference
					sigma_inv_log[counts_G_lv_1_MPI_non_redundant[i, 1:size_counts_G_lv_1_MPI_non_redundant[i]]],

					// Proportion vector
					to_vector(vector_array_to_matrix(prop_1)),

					// The estimated of ref to de convolved together
					lambda_log[G1_linear_MPI[i, 1:size_G1_linear_MPI[i]]],

					// The exposure of the mix query samples
					exposure_rate[y_linear_S_1_MPI[i, 1:size_y_linear_S_1_MPI[i]]],

					// Buffer
					rep_vector(threshold, max_col-real_col)
				});

			//print("|| real ", num_elements(real_pack[i]));

		}

		return real_pack;
	}


It’s better to have redundant data than redundant parameters, because all parameter values and gradients get communicated both ways on map_rect calls. We should get ragged data structures in the next year or so, which should make this all a lot easier.

1 Like