| 
					
				 | 
			
			
				@@ -1665,11 +1665,15 @@ void FMM_Pts<FMMNode>::EvalList_cuda(SetupData<Real_t>& setup_data) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         size_t *tmp_a, *tmp_b; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         size_t counter = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        size_t last = -1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        //size_t last = -1; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if (vec_cnt > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          size_t last = output_perm[interac_indx*4 + 3]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           int i; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           cudaMallocHost((void**)&tmp_a, sizeof(size_t)*vec_cnt); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           cudaMallocHost((void**)&tmp_b, sizeof(size_t)*vec_cnt); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          for (i = 0; i < 12; i++) std::cout << output_perm[(interac_indx + i)*4 + 3] << ", "; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          std::cout << '\n'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           tmp_a[0] = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           for (i = 1; i < vec_cnt; i++) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if (output_perm[(interac_indx + i)*4 + 3] != last) { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1680,6 +1684,7 @@ void FMM_Pts<FMMNode>::EvalList_cuda(SetupData<Real_t>& setup_data) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           tmp_b[counter] = i; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+          counter ++; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           for (i = 0; i < 12; i++) std::cout << tmp_a[i] << ", "; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           std::cout << '\n'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           for (i = 0; i < 12; i++) std::cout << tmp_b[i] << ", "; 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1705,6 +1710,7 @@ void FMM_Pts<FMMNode>::EvalList_cuda(SetupData<Real_t>& setup_data) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         cuda_func<Real_t>::out_perm_h (scaling_d, (char *) precomp_data_d.dev_ptr, output_perm_d,  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             (char *) output_data_d.dev_ptr, buff_out_d, interac_indx, M_dim1, vec_cnt, tmp_a, tmp_b, counter); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if (vec_cnt > 0) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           cudaFreeHost(tmp_a); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				           cudaFreeHost(tmp_b); 
			 |