// Registration.cu

#include "ndt_gpu/Registration.h"
#include "ndt_gpu/debug.h"

#include <cstdio>   // printf() in the computeTransformation() stub
#include <iostream>

namespace gpu {
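
// Default constructor: zero-initialize all state and mark every device
// buffer as unallocated (NULL) so the destructor can free them safely.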
GRegistration::GRegistration()
{
  max_iterations_ = 0;
  x_ = y_ = z_ = NULL;
  points_number_ = 0;

  trans_x_ = trans_y_ = trans_z_ = NULL;

  converged_ = false;
  nr_iterations_ = 0;

  transformation_epsilon_ = 0;
  target_cloud_updated_ = true;
  target_points_number_ = 0;

  target_x_ = target_y_ = target_z_ = NULL;
  is_copied_ = false;
}
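
// Copy constructor. Device buffers are shared, not duplicated: the copy
// aliases the other instance's GPU pointers and sets is_copied_ so that
// its destructor will not free memory it does not own.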
GRegistration::GRegistration(const GRegistration &other)
{
  transformation_epsilon_ = other.transformation_epsilon_;
  max_iterations_ = other.max_iterations_;

  // Original scanned point clouds
  x_ = other.x_;
  y_ = other.y_;
  z_ = other.z_;
  points_number_ = other.points_number_;

  trans_x_ = other.trans_x_;
  trans_y_ = other.trans_y_;
  trans_z_ = other.trans_z_;

  converged_ = other.converged_;
  nr_iterations_ = other.nr_iterations_;

  final_transformation_ = other.final_transformation_;
  transformation_ = other.transformation_;
  previous_transformation_ = other.previous_transformation_;

  target_cloud_updated_ = other.target_cloud_updated_;

  target_x_ = other.target_x_;
  target_y_ = other.target_y_;
  target_z_ = other.target_z_;
  target_points_number_ = other.target_points_number_;

  is_copied_ = true;
}
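
// Destructor: release all device buffers, but only if this instance owns
// them (copies created via the copy constructor share the originals).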
GRegistration::~GRegistration()
{
  if (!is_copied_) {
    if (x_ != NULL) {
      checkCudaErrors(cudaFree(x_));
      x_ = NULL;
    }
    if (y_ != NULL) {
      checkCudaErrors(cudaFree(y_));
      y_ = NULL;
    }
    if (z_ != NULL) {
      checkCudaErrors(cudaFree(z_));
      z_ = NULL;
    }

    if (trans_x_ != NULL) {
      checkCudaErrors(cudaFree(trans_x_));
      trans_x_ = NULL;
    }
    if (trans_y_ != NULL) {
      checkCudaErrors(cudaFree(trans_y_));
      trans_y_ = NULL;
    }
    if (trans_z_ != NULL) {
      checkCudaErrors(cudaFree(trans_z_));
      trans_z_ = NULL;
    }

    if (target_x_ != NULL) {
      checkCudaErrors(cudaFree(target_x_));
      target_x_ = NULL;
    }
    if (target_y_ != NULL) {
      checkCudaErrors(cudaFree(target_y_));
      target_y_ = NULL;
    }
    if (target_z_ != NULL) {
      checkCudaErrors(cudaFree(target_z_));
      target_z_ = NULL;
    }
  }
}
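
// Termination parameters and result accessors. transformation_epsilon_ is the
// minimum change in the transformation between iterations before convergence
// is declared, and max_iterations_ caps the optimization loop (the semantics
// mirror PCL's Registration interface).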
void GRegistration::setTransformationEpsilon(double trans_eps)
{
  transformation_epsilon_ = trans_eps;
}

double GRegistration::getTransformationEpsilon() const
{
  return transformation_epsilon_;
}

void GRegistration::setMaximumIterations(int max_itr)
{
  max_iterations_ = max_itr;
}

int GRegistration::getMaximumIterations() const
{
  return max_iterations_;
}

Eigen::Matrix<float, 4, 4> GRegistration::getFinalTransformation() const
{
  return final_transformation_;
}

int GRegistration::getFinalNumIteration() const
{
  return nr_iterations_;
}

bool GRegistration::hasConverged() const
{
  return converged_;
}
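
// Convert an array-of-structures point cloud (pcl::PointXYZ / pcl::PointXYZI)
// into structure-of-arrays form (separate x, y, z float arrays), which gives
// coalesced memory access in the per-coordinate kernels that run later.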
template <typename T>
__global__ void convertInput(T *input, float *out_x, float *out_y, float *out_z, int point_num)
{
  int idx = threadIdx.x + blockIdx.x * blockDim.x;
  int stride = blockDim.x * gridDim.x;

  // Grid-stride loop: correct for any point_num, regardless of grid size
  for (int i = idx; i < point_num; i += stride) {
    T tmp = input[i];
    out_x[i] = tmp.x;
    out_y[i] = tmp.y;
    out_z[i] = tmp.z;
  }
}
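
// Upload the source (scan) cloud. The cloud is copied to the device in its
// original AoS layout, split into the SoA buffers x_/y_/z_, and the
// transformed buffers trans_x_/trans_y_/trans_z_ are re-allocated and seeded
// with the untransformed coordinates.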
void GRegistration::setInputSource(pcl::PointCloud<pcl::PointXYZI>::Ptr input)
{
  // Convert the point cloud to separate float x, y, z arrays
  if (input->size() > 0) {
    points_number_ = input->size();

    pcl::PointXYZI *tmp;
    checkCudaErrors(cudaMalloc(&tmp, sizeof(pcl::PointXYZI) * points_number_));

    pcl::PointXYZI *host_tmp = input->points.data();

    // Pin the host buffer to speed up the host-to-device copy. Skipped on
    // aarch64 targets (e.g. Jetson boards), presumably due to
    // platform-specific pinned-memory behavior.
#ifndef __aarch64__
    checkCudaErrors(cudaHostRegister(host_tmp, sizeof(pcl::PointXYZI) * points_number_, cudaHostRegisterDefault));
#endif
    checkCudaErrors(cudaMemcpy(tmp, host_tmp, sizeof(pcl::PointXYZI) * points_number_, cudaMemcpyHostToDevice));

    // Re-allocate the SoA coordinate buffers for the new cloud size
    if (x_ != NULL) {
      checkCudaErrors(cudaFree(x_));
      x_ = NULL;
    }
    if (y_ != NULL) {
      checkCudaErrors(cudaFree(y_));
      y_ = NULL;
    }
    if (z_ != NULL) {
      checkCudaErrors(cudaFree(z_));
      z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&x_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&y_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&z_, sizeof(float) * points_number_));

    // One thread per point, at most BLOCK_SIZE_X threads per block
    int block_x = (points_number_ > BLOCK_SIZE_X) ? BLOCK_SIZE_X : points_number_;
    int grid_x = (points_number_ - 1) / block_x + 1;  // ceil(points_number_ / block_x)

    convertInput<pcl::PointXYZI><<<grid_x, block_x>>>(tmp, x_, y_, z_, points_number_);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    if (trans_x_ != NULL) {
      checkCudaErrors(cudaFree(trans_x_));
      trans_x_ = NULL;
    }
    if (trans_y_ != NULL) {
      checkCudaErrors(cudaFree(trans_y_));
      trans_y_ = NULL;
    }
    if (trans_z_ != NULL) {
      checkCudaErrors(cudaFree(trans_z_));
      trans_z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&trans_x_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&trans_y_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&trans_z_, sizeof(float) * points_number_));

    // Initially, also copy the scanned points to the transformed buffers
    checkCudaErrors(cudaMemcpy(trans_x_, x_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaMemcpy(trans_y_, y_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaMemcpy(trans_z_, z_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));

    checkCudaErrors(cudaFree(tmp));

    // Unpin the host buffer
#ifndef __aarch64__
    checkCudaErrors(cudaHostUnregister(host_tmp));
#endif
  }
}
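
// PointXYZ overload: identical to the PointXYZI version above, for clouds
// without an intensity channel.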
void GRegistration::setInputSource(pcl::PointCloud<pcl::PointXYZ>::Ptr input)
{
  // Convert the point cloud to separate float x, y, z arrays
  if (input->size() > 0) {
    points_number_ = input->size();

    pcl::PointXYZ *tmp;
    checkCudaErrors(cudaMalloc(&tmp, sizeof(pcl::PointXYZ) * points_number_));

    pcl::PointXYZ *host_tmp = input->points.data();

    // Pin the host buffer for a faster host-to-device copy
#ifndef __aarch64__
    checkCudaErrors(cudaHostRegister(host_tmp, sizeof(pcl::PointXYZ) * points_number_, cudaHostRegisterDefault));
#endif
    checkCudaErrors(cudaMemcpy(tmp, host_tmp, sizeof(pcl::PointXYZ) * points_number_, cudaMemcpyHostToDevice));

    if (x_ != NULL) {
      checkCudaErrors(cudaFree(x_));
      x_ = NULL;
    }
    if (y_ != NULL) {
      checkCudaErrors(cudaFree(y_));
      y_ = NULL;
    }
    if (z_ != NULL) {
      checkCudaErrors(cudaFree(z_));
      z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&x_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&y_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&z_, sizeof(float) * points_number_));

    int block_x = (points_number_ > BLOCK_SIZE_X) ? BLOCK_SIZE_X : points_number_;
    int grid_x = (points_number_ - 1) / block_x + 1;

    convertInput<pcl::PointXYZ><<<grid_x, block_x>>>(tmp, x_, y_, z_, points_number_);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    if (trans_x_ != NULL) {
      checkCudaErrors(cudaFree(trans_x_));
      trans_x_ = NULL;
    }
    if (trans_y_ != NULL) {
      checkCudaErrors(cudaFree(trans_y_));
      trans_y_ = NULL;
    }
    if (trans_z_ != NULL) {
      checkCudaErrors(cudaFree(trans_z_));
      trans_z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&trans_x_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&trans_y_, sizeof(float) * points_number_));
    checkCudaErrors(cudaMalloc(&trans_z_, sizeof(float) * points_number_));

    checkCudaErrors(cudaMemcpy(trans_x_, x_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaMemcpy(trans_y_, y_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaMemcpy(trans_z_, z_, sizeof(float) * points_number_, cudaMemcpyDeviceToDevice));

    checkCudaErrors(cudaFree(tmp));

#ifndef __aarch64__
    checkCudaErrors(cudaHostUnregister(host_tmp));
#endif
  }
}

// Set the target (map) cloud against which source scans are registered.
// The target stays fixed during alignment, so unlike the source no
// transformed copies are kept.
void GRegistration::setInputTarget(pcl::PointCloud<pcl::PointXYZI>::Ptr input)
{
  if (input->size() > 0) {
    target_points_number_ = input->size();

    pcl::PointXYZI *tmp;
    checkCudaErrors(cudaMalloc(&tmp, sizeof(pcl::PointXYZI) * target_points_number_));

    pcl::PointXYZI *host_tmp = input->points.data();

#ifndef __aarch64__
    checkCudaErrors(cudaHostRegister(host_tmp, sizeof(pcl::PointXYZI) * target_points_number_, cudaHostRegisterDefault));
#endif
    checkCudaErrors(cudaMemcpy(tmp, host_tmp, sizeof(pcl::PointXYZI) * target_points_number_, cudaMemcpyHostToDevice));

    if (target_x_ != NULL) {
      checkCudaErrors(cudaFree(target_x_));
      target_x_ = NULL;
    }
    if (target_y_ != NULL) {
      checkCudaErrors(cudaFree(target_y_));
      target_y_ = NULL;
    }
    if (target_z_ != NULL) {
      checkCudaErrors(cudaFree(target_z_));
      target_z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&target_x_, sizeof(float) * target_points_number_));
    checkCudaErrors(cudaMalloc(&target_y_, sizeof(float) * target_points_number_));
    checkCudaErrors(cudaMalloc(&target_z_, sizeof(float) * target_points_number_));

    int block_x = (target_points_number_ > BLOCK_SIZE_X) ? BLOCK_SIZE_X : target_points_number_;
    int grid_x = (target_points_number_ - 1) / block_x + 1;

    convertInput<pcl::PointXYZI><<<grid_x, block_x>>>(tmp, target_x_, target_y_, target_z_, target_points_number_);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

#ifndef __aarch64__
    checkCudaErrors(cudaHostUnregister(host_tmp));
#endif
    checkCudaErrors(cudaFree(tmp));
  }
}
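
// PointXYZ overload of setInputTarget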
void GRegistration::setInputTarget(pcl::PointCloud<pcl::PointXYZ>::Ptr input)
{
  if (input->size() > 0) {
    target_points_number_ = input->size();

    pcl::PointXYZ *tmp;
    checkCudaErrors(cudaMalloc(&tmp, sizeof(pcl::PointXYZ) * target_points_number_));

    pcl::PointXYZ *host_tmp = input->points.data();

#ifndef __aarch64__
    checkCudaErrors(cudaHostRegister(host_tmp, sizeof(pcl::PointXYZ) * target_points_number_, cudaHostRegisterDefault));
#endif
    checkCudaErrors(cudaMemcpy(tmp, host_tmp, sizeof(pcl::PointXYZ) * target_points_number_, cudaMemcpyHostToDevice));

    if (target_x_ != NULL) {
      checkCudaErrors(cudaFree(target_x_));
      target_x_ = NULL;
    }
    if (target_y_ != NULL) {
      checkCudaErrors(cudaFree(target_y_));
      target_y_ = NULL;
    }
    if (target_z_ != NULL) {
      checkCudaErrors(cudaFree(target_z_));
      target_z_ = NULL;
    }

    checkCudaErrors(cudaMalloc(&target_x_, sizeof(float) * target_points_number_));
    checkCudaErrors(cudaMalloc(&target_y_, sizeof(float) * target_points_number_));
    checkCudaErrors(cudaMalloc(&target_z_, sizeof(float) * target_points_number_));

    int block_x = (target_points_number_ > BLOCK_SIZE_X) ? BLOCK_SIZE_X : target_points_number_;
    int grid_x = (target_points_number_ - 1) / block_x + 1;

    convertInput<pcl::PointXYZ><<<grid_x, block_x>>>(tmp, target_x_, target_y_, target_z_, target_points_number_);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaFree(tmp));

#ifndef __aarch64__
    checkCudaErrors(cudaHostUnregister(host_tmp));
#endif
  }
}
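
// Run registration: reset the convergence state, then hand off to the
// derived class's computeTransformation() with the given initial guess.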
void GRegistration::align(const Eigen::Matrix<float, 4, 4> &guess)
{
  converged_ = false;

  final_transformation_ = transformation_ = previous_transformation_ = Eigen::Matrix<float, 4, 4>::Identity();

  computeTransformation(guess);
}
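
// Base-class stub; concrete methods (e.g. the NDT implementation) override
// this to perform the actual optimization.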
void GRegistration::computeTransformation(const Eigen::Matrix<float, 4, 4> &guess)
{
  printf("Unsupported by Registration\n");
}
}  // namespace gpu
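
// Usage sketch (illustrative, not part of this file). GRegistration is used
// through a derived class; the class name GNormalDistributionsTransform and
// its header are assumptions based on the surrounding ndt_gpu library:
//
//   #include "ndt_gpu/NormalDistributionsTransform.h"
//
//   gpu::GNormalDistributionsTransform ndt;
//   ndt.setTransformationEpsilon(0.01);
//   ndt.setMaximumIterations(30);
//   ndt.setInputTarget(map_cloud);    // pcl::PointCloud<pcl::PointXYZ>::Ptr
//   ndt.setInputSource(scan_cloud);
//
//   Eigen::Matrix4f guess = Eigen::Matrix4f::Identity();
//   ndt.align(guess);
//
//   if (ndt.hasConverged()) {
//     Eigen::Matrix4f pose = ndt.getFinalTransformation();
//   }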