Skip to content

Commit

Permalink
move code to the tensorrt_yolox
Browse files Browse the repository at this point in the history
Signed-off-by: ismetatabay <[email protected]>

style(pre-commit): autofix
  • Loading branch information
ismetatabay committed Aug 14, 2024
1 parent 4638322 commit 941c3f9
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,22 @@ class TrtYoloX
const std::string & model_path, const std::string & precision, const int num_class = 8,
const float score_threshold = 0.3, const float nms_threshold = 0.7,
const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
const bool use_gpu_preprocess = false, const int gpu_id = 0,
std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0,
[[maybe_unused]] const std::string & cache_dir = "",
const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
  /**
   * @brief Destruct TrtYoloX
   */
~TrtYoloX();

  /**
   * @brief select the CUDA device used for inference
   * @param[in] gpu_id GPU device id
   */
bool setCudaDeviceId(const uint8_t gpu_id);

/**
* @brief run inference including pre-process and post-process
* @param[out] objects results for object detection
Expand Down Expand Up @@ -267,7 +274,7 @@ class TrtYoloX
size_t out_elem_num_per_batch_;
CudaUniquePtr<float[]> out_prob_d_;

StreamUniquePtr stream_{makeCudaStream()};
StreamUniquePtr stream_;

int32_t max_detections_;
std::vector<float> scales_;
Expand All @@ -280,6 +287,8 @@ class TrtYoloX

// flag whether preprocess are performed on GPU
bool use_gpu_preprocess_;
// GPU id for inference
uint8_t gpu_id_;
// host buffer for preprocessing on GPU
CudaUniquePtrHost<unsigned char[]> image_buf_h_;
// device buffer for preprocessing on GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ class TrtYoloXNode : public rclcpp::Node
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
bool setCudaDeviceId(const uint8_t gpu_id);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -97,7 +96,6 @@ class TrtYoloXNode : public rclcpp::Node
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
float overlap_roi_score_threshold_;
uint8_t gpu_id_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
{"UNKNOWN", 3}, // other
Expand Down
30 changes: 25 additions & 5 deletions perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,25 @@ namespace autoware::tensorrt_yolox
TrtYoloX::TrtYoloX(
const std::string & model_path, const std::string & precision, const int num_class,
const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config,
const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor,
[[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
const size_t max_workspace_size, const std::string & color_map_path)
const bool use_gpu_preprocess, const int gpu_id, std::string calibration_image_list_path,
const double norm_factor, [[maybe_unused]] const std::string & cache_dir,
const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
const std::string & color_map_path)
{
gpu_id_ = gpu_id;
std::cout << "GPU" << gpu_id << "is selected for the inference !" << std::endl;
if (!setCudaDeviceId(gpu_id_)) {
throw std::runtime_error(
"GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable.");
}
src_width_ = -1;
src_height_ = -1;
norm_factor_ = norm_factor;
batch_size_ = batch_config[2];
multitask_ = 0;
sematic_color_map_ = get_seg_colormap(color_map_path);
stream_ = makeCudaStream();

if (precision == "int8") {
if (build_config.clip_value <= 0.0) {
if (calibration_image_list_path.empty()) {
Expand Down Expand Up @@ -217,7 +226,6 @@ TrtYoloX::TrtYoloX(
calibrator.reset(
new tensorrt_yolox::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_));
}

trt_common_ = std::make_unique<tensorrt_common::TrtCommon>(
model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config);
} else {
Expand Down Expand Up @@ -262,7 +270,6 @@ TrtYoloX::TrtYoloX(
throw std::runtime_error{s.str()};
*/
}

Check warning on line 272 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

TrtYoloX::TrtYoloX increases in cyclomatic complexity from 22 to 24, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.

// GPU memory allocation
const auto input_dims = trt_common_->getBindingDimensions(0);
const auto input_size =
Expand Down Expand Up @@ -341,6 +348,16 @@ TrtYoloX::~TrtYoloX()
}
}

bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id)
{
cudaError_t cuda_err = cudaSetDevice(gpu_id);
if (cuda_err != cudaSuccess) {
return false;
} else {
return true;
}
}

void TrtYoloX::initPreprocessBuffer(int width, int height)
{
// if size of source input has been changed...
Expand Down Expand Up @@ -534,6 +551,9 @@ bool TrtYoloX::doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
{
if (!setCudaDeviceId(gpu_id_)) {
return false;
}
if (!trt_common_->isInitialized()) {
return false;
}
Expand Down
26 changes: 2 additions & 24 deletions perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
const bool preprocess_on_gpu = this->declare_parameter<bool>("preprocess_on_gpu");
const std::string calibration_image_list_path =
this->declare_parameter<std::string>("calibration_image_list_path");
const uint8_t gpu_id = this->declare_parameter<uint8_t>("gpu_id");

std::string color_map_path = this->declare_parameter<std::string>("color_map_path");

Expand All @@ -68,7 +69,6 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
is_roi_overlap_segment_ = declare_parameter<bool>("is_roi_overlap_segment");
is_publish_color_mask_ = declare_parameter<bool>("is_publish_color_mask");
overlap_roi_score_threshold_ = declare_parameter<float>("overlap_roi_score_threshold");
gpu_id_ = declare_parameter<int>("gpu_id");
roi_overlay_segment_labels_.UNKNOWN =
declare_parameter<bool>("roi_overlay_segment_label.UNKNOWN");
roi_overlay_segment_labels_.CAR = declare_parameter<bool>("roi_overlay_segment_label.CAR");
Expand All @@ -92,13 +92,9 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
const tensorrt_common::BatchConfig batch_config{1, 1, 1};
const size_t max_workspace_size = (1 << 30);

if (!setCudaDeviceId(gpu_id_)) {
RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_);
}

trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config,
preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config,
preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config,

Check warning on line 97 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Large Method

TrtYoloXNode::TrtYoloXNode increases from 73 to 74 lines of code, threshold = 70. Large functions with many lines of code are generally harder to understand and lower the code health. Avoid adding more lines to this function.
max_workspace_size, color_map_path);

timer_ =
Expand Down Expand Up @@ -134,12 +130,6 @@ void TrtYoloXNode::onConnect()

void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
{
if (!setCudaDeviceId(gpu_id_)) {
RCLCPP_ERROR_THROTTLE(
get_logger(), *get_clock(), 2000, "GPU %d does not exist or is not suitable.", gpu_id_);
return;
}

stop_watch_ptr_->toc("processing_time", true);
tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects;

Expand Down Expand Up @@ -235,18 +225,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
}
}

bool TrtYoloXNode::setCudaDeviceId(const uint8_t gpu_id)
{
cudaError_t cuda_err = cudaSetDevice(gpu_id);
if (cuda_err != cudaSuccess) {
RCLCPP_ERROR_THROTTLE(
get_logger(), *get_clock(), 2000, "Failed to set gpu device with id: %d ", gpu_id);
return false;
} else {
return true;
}
}

bool TrtYoloXNode::readLabelFile(const std::string & label_path)
{
std::ifstream label_file(label_path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
fine_detector_precision: fp16
fine_detector_score_thresh: 0.3
fine_detector_nms_thresh: 0.65
gpu_id: 0
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt
std::string model_path = declare_parameter("fine_detector_model_path", "");
std::string label_path = declare_parameter("fine_detector_label_path", "");
std::string precision = declare_parameter("fine_detector_precision", "fp32");
uint8_t gpu_id = declare_parameter("gpu_id", 0);
// Objects with a score lower than this value will be ignored.
// This threshold will be ignored if specified model contains EfficientNMS_TRT module in it
score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3);
Expand All @@ -91,7 +92,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt

trt_yolox_ = std::make_unique<autoware::tensorrt_yolox::TrtYoloX>(
model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess,
calib_image_list, scale, cache_dir, batch_config);
gpu_id, calib_image_list, scale, cache_dir, batch_config);

using std::chrono_literals::operator""ms;
timer_ = rclcpp::create_timer(
Expand Down

0 comments on commit 941c3f9

Please sign in to comment.