From 941c3f9620c6da6160207e7fa0e6a4b939ddad79 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Wed, 14 Aug 2024 18:57:32 +0300 Subject: [PATCH] move code to the tensorrt_yolox Signed-off-by: ismetatabay style(pre-commit): autofix --- .../tensorrt_yolox/tensorrt_yolox.hpp | 15 ++++++++-- .../tensorrt_yolox/tensorrt_yolox_node.hpp | 2 -- .../src/tensorrt_yolox.cpp | 30 +++++++++++++++---- .../src/tensorrt_yolox_node.cpp | 26 ++-------------- .../traffic_light_fine_detector.param.yaml | 1 + .../src/traffic_light_fine_detector_node.cpp | 3 +- 6 files changed, 42 insertions(+), 35 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp index 6610476dc33fc..be7e7f6541360 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp @@ -89,8 +89,9 @@ class TrtYoloX const std::string & model_path, const std::string & precision, const int num_class = 8, const float score_threshold = 0.3, const float nms_threshold = 0.7, const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), - const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(), - const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "", + const bool use_gpu_preprocess = false, const int gpu_id = 0, + std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0, + [[maybe_unused]] const std::string & cache_dir = "", const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = ""); /** @@ -98,6 +99,12 @@ class TrtYoloX */ ~TrtYoloX(); + /** + * @brief select cuda device for a inference + * @param[in] GPU id + */ + bool setCudaDeviceId(const uint8_t gpu_id); + /** * @brief run inference including pre-process and post-process * @param[out] objects results for object detection @@ -267,7 +274,7 @@ class TrtYoloX size_t out_elem_num_per_batch_; CudaUniquePtr out_prob_d_; - StreamUniquePtr stream_{makeCudaStream()}; + StreamUniquePtr stream_; int32_t max_detections_; std::vector scales_; @@ -280,6 +287,8 @@ class TrtYoloX // flag whether preprocess are performed on GPU bool use_gpu_preprocess_; + // GPU id for inference + uint8_t gpu_id_; // host buffer for preprocessing on GPU CudaUniquePtrHost image_buf_h_; // device buffer for preprocessing on GPU diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp index 86c7414995922..2e317139f27c0 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp @@ -80,7 +80,6 @@ class TrtYoloXNode : public rclcpp::Node void overlapSegmentByRoi( const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height); int mapRoiLabel2SegLabel(const int32_t roi_label_index); - bool setCudaDeviceId(const uint8_t gpu_id); image_transport::Publisher image_pub_; image_transport::Publisher mask_pub_; @@ -97,7 +96,6 @@ class TrtYoloXNode : public rclcpp::Node bool is_roi_overlap_segment_; bool is_publish_color_mask_; float overlap_roi_score_threshold_; - uint8_t gpu_id_; // TODO(badai-nguyen): change to function std::map remap_roi_to_semantic_ = { {"UNKNOWN", 3}, // other diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index 4b12a8e19c31f..a5da9a62d3163 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -156,16 +156,25 @@ namespace autoware::tensorrt_yolox TrtYoloX::TrtYoloX( const std::string & model_path, const std::string & precision, const int num_class, const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config, - const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor, - [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config, - const size_t max_workspace_size, const std::string & color_map_path) + const bool use_gpu_preprocess, const int gpu_id, std::string calibration_image_list_path, + const double norm_factor, [[maybe_unused]] const std::string & cache_dir, + const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size, + const std::string & color_map_path) { + gpu_id_ = gpu_id; + std::cout << "GPU" << gpu_id << "is selected for the inference !" << std::endl; + if (!setCudaDeviceId(gpu_id_)) { + throw std::runtime_error( + "GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable."); + } src_width_ = -1; src_height_ = -1; norm_factor_ = norm_factor; batch_size_ = batch_config[2]; multitask_ = 0; sematic_color_map_ = get_seg_colormap(color_map_path); + stream_ = makeCudaStream(); + if (precision == "int8") { if (build_config.clip_value <= 0.0) { if (calibration_image_list_path.empty()) { @@ -217,7 +226,6 @@ TrtYoloX::TrtYoloX( calibrator.reset( new tensorrt_yolox::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_)); } - trt_common_ = std::make_unique( model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config); } else { @@ -262,7 +270,6 @@ TrtYoloX::TrtYoloX( throw std::runtime_error{s.str()}; */ } - // GPU memory allocation const auto input_dims = trt_common_->getBindingDimensions(0); const auto input_size = @@ -341,6 +348,16 @@ TrtYoloX::~TrtYoloX() } } +bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id) +{ + cudaError_t cuda_err = cudaSetDevice(gpu_id); + if (cuda_err != cudaSuccess) { + return false; + } else { + return true; + } +} + void TrtYoloX::initPreprocessBuffer(int width, int height) { // if size of source input has been changed... @@ -534,6 +551,9 @@ bool TrtYoloX::doInference( const std::vector & images, ObjectArrays & objects, std::vector & masks, [[maybe_unused]] std::vector & color_masks) { + if (!setCudaDeviceId(gpu_id_)) { + return false; + } if (!trt_common_->isInitialized()) { return false; } diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index 3c44a4606a017..704ff63a0a193 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -57,6 +57,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const bool preprocess_on_gpu = this->declare_parameter("preprocess_on_gpu"); const std::string calibration_image_list_path = this->declare_parameter("calibration_image_list_path"); + const uint8_t gpu_id = this->declare_parameter("gpu_id"); std::string color_map_path = this->declare_parameter("color_map_path"); @@ -68,7 +69,6 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) is_roi_overlap_segment_ = declare_parameter("is_roi_overlap_segment"); is_publish_color_mask_ = declare_parameter("is_publish_color_mask"); overlap_roi_score_threshold_ = declare_parameter("overlap_roi_score_threshold"); - gpu_id_ = declare_parameter("gpu_id"); roi_overlay_segment_labels_.UNKNOWN = declare_parameter("roi_overlay_segment_label.UNKNOWN"); roi_overlay_segment_labels_.CAR = declare_parameter("roi_overlay_segment_label.CAR"); @@ -92,13 +92,9 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const tensorrt_common::BatchConfig batch_config{1, 1, 1}; const size_t max_workspace_size = (1 << 30); - if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_); - } - trt_yolox_ = std::make_unique( model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config, - preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config, + preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config, max_workspace_size, color_map_path); timer_ = @@ -134,12 +130,6 @@ void TrtYoloXNode::onConnect() void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { - if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR_THROTTLE( - get_logger(), *get_clock(), 2000, "GPU %d does not exist or is not suitable.", gpu_id_); - return; - } - stop_watch_ptr_->toc("processing_time", true); tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects; @@ -235,18 +225,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } -bool TrtYoloXNode::setCudaDeviceId(const uint8_t gpu_id) -{ - cudaError_t cuda_err = cudaSetDevice(gpu_id); - if (cuda_err != cudaSuccess) { - RCLCPP_ERROR_THROTTLE( - get_logger(), *get_clock(), 2000, "Failed to set gpu device with id: %d ", gpu_id); - return false; - } else { - return true; - } -} - bool TrtYoloXNode::readLabelFile(const std::string & label_path) { std::ifstream label_file(label_path); diff --git a/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml b/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml index 0265d0c4c7abb..36d4413472a0b 100644 --- a/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml +++ b/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml @@ -5,3 +5,4 @@ fine_detector_precision: fp16 fine_detector_score_thresh: 0.3 fine_detector_nms_thresh: 0.65 + gpu_id: 0 diff --git a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp index 381fa79ed090c..0450e70db315b 100644 --- a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp +++ b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp @@ -65,6 +65,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt std::string model_path = declare_parameter("fine_detector_model_path", ""); std::string label_path = declare_parameter("fine_detector_label_path", ""); std::string precision = declare_parameter("fine_detector_precision", "fp32"); + uint8_t gpu_id = declare_parameter("gpu_id", 0); // Objects with a score lower than this value will be ignored. // This threshold will be ignored if specified model contains EfficientNMS_TRT module in it score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3); @@ -91,7 +92,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt trt_yolox_ = std::make_unique( model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess, - calib_image_list, scale, cache_dir, batch_config); + gpu_id, calib_image_list, scale, cache_dir, batch_config); using std::chrono_literals::operator""ms; timer_ = rclcpp::create_timer(