Skip to content

Commit

Permalink
move code to the tensorrt_yolox
Browse files Browse the repository at this point in the history
Signed-off-by: ismetatabay <[email protected]>

style(pre-commit): autofix
  • Loading branch information
ismetatabay committed Aug 14, 2024
1 parent 4638322 commit 941c3f9
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,22 @@ class TrtYoloX
const std::string & model_path, const std::string & precision, const int num_class = 8,
const float score_threshold = 0.3, const float nms_threshold = 0.7,
const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
const bool use_gpu_preprocess = false, const int gpu_id = 0,
std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0,
[[maybe_unused]] const std::string & cache_dir = "",
const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
  /**
   * @brief Destruct TrtYoloX
   */
~TrtYoloX();

  /**
   * @brief select the CUDA device used for inference
   * @param[in] gpu_id GPU device id
   */
bool setCudaDeviceId(const uint8_t gpu_id);

/**
* @brief run inference including pre-process and post-process
* @param[out] objects results for object detection
Expand Down Expand Up @@ -267,7 +274,7 @@ class TrtYoloX
size_t out_elem_num_per_batch_;
CudaUniquePtr<float[]> out_prob_d_;

StreamUniquePtr stream_{makeCudaStream()};
StreamUniquePtr stream_;

int32_t max_detections_;
std::vector<float> scales_;
Expand All @@ -280,6 +287,8 @@ class TrtYoloX

// flag whether preprocess are performed on GPU
bool use_gpu_preprocess_;
// GPU id for inference
uint8_t gpu_id_;
// host buffer for preprocessing on GPU
CudaUniquePtrHost<unsigned char[]> image_buf_h_;
// device buffer for preprocessing on GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ class TrtYoloXNode : public rclcpp::Node
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
bool setCudaDeviceId(const uint8_t gpu_id);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -97,7 +96,6 @@ class TrtYoloXNode : public rclcpp::Node
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
float overlap_roi_score_threshold_;
uint8_t gpu_id_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
{"UNKNOWN", 3}, // other
Expand Down
30 changes: 25 additions & 5 deletions perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,25 @@ namespace autoware::tensorrt_yolox
TrtYoloX::TrtYoloX(
const std::string & model_path, const std::string & precision, const int num_class,
const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config,
const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor,
[[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
const size_t max_workspace_size, const std::string & color_map_path)
const bool use_gpu_preprocess, const int gpu_id, std::string calibration_image_list_path,
const double norm_factor, [[maybe_unused]] const std::string & cache_dir,
const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
const std::string & color_map_path)
{
gpu_id_ = gpu_id;
std::cout << "GPU" << gpu_id << "is selected for the inference !" << std::endl;
if (!setCudaDeviceId(gpu_id_)) {
throw std::runtime_error(
"GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable.");
}
src_width_ = -1;
src_height_ = -1;
norm_factor_ = norm_factor;
batch_size_ = batch_config[2];
multitask_ = 0;
sematic_color_map_ = get_seg_colormap(color_map_path);
stream_ = makeCudaStream();

if (precision == "int8") {
if (build_config.clip_value <= 0.0) {
if (calibration_image_list_path.empty()) {
Expand Down Expand Up @@ -217,7 +226,6 @@ TrtYoloX::TrtYoloX(
calibrator.reset(
new tensorrt_yolox::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_));
}

trt_common_ = std::make_unique<tensorrt_common::TrtCommon>(
model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config);
} else {
Expand Down Expand Up @@ -262,7 +270,6 @@ TrtYoloX::TrtYoloX(
throw std::runtime_error{s.str()};
*/
}

Check warning on line 272 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

TrtYoloX::TrtYoloX increases in cyclomatic complexity from 22 to 24, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.

// GPU memory allocation
const auto input_dims = trt_common_->getBindingDimensions(0);
const auto input_size =
Expand Down Expand Up @@ -341,6 +348,16 @@ TrtYoloX::~TrtYoloX()
}
}

bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id)
{
cudaError_t cuda_err = cudaSetDevice(gpu_id);
if (cuda_err != cudaSuccess) {
return false;
} else {
return true;
}
}

void TrtYoloX::initPreprocessBuffer(int width, int height)
{
// if size of source input has been changed...
Expand Down Expand Up @@ -534,6 +551,9 @@ bool TrtYoloX::doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
{
if (!setCudaDeviceId(gpu_id_)) {
return false;
}
if (!trt_common_->isInitialized()) {
return false;
}
Expand Down
26 changes: 2 additions & 24 deletions perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
const bool preprocess_on_gpu = this->declare_parameter<bool>("preprocess_on_gpu");
const std::string calibration_image_list_path =
this->declare_parameter<std::string>("calibration_image_list_path");
const uint8_t gpu_id = this->declare_parameter<uint8_t>("gpu_id");

std::string color_map_path = this->declare_parameter<std::string>("color_map_path");

Expand All @@ -68,7 +69,6 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
is_roi_overlap_segment_ = declare_parameter<bool>("is_roi_overlap_segment");
is_publish_color_mask_ = declare_parameter<bool>("is_publish_color_mask");
overlap_roi_score_threshold_ = declare_parameter<float>("overlap_roi_score_threshold");
gpu_id_ = declare_parameter<int>("gpu_id");
roi_overlay_segment_labels_.UNKNOWN =
declare_parameter<bool>("roi_overlay_segment_label.UNKNOWN");
roi_overlay_segment_labels_.CAR = declare_parameter<bool>("roi_overlay_segment_label.CAR");
Expand All @@ -92,13 +92,9 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
const tensorrt_common::BatchConfig batch_config{1, 1, 1};
const size_t max_workspace_size = (1 << 30);

if (!setCudaDeviceId(gpu_id_)) {
RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_);
}

trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config,
preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config,
preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config,

Check warning on line 97 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Large Method

TrtYoloXNode::TrtYoloXNode increases from 73 to 74 lines of code, threshold = 70. Large functions with many lines of code are generally harder to understand and lower the code health. Avoid adding more lines to this function.
max_workspace_size, color_map_path);

timer_ =
Expand Down Expand Up @@ -134,12 +130,6 @@ void TrtYoloXNode::onConnect()

void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
{
if (!setCudaDeviceId(gpu_id_)) {
RCLCPP_ERROR_THROTTLE(
get_logger(), *get_clock(), 2000, "GPU %d does not exist or is not suitable.", gpu_id_);
return;
}

stop_watch_ptr_->toc("processing_time", true);
tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects;

Expand Down Expand Up @@ -235,18 +225,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
}
}

bool TrtYoloXNode::setCudaDeviceId(const uint8_t gpu_id)
{
cudaError_t cuda_err = cudaSetDevice(gpu_id);
if (cuda_err != cudaSuccess) {
RCLCPP_ERROR_THROTTLE(
get_logger(), *get_clock(), 2000, "Failed to set gpu device with id: %d ", gpu_id);
return false;
} else {
return true;
}
}

bool TrtYoloXNode::readLabelFile(const std::string & label_path)
{
std::ifstream label_file(label_path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
fine_detector_precision: fp16
fine_detector_score_thresh: 0.3
fine_detector_nms_thresh: 0.65
gpu_id: 0
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt
std::string model_path = declare_parameter("fine_detector_model_path", "");
std::string label_path = declare_parameter("fine_detector_label_path", "");
std::string precision = declare_parameter("fine_detector_precision", "fp32");
uint8_t gpu_id = declare_parameter("gpu_id", 0);
// Objects with a score lower than this value will be ignored.
// This threshold will be ignored if specified model contains EfficientNMS_TRT module in it
score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3);
Expand All @@ -91,7 +92,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt

trt_yolox_ = std::make_unique<autoware::tensorrt_yolox::TrtYoloX>(
model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess,
calib_image_list, scale, cache_dir, batch_config);
gpu_id, calib_image_list, scale, cache_dir, batch_config);

using std::chrono_literals::operator""ms;
timer_ = rclcpp::create_timer(
Expand Down

0 comments on commit 941c3f9

Please sign in to comment.