Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(autoware_tensorrt_yolox): add GPU - CUDA device option #8245

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@
profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.
clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
gpu_id: 0 # GPU ID to select CUDA Device
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@
profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.
clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
gpu_id: 0 # GPU ID to select CUDA Device
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,27 @@
const std::string & model_path, const std::string & precision, const int num_class = 8,
const float score_threshold = 0.3, const float nms_threshold = 0.7,
const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
const bool use_gpu_preprocess = false, const uint8_t gpu_id = 0,
std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0,
[[maybe_unused]] const std::string & cache_dir = "",
const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
/**
* @brief Destructor of TrtYoloX
*/
~TrtYoloX();

/**
* @brief select CUDA device for inference
* @param[in] gpu_id GPU device ID
*/
bool setCudaDeviceId(const uint8_t gpu_id);

/**
* @brief return a flag for gpu initialization
*/
bool isGPUInitialized() const { return is_gpu_initialized_; }

Check warning on line 111 in perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp#L111

Added line #L111 was not covered by tests

/**
* @brief run inference including pre-process and post-process
* @param[out] objects results for object detection
Expand Down Expand Up @@ -267,7 +279,7 @@
size_t out_elem_num_per_batch_;
CudaUniquePtr<float[]> out_prob_d_;

StreamUniquePtr stream_{makeCudaStream()};
StreamUniquePtr stream_;

int32_t max_detections_;
std::vector<float> scales_;
Expand All @@ -280,6 +292,10 @@

// flag whether preprocess are performed on GPU
bool use_gpu_preprocess_;
// GPU id for inference
const uint8_t gpu_id_;
// flag for gpu initialization
bool is_gpu_initialized_;
// host buffer for preprocessing on GPU
CudaUniquePtrHost<unsigned char[]> image_buf_h_;
// device buffer for preprocessing on GPU
Expand Down
Original file line number Diff line number Diff line change
@@ -1,69 +1,45 @@
<launch>
  <!--
    Launch up to eight yolox instances (one per camera image topic).
    image_rawN      : input image topic for camera N (empty string disables it)
    gpu_id_image_rawN : CUDA device ID used by the yolox instance for camera N
    image_number    : how many of the includes below are activated (1..8)
    Each include publishes detections on roisN.
  -->
  <arg name="image_raw0" default="/image_raw0"/>
  <arg name="gpu_id_image_raw0" default="0"/>

  <arg name="image_raw1" default=""/>
  <arg name="gpu_id_image_raw1" default="0"/>

  <arg name="image_raw2" default=""/>
  <arg name="gpu_id_image_raw2" default="0"/>

  <arg name="image_raw3" default=""/>
  <arg name="gpu_id_image_raw3" default="0"/>

  <arg name="image_raw4" default=""/>
  <arg name="gpu_id_image_raw4" default="0"/>

  <arg name="image_raw5" default=""/>
  <arg name="gpu_id_image_raw5" default="0"/>

  <arg name="image_raw6" default=""/>
  <arg name="gpu_id_image_raw6" default="0"/>

  <arg name="image_raw7" default=""/>
  <arg name="gpu_id_image_raw7" default="0"/>

  <arg name="image_number" default="1"/>
  <arg name="output_topic" default="rois"/>

  <include if="$(eval &quot;'$(var image_number)'>='1'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw0)"/>
    <arg name="output/objects" value="rois0"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw0)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='2'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw1)"/>
    <arg name="output/objects" value="rois1"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw1)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='3'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw2)"/>
    <arg name="output/objects" value="rois2"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw2)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='4'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw3)"/>
    <arg name="output/objects" value="rois3"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw3)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='5'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw4)"/>
    <arg name="output/objects" value="rois4"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw4)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='6'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw5)"/>
    <arg name="output/objects" value="rois5"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw5)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='7'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw6)"/>
    <arg name="output/objects" value="rois6"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw6)"/>
  </include>
  <include if="$(eval &quot;'$(var image_number)'>='8'&quot;)" file="$(find-pkg-share autoware_tensorrt_yolox)/launch/yolox.launch.xml">
    <arg name="input/image" value="$(var image_raw7)"/>
    <arg name="output/objects" value="rois7"/>
    <arg name="gpu_id" value="$(var gpu_id_image_raw7)"/>
  </include>
</launch>
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@
"default": true,
"description": "If true, pre-processing is performed on GPU."
},
"gpu_id": {
"type": "integer",
"default": 0,
"description": "GPU ID for selecting CUDA device"
},
"calibration_image_list_path": {
"type": "string",
"default": "",
Expand All @@ -88,7 +93,8 @@
"quantize_last_layer",
"profile_per_layer",
"clip_value",
"preprocess_on_gpu"
"preprocess_on_gpu",
"gpu_id"
]
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@
"default": true,
"description": "If true, pre-processing is performed on GPU."
},
"gpu_id": {
"type": "integer",
"default": 0,
"description": "GPU ID for selecting CUDA device"
},
"calibration_image_list_path": {
"type": "string",
"default": "",
Expand All @@ -88,7 +93,8 @@
"quantize_last_layer",
"profile_per_layer",
"clip_value",
"preprocess_on_gpu"
"preprocess_on_gpu",
"gpu_id"
]
}
},
Expand Down
30 changes: 25 additions & 5 deletions perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023 TIER IV, Inc.

Check notice on line 1 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ Getting worse: Lines of Code in a Single File

The lines of code increases from 1054 to 1073, improve code health by reducing it to 1000. The number of Lines of Code in a single file. More Lines of Code lowers the code health.

Check notice on line 1 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ Getting better: Overall Code Complexity

The mean cyclomatic complexity decreases from 5.48 to 5.44, threshold = 4. This file has many conditional statements (e.g. if, for, while) across its implementation, leading to lower code health. Avoid adding more conditionals.

Check notice on line 1 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ Getting better: Primitive Obsession

The ratio of primitive types in function arguments decreases from 40.82% to 39.22%, threshold = 30.0%. The functions in this file have too many primitive types (e.g. int, double, float) in their function argument lists. Using many primitive types lead to the code smell Primitive Obsession. Avoid adding more primitive arguments.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -156,16 +156,25 @@
TrtYoloX::TrtYoloX(
const std::string & model_path, const std::string & precision, const int num_class,
const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config,
const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor,
[[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
const size_t max_workspace_size, const std::string & color_map_path)
const bool use_gpu_preprocess, const uint8_t gpu_id, std::string calibration_image_list_path,
const double norm_factor, [[maybe_unused]] const std::string & cache_dir,
const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
const std::string & color_map_path)
: gpu_id_(gpu_id), is_gpu_initialized_(false)

Check warning on line 163 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L162-L163

Added lines #L162 - L163 were not covered by tests
{
if (!setCudaDeviceId(gpu_id_)) {
return;

Check warning on line 166 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L166

Added line #L166 was not covered by tests
}
is_gpu_initialized_ = true;

Check warning on line 168 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L168

Added line #L168 was not covered by tests

src_width_ = -1;
src_height_ = -1;
norm_factor_ = norm_factor;
batch_size_ = batch_config[2];
multitask_ = 0;
sematic_color_map_ = get_seg_colormap(color_map_path);
stream_ = makeCudaStream();

Check warning on line 176 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L176

Added line #L176 was not covered by tests

if (precision == "int8") {
if (build_config.clip_value <= 0.0) {
if (calibration_image_list_path.empty()) {
Expand Down Expand Up @@ -217,7 +226,6 @@
calibrator.reset(
new tensorrt_yolox::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_));
}

trt_common_ = std::make_unique<tensorrt_common::TrtCommon>(
model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config);
} else {
Expand Down Expand Up @@ -262,7 +270,6 @@
throw std::runtime_error{s.str()};
*/
}

Check warning on line 272 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

TrtYoloX::TrtYoloX increases in cyclomatic complexity from 22 to 23, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.

// GPU memory allocation
const auto input_dims = trt_common_->getBindingDimensions(0);
const auto input_size =
Expand Down Expand Up @@ -341,6 +348,16 @@
}
}

bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id)

Check warning on line 351 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L351

Added line #L351 was not covered by tests
{
cudaError_t cuda_err = cudaSetDevice(gpu_id);

Check warning on line 353 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L353

Added line #L353 was not covered by tests
if (cuda_err != cudaSuccess) {
return false;
} else {
return true;

Check warning on line 357 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp#L357

Added line #L357 was not covered by tests
}
ismetatabay marked this conversation as resolved.
Show resolved Hide resolved
}

void TrtYoloX::initPreprocessBuffer(int width, int height)
{
// if size of source input has been changed...
Expand Down Expand Up @@ -534,6 +551,9 @@
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
{
if (!setCudaDeviceId(gpu_id_)) {
return false;
}
if (!trt_common_->isInitialized()) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 TIER IV, Inc.

Check notice on line 1 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ Getting worse: Overall Code Complexity

The mean cyclomatic complexity increases from 5.57 to 5.71, threshold = 4. This file has many conditional statements (e.g. if, for, while) across its implementation, leading to lower code health. Avoid adding more conditionals.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,6 +57,7 @@
const bool preprocess_on_gpu = this->declare_parameter<bool>("preprocess_on_gpu");
const std::string calibration_image_list_path =
this->declare_parameter<std::string>("calibration_image_list_path");
const uint8_t gpu_id = this->declare_parameter<uint8_t>("gpu_id");

Check warning on line 60 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp#L60

Added line #L60 was not covered by tests

std::string color_map_path = this->declare_parameter<std::string>("color_map_path");

Expand Down Expand Up @@ -93,9 +94,16 @@

trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config,
preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config,
preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config,
max_workspace_size, color_map_path);

if (!trt_yolox_->isGPUInitialized()) {
RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id);
rclcpp::shutdown();

Check warning on line 102 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp#L101-L102

Added lines #L101 - L102 were not covered by tests
return;
}
RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id);

Check warning on line 105 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp#L105

Added line #L105 was not covered by tests

Check warning on line 106 in perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Large Method

TrtYoloXNode::TrtYoloXNode increases from 73 to 80 lines of code, threshold = 70. Large functions with many lines of code are generally harder to understand and lower the code health. Avoid adding more lines to this function.
timer_ =
rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));

Expand Down
15 changes: 8 additions & 7 deletions perception/autoware_traffic_light_fine_detector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,14 @@ Based on the camera image and the global ROI array detected by `map_based_detect

### Node Parameters

| Name | Type | Default Value | Description |
| -------------------------- | ------ | --------------------------- | ------------------------------------------------------------------ |
| `data_path` | string | "$(env HOME)/autoware_data" | packages data and artifacts directory path |
| `fine_detector_model_path` | string | "" | The onnx file name for yolo model |
| `fine_detector_label_path` | string | "" | The label file with label names for detected objects written on it |
| `fine_detector_precision` | string | "fp32" | The inference mode: "fp32", "fp16" |
| `approximate_sync` | bool | false | Flag for whether to ues approximate sync policy |
| Name | Type | Default Value | Description |
| -------------------------- | ------- | --------------------------- | ------------------------------------------------------------------ |
| `data_path` | string | "$(env HOME)/autoware_data" | packages data and artifacts directory path |
| `fine_detector_model_path` | string | "" | The onnx file name for yolo model |
| `fine_detector_label_path` | string | "" | The label file with label names for detected objects written on it |
| `fine_detector_precision` | string | "fp32" | The inference mode: "fp32", "fp16" |
| `approximate_sync`         | bool    | false                       | Flag for whether to use approximate sync policy                    |
| `gpu_id`                   | integer | 0                           | ID for selecting the CUDA GPU device                               |

## Assumptions / Known limits

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
fine_detector_precision: fp16
fine_detector_score_thresh: 0.3
fine_detector_nms_thresh: 0.65
gpu_id: 0
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023 TIER IV, Inc.

Check notice on line 1 in perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ Getting worse: Overall Code Complexity

The mean cyclomatic complexity increases from 4.67 to 4.78, threshold = 4. This file has many conditional statements (e.g. if, for, while) across its implementation, leading to lower code health. Avoid adding more conditionals.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -60,6 +60,7 @@
std::string model_path = declare_parameter("fine_detector_model_path", "");
std::string label_path = declare_parameter("fine_detector_label_path", "");
std::string precision = declare_parameter("fine_detector_precision", "fp32");
const uint8_t gpu_id = declare_parameter("gpu_id", 0);

Check warning on line 63 in perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp#L63

Added line #L63 was not covered by tests
// Objects with a score lower than this value will be ignored.
// This threshold will be ignored if specified model contains EfficientNMS_TRT module in it
score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3);
Expand All @@ -86,7 +87,14 @@

trt_yolox_ = std::make_unique<autoware::tensorrt_yolox::TrtYoloX>(
model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess,
calib_image_list, scale, cache_dir, batch_config);
gpu_id, calib_image_list, scale, cache_dir, batch_config);

if (!trt_yolox_->isGPUInitialized()) {
RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id);
rclcpp::shutdown();

Check warning on line 94 in perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp#L93-L94

Added lines #L93 - L94 were not covered by tests
return;
}
RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id);

Check warning on line 97 in perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp

View check run for this annotation

Codecov / codecov/patch

perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp#L97

Added line #L97 was not covered by tests

using std::chrono_literals::operator""ms;
timer_ = rclcpp::create_timer(
Expand Down
Loading