diff --git a/Cargo.toml b/Cargo.toml index ff8bbde..ccd0305 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,4 +43,5 @@ imageproc = { version = "0.24" } ab_glyph = "0.2.23" geo = "0.28.0" prost = "0.12.4" -human_bytes = "0.4.3" \ No newline at end of file +human_bytes = "0.4.3" +fast_image_resize = "3.0.4" \ No newline at end of file diff --git a/assets/liuyifei.png b/assets/liuyifei.png new file mode 100644 index 0000000..778b684 Binary files /dev/null and b/assets/liuyifei.png differ diff --git a/assets/portrait.jpg b/assets/portrait.jpg deleted file mode 100644 index 0ccd686..0000000 Binary files a/assets/portrait.jpg and /dev/null differ diff --git a/examples/db/main.rs b/examples/db/main.rs index 7b04f6d..ac9d7ca 100644 --- a/examples/db/main.rs +++ b/examples/db/main.rs @@ -6,18 +6,18 @@ fn main() -> Result<(), Box> { .with_i00((1, 4, 8).into()) .with_i02((608, 960, 1280).into()) .with_i03((608, 960, 1280).into()) + // .with_trt(0) .with_confs(&[0.4]) .with_min_width(5.0) .with_min_height(12.0) - // .with_trt(0) .with_model("ppocr-v4-db-dyn.onnx")?; - let mut model = DB::new(&options)?; + let mut model = DB::new(options)?; // load image let x = vec![ DataLoader::try_read("./assets/db.png")?, - // DataLoader::try_read("./assets/2.jpg")?, + DataLoader::try_read("./assets/2.jpg")?, ]; // run diff --git a/examples/depth-anything/main.rs b/examples/depth-anything/main.rs index 043f483..de12e89 100644 --- a/examples/depth-anything/main.rs +++ b/examples/depth-anything/main.rs @@ -7,7 +7,7 @@ fn main() -> Result<(), Box> { .with_i00((1, 1, 8).into()) .with_i02((384, 512, 1024).into()) .with_i03((384, 512, 1024).into()); - let mut model = DepthAnything::new(&options)?; + let mut model = DepthAnything::new(options)?; // load let x = vec![DataLoader::try_read("./assets/2.jpg")?]; diff --git a/examples/dinov2/main.rs b/examples/dinov2/main.rs index 5aa6682..fd666c6 100644 --- a/examples/dinov2/main.rs +++ b/examples/dinov2/main.rs @@ -7,7 +7,7 @@ fn main() -> Result<(), Box> { .with_i00((1, 1, 1).into()) .with_i02((224, 224, 224).into()) .with_i03((224, 224, 224).into()); - let _model = Dinov2::new(&options)?; + let _model = Dinov2::new(options)?; println!("TODO..."); // query from vector diff --git a/examples/face-parsing/main.rs b/examples/face-parsing/main.rs index 47f4274..a0b9a9f 100644 --- a/examples/face-parsing/main.rs +++ b/examples/face-parsing/main.rs @@ -10,7 +10,7 @@ fn main() -> Result<(), Box> { // .with_trt(0) // .with_fp16(true) .with_confs(&[0.5]); - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/nini.png")?]; diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs index 38af201..a315a56 100644 --- a/examples/fastsam/main.rs +++ b/examples/fastsam/main.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), Box> { .with_i02((416, 640, 800).into()) .with_i03((416, 640, 800).into()) .with_confs(&[0.4]); - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/examples/modnet/demo.png b/examples/modnet/demo.png index 9341e2f..84d1479 100644 Binary files a/examples/modnet/demo.png and b/examples/modnet/demo.png differ diff --git a/examples/modnet/main.rs b/examples/modnet/main.rs index b5c3eb9..66c1a42 100644 --- a/examples/modnet/main.rs +++ b/examples/modnet/main.rs @@ -7,10 +7,10 @@ fn main() -> Result<(), Box> { .with_i00((1, 1, 4).into()) .with_i02((416, 512, 800).into()) .with_i03((416, 512, 800).into()); - let mut model = MODNet::new(&options)?; + let mut model = MODNet::new(options)?; // load image - let x = vec![DataLoader::try_read("./assets/portrait.jpg")?]; + let x = vec![DataLoader::try_read("./assets/liuyifei.png")?]; // run let y = model.run(&x)?; diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs index 4738604..79f203b 100644 --- a/examples/rtdetr/main.rs +++ b/examples/rtdetr/main.rs @@ -4,9 +4,9 @@ fn main() -> Result<(), Box> { // build model let options = Options::default() .with_model("rtdetr-l-f16.onnx")? - .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_confs(&[0.4, 0.15]) .with_names(&coco::NAMES_80); - let mut model = RTDETR::new(&options)?; + let mut model = RTDETR::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/examples/rtmo/main.rs b/examples/rtmo/main.rs index e43c6ec..a06bef3 100644 --- a/examples/rtmo/main.rs +++ b/examples/rtmo/main.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), Box> { .with_nk(17) .with_confs(&[0.3]) .with_kconfs(&[0.5]); - let mut model = RTMO::new(&options)?; + let mut model = RTMO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/examples/svtr/main.rs b/examples/svtr/main.rs index 2eae102..92987e8 100644 --- a/examples/svtr/main.rs +++ b/examples/svtr/main.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), Box> { .with_confs(&[0.2]) .with_vocab("ppocr_rec_vocab.txt")? .with_model("ppocr-v4-svtr-ch-dyn.onnx")?; - let mut model = SVTR::new(&options)?; + let mut model = SVTR::new(options)?; // load images let dl = DataLoader::default() diff --git a/examples/yolo-world/main.rs b/examples/yolo-world/main.rs index f35cb49..324e774 100644 --- a/examples/yolo-world/main.rs +++ b/examples/yolo-world/main.rs @@ -7,9 +7,9 @@ fn main() -> Result<(), Box> { .with_i00((1, 1, 4).into()) .with_i02((416, 640, 800).into()) .with_i03((416, 640, 800).into()) - .with_confs(&[0.3]) // shoes: 0.2 + .with_confs(&[0.3]) .with_profile(false); - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/examples/yolop/main.rs b/examples/yolop/main.rs index 260d81f..8e963dd 100644 --- a/examples/yolop/main.rs +++ b/examples/yolop/main.rs @@ -6,7 +6,7 @@ fn main() -> Result<(), Box> { .with_model("yolopv2-dyn-480x800.onnx")? .with_i00((1, 1, 8).into()) .with_confs(&[0.3]); - let mut model = YOLOPv2::new(&options)?; + let mut model = YOLOPv2::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/car.jpg")?]; diff --git a/examples/yolov5/main.rs b/examples/yolov5/main.rs index 4905bf4..60eae5b 100644 --- a/examples/yolov5/main.rs +++ b/examples/yolov5/main.rs @@ -10,13 +10,12 @@ fn main() -> Result<(), Box> { .with_anchors_first(true) .with_yolo_task(YOLOTask::Segment) .with_model("yolov5s-seg.onnx")? - .with_trt(0) - .with_fp16(true) + // .with_trt(0) + // .with_fp16(true) .with_i00((1, 1, 4).into()) .with_i02((224, 640, 800).into()) - .with_i03((224, 640, 800).into()) - .with_dry_run(3); - let mut model = YOLO::new(&options)?; + .with_i03((224, 640, 800).into()); + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/examples/yolov8-face/demo.png b/examples/yolov8-face/demo.png index 72287af..c7ff664 100644 Binary files a/examples/yolov8-face/demo.png and b/examples/yolov8-face/demo.png differ diff --git a/examples/yolov8-face/main.rs b/examples/yolov8-face/main.rs index c605869..b095b53 100644 --- a/examples/yolov8-face/main.rs +++ b/examples/yolov8-face/main.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), Box> { .with_i02((416, 640, 800).into()) .with_i03((416, 640, 800).into()) .with_confs(&[0.15]); - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/kids.jpg")?]; diff --git a/examples/yolov8-falldown/main.rs b/examples/yolov8-falldown/main.rs index 32824e2..14b114e 100644 --- a/examples/yolov8-falldown/main.rs +++ b/examples/yolov8-falldown/main.rs @@ -3,7 +3,7 @@ use usls::{models::YOLO, Annotator, DataLoader, Options}; fn main() -> Result<(), Box> { // build model let options = Options::default().with_model("yolov8-falldown-f16.onnx")?; - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/falldown.jpg")?]; diff --git a/examples/yolov8-head/main.rs b/examples/yolov8-head/main.rs index 3280a68..f8af6c8 100644 --- a/examples/yolov8-head/main.rs +++ b/examples/yolov8-head/main.rs @@ -3,7 +3,7 @@ use usls::{models::YOLO, Annotator, DataLoader, Options}; fn main() -> Result<(), Box> { // build model let options = Options::default().with_model("yolov8-head-f16.onnx")?; - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/kids.jpg")?]; diff --git a/examples/yolov8-trash/main.rs b/examples/yolov8-trash/main.rs index 91870db..6c906d2 100644 --- a/examples/yolov8-trash/main.rs +++ b/examples/yolov8-trash/main.rs @@ -5,7 +5,7 @@ fn main() -> Result<(), Box> { let options = Options::default() .with_model("yolov8-plastic-bag-f16.onnx")? .with_names(&["trash"]); - let mut model = YOLO::new(&options)?; + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/trash.jpg")?]; diff --git a/examples/yolov8/main.rs b/examples/yolov8/main.rs index c031e76..93e7425 100644 --- a/examples/yolov8/main.rs +++ b/examples/yolov8/main.rs @@ -10,18 +10,18 @@ fn main() -> Result<(), Box> { // .with_model("yolov8m-seg-dyn.onnx")? // .with_model("yolov8m-obb-dyn.onnx")? // .with_model("yolov8m-oiv7-dyn.onnx")? - .with_trt(0) + // .with_trt(0) // .with_fp16(true) // .with_coreml(0) // .with_cuda(3) .with_i00((1, 1, 4).into()) .with_i02((224, 640, 800).into()) .with_i03((224, 640, 800).into()) - .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_confs(&[0.4, 0.15]) // class 0: 0.4, others: 0.15 .with_names2(&coco::KEYPOINTS_NAMES_17) - .with_dry_run(10) - .with_profile(false); - let mut model = YOLO::new(&options)?; + // .with_dry_run(10) + .with_profile(true); + let mut model = YOLO::new(options)?; // build dataloader let dl = DataLoader::default() diff --git a/examples/yolov9/main.rs b/examples/yolov9/main.rs index 6f6d8bf..c82842e 100644 --- a/examples/yolov9/main.rs +++ b/examples/yolov9/main.rs @@ -7,8 +7,8 @@ fn main() -> Result<(), Box> { .with_i00((1, 1, 4).into()) .with_i02((416, 640, 800).into()) .with_i03((416, 640, 800).into()) - .with_confs(&[0.4, 0.15]); // person: 0.4, others: 0.15 - let mut model = YOLO::new(&options)?; + .with_confs(&[0.4, 0.15]); + let mut model = YOLO::new(options)?; // load image let x = vec![DataLoader::try_read("./assets/bus.jpg")?]; diff --git a/src/core/annotator.rs b/src/core/annotator.rs index 8f763c3..20621e4 100644 --- a/src/core/annotator.rs +++ b/src/core/annotator.rs @@ -495,7 +495,7 @@ impl Annotator { // keypoint let color = match &self.keypoints_palette { - None => self.get_color(i + 10), + None => self.get_color(i), Some(keypoints_palette) => keypoints_palette[i], }; imageproc::drawing::draw_filled_circle_mut( diff --git a/src/core/ops.rs b/src/core/ops.rs index 91ad906..26f5118 100644 --- a/src/core/ops.rs +++ b/src/core/ops.rs @@ -1,6 +1,7 @@ use anyhow::Result; +use fast_image_resize as fr; use image::{DynamicImage, GenericImageView, ImageBuffer}; -use ndarray::{Array, Axis, IxDyn}; +use ndarray::{s, Array, Axis, IxDyn}; pub fn standardize(xs: Array, mean: &[f32], std: &[f32]) -> Array { let mean = Array::from_shape_vec((1, mean.len(), 1, 1), mean.to_vec()).unwrap(); @@ -26,18 +27,57 @@ pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) { (r, (w0 * r).round(), (h0 * r).round()) } -pub fn resize(xs: &[DynamicImage], height: u32, width: u32) -> Result> { +pub fn build_resizer(ty: &str) -> fr::Resizer { + let ty = match ty { + "box" => fr::FilterType::Box, + "bilinear" => fr::FilterType::Bilinear, + "hamming" => fr::FilterType::Hamming, + "catmullRom" => fr::FilterType::CatmullRom, + "mitchell" => fr::FilterType::Mitchell, + "lanczos3" => fr::FilterType::Lanczos3, + _ => todo!(), + }; + fr::Resizer::new(fr::ResizeAlg::Convolution(ty)) +} + +pub fn resize( + xs: &[DynamicImage], + height: u32, + width: u32, + filter: &str, +) -> Result> { let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); + let mut resizer = build_resizer(filter); for (idx, x) in xs.iter().enumerate() { - let img = x.resize_exact(width, height, image::imageops::FilterType::Triangle); - for (x, y, rgb) in img.pixels() { - let x = x as usize; - let y = y as usize; - let [r, g, b, _] = rgb.0; - ys[[idx, 0, y, x]] = r as f32; - ys[[idx, 1, y, x]] = g as f32; - ys[[idx, 2, y, x]] = b as f32; - } + // src + let src_image = fr::Image::from_vec_u8( + std::num::NonZeroU32::new(x.width()).unwrap(), + std::num::NonZeroU32::new(x.height()).unwrap(), + x.to_rgb8().into_raw(), + fr::PixelType::U8x3, + ) + .unwrap(); + + // dst + let mut dst_image = fr::Image::new( + std::num::NonZeroU32::new(width).unwrap(), + std::num::NonZeroU32::new(height).unwrap(), + src_image.pixel_type(), + ); + + // resize + resizer + .resize(&src_image.view(), &mut dst_image.view_mut()) + .unwrap(); + let buffer = dst_image.into_vec(); + + // to ndarray + let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) + .unwrap() + .mapv(|x| x as f32) + .permuted_axes([2, 0, 1]); + let mut data = ys.slice_mut(s![idx, .., .., ..]); + data.assign(&y_); } Ok(ys) } @@ -46,27 +86,62 @@ pub fn letterbox( xs: &[DynamicImage], height: u32, width: u32, - bg: f32, + filter: &str, + bg: Option, ) -> Result> { - // TODO: refactor let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); - ys.fill(bg); + let mut resizer = build_resizer(filter); for (idx, x) in xs.iter().enumerate() { let (w0, h0) = x.dimensions(); let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32); - let img = x.resize_exact( - w_new as u32, - h_new as u32, - image::imageops::FilterType::CatmullRom, - ); - for (x, y, rgb) in img.pixels() { - let x = x as usize; - let y = y as usize; - let [r, g, b, _] = rgb.0; - ys[[idx, 0, y, x]] = r as f32; - ys[[idx, 1, y, x]] = g as f32; - ys[[idx, 2, y, x]] = b as f32; - } + + // src + let src_image = fr::Image::from_vec_u8( + std::num::NonZeroU32::new(w0).unwrap(), + std::num::NonZeroU32::new(h0).unwrap(), + x.to_rgb8().into_raw(), + fr::PixelType::U8x3, + ) + .unwrap(); + + // dst + let mut dst_image = match bg { + Some(bg) => fr::Image::from_vec_u8( + std::num::NonZeroU32::new(width).unwrap(), + std::num::NonZeroU32::new(height).unwrap(), + vec![bg; 3 * height as usize * width as usize], + src_image.pixel_type(), + ) + .unwrap(), + None => fr::Image::new( + std::num::NonZeroU32::new(width).unwrap(), + std::num::NonZeroU32::new(height).unwrap(), + src_image.pixel_type(), + ), + }; + + // mutable view + let mut dst_view = dst_image + .view_mut() + .crop( + 0, + 0, + std::num::NonZeroU32::new(w_new as u32).unwrap(), + std::num::NonZeroU32::new(h_new as u32).unwrap(), + ) + .unwrap(); + + // resize + resizer.resize(&src_image.view(), &mut dst_view).unwrap(); + let buffer = dst_image.into_vec(); + + // to ndarray + let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) + .unwrap() + .mapv(|x| x as f32) + .permuted_axes([2, 0, 1]); + let mut data = ys.slice_mut(s![idx, .., .., ..]); + data.assign(&y_); } Ok(ys) } @@ -75,23 +150,63 @@ pub fn resize_with_fixed_height( xs: &[DynamicImage], height: u32, width: u32, - bg: f32, + filter: &str, + bg: Option, ) -> Result> { let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); - ys.fill(bg); + let mut resizer = build_resizer(filter); for (idx, x) in xs.iter().enumerate() { let (w0, h0) = x.dimensions(); let h_new = height; let w_new = height * w0 / h0; - let img = x.resize_exact(w_new, h_new, image::imageops::FilterType::CatmullRom); - for (x, y, rgb) in img.pixels() { - let x = x as usize; - let y = y as usize; - let [r, g, b, _] = rgb.0; - ys[[idx, 0, y, x]] = r as f32; - ys[[idx, 1, y, x]] = g as f32; - ys[[idx, 2, y, x]] = b as f32; - } + + // src + let src_image = fr::Image::from_vec_u8( + std::num::NonZeroU32::new(w0).unwrap(), + std::num::NonZeroU32::new(h0).unwrap(), + x.to_rgb8().into_raw(), + fr::PixelType::U8x3, + ) + .unwrap(); + + // dst + let mut dst_image = match bg { + Some(bg) => fr::Image::from_vec_u8( + std::num::NonZeroU32::new(width).unwrap(), + std::num::NonZeroU32::new(height).unwrap(), + vec![bg; 3 * height as usize * width as usize], + src_image.pixel_type(), + ) + .unwrap(), + None => fr::Image::new( + std::num::NonZeroU32::new(width).unwrap(), + std::num::NonZeroU32::new(height).unwrap(), + src_image.pixel_type(), + ), + }; + + // mutable view + let mut dst_view = dst_image + .view_mut() + .crop( + 0, + 0, + std::num::NonZeroU32::new(w_new).unwrap(), + std::num::NonZeroU32::new(h_new).unwrap(), + ) + .unwrap(); + + // resize + resizer.resize(&src_image.view(), &mut dst_view).unwrap(); + let buffer = dst_image.into_vec(); + + // to ndarray + let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) + .unwrap() + .mapv(|x| x as f32) + .permuted_axes([2, 0, 1]); + let mut data = ys.slice_mut(s![idx, .., .., ..]); + data.assign(&y_); } Ok(ys) } diff --git a/src/core/options.rs b/src/core/options.rs index f6d6168..9c350b6 100644 --- a/src/core/options.rs +++ b/src/core/options.rs @@ -67,7 +67,7 @@ impl Default for Options { onnx_path: String::new(), device: Device::Cuda(0), profile: false, - num_dry_run: 3, + num_dry_run: 5, i00: None, i01: None, i02: None, diff --git a/src/models/blip.rs b/src/models/blip.rs index 0b64d74..0f1758e 100644 --- a/src/models/blip.rs +++ b/src/models/blip.rs @@ -43,17 +43,20 @@ impl Blip { } pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result { - let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "bilinear", + )?; + let xs_ = ops::normalize(xs_, 0., 255.); let xs_ = ops::standardize( xs_, &[0.48145466, 0.4578275, 0.40821073], &[0.26862954, 0.2613026, 0.2757771], ); let ys: Vec> = self.visual.run(&[xs_])?; - // let ys = ys[0].to_owned(); Ok(Embedding::new(ys[0].to_owned())) - // Ok(ys) } pub fn caption( diff --git a/src/models/clip.rs b/src/models/clip.rs index 75eb087..03eb633 100644 --- a/src/models/clip.rs +++ b/src/models/clip.rs @@ -53,8 +53,13 @@ impl Clip { } pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result { - let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "bilinear", + )?; + let xs_ = ops::normalize(xs_, 0., 255.); let xs_ = ops::standardize( xs_, &[0.48145466, 0.4578275, 0.40821073], diff --git a/src/models/db.rs b/src/models/db.rs index f64af62..18c6d2c 100644 --- a/src/models/db.rs +++ b/src/models/db.rs @@ -17,8 +17,8 @@ pub struct DB { } impl DB { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -27,8 +27,8 @@ impl DB { let confs = DynConf::new(&options.confs, 1); let unclip_ratio = options.unclip_ratio; let binary_thresh = 0.2; - let min_width = options.min_width.unwrap_or(0.0); - let min_height = options.min_height.unwrap_or(0.0); + let min_width = options.min_width.unwrap_or(0.); + let min_height = options.min_height.unwrap_or(0.); engine.dry_run()?; Ok(Self { @@ -45,8 +45,14 @@ impl DB { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox(xs, self.height.opt as u32, self.width.opt as u32, 144.0)?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::letterbox( + xs, + self.height.opt as u32, + self.width.opt as u32, + "bilinear", + Some(114), + )?; + let xs_ = ops::normalize(xs_, 0., 255.); let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]); let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs index 942b81d..c5fd8d5 100644 --- a/src/models/depth_anything.rs +++ b/src/models/depth_anything.rs @@ -12,8 +12,8 @@ pub struct DepthAnything { } impl DepthAnything { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -30,7 +30,12 @@ impl DepthAnything { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; + let xs_ = ops::resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "lanczos3", + )?; let xs_ = ops::normalize(xs_, 0.0, 255.0); let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]); let ys = self.engine.run(&[xs_])?; diff --git a/src/models/dinov2.rs b/src/models/dinov2.rs index 05349f3..f6f7c18 100644 --- a/src/models/dinov2.rs +++ b/src/models/dinov2.rs @@ -21,14 +21,14 @@ pub struct Dinov2 { } impl Dinov2 { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.inputs_minoptmax()[0][0].to_owned(), engine.inputs_minoptmax()[0][2].to_owned(), engine.inputs_minoptmax()[0][3].to_owned(), ); - let which = match &options.onnx_path { + let which = match options.onnx_path { s if s.contains("b14") => Model::B, s if s.contains("s14") => Model::S, _ => todo!(), @@ -49,7 +49,12 @@ impl Dinov2 { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; + let xs_ = ops::resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "lanczos3", + )?; let xs_ = ops::normalize(xs_, 0.0, 255.0); let xs_ = ops::standardize( xs_, diff --git a/src/models/modnet.rs b/src/models/modnet.rs index f16cafd..2d586b6 100644 --- a/src/models/modnet.rs +++ b/src/models/modnet.rs @@ -13,8 +13,8 @@ pub struct MODNet { } impl MODNet { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -31,8 +31,13 @@ impl MODNet { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; - let xs_ = ops::normalize(xs_, 127.5, 255.0); + let xs_ = ops::resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "lanczos3", + )?; + let xs_ = ops::normalize(xs_, 127.5, 255.); let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) } diff --git a/src/models/rtdetr.rs b/src/models/rtdetr.rs index c8bd024..855598f 100644 --- a/src/models/rtdetr.rs +++ b/src/models/rtdetr.rs @@ -17,14 +17,14 @@ pub struct RTDETR { } impl RTDETR { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.inputs_minoptmax()[0][0].to_owned(), engine.inputs_minoptmax()[0][2].to_owned(), engine.inputs_minoptmax()[0][3].to_owned(), ); - let names: Option<_> = match &options.names { + let names: Option<_> = match options.names { None => engine.try_fetch("names").map(|names| { let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap(); let mut names_ = vec![]; @@ -56,7 +56,13 @@ impl RTDETR { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 144.0)?; + let xs_ = ops::letterbox( + xs, + self.height() as u32, + self.width() as u32, + "catmullRom", + Some(114), + )?; let xs_ = ops::normalize(xs_, 0.0, 255.0); let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) diff --git a/src/models/rtmo.rs b/src/models/rtmo.rs index bb99747..7b14ca0 100644 --- a/src/models/rtmo.rs +++ b/src/models/rtmo.rs @@ -15,8 +15,8 @@ pub struct RTMO { } impl RTMO { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -39,7 +39,13 @@ impl RTMO { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 114.0)?; + let xs_ = ops::letterbox( + xs, + self.height() as u32, + self.width() as u32, + "catmullRom", + Some(114), + )?; let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) } diff --git a/src/models/svtr.rs b/src/models/svtr.rs index 36acbbc..cb3990c 100644 --- a/src/models/svtr.rs +++ b/src/models/svtr.rs @@ -15,8 +15,8 @@ pub struct SVTR { } impl SVTR { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -24,7 +24,7 @@ impl SVTR { ); let confs = DynConf::new(&options.confs, 1); let mut vocab: Vec<_> = - std::fs::read_to_string(options.vocab.as_ref().expect("No vocabulary found"))? + std::fs::read_to_string(options.vocab.expect("No vocabulary found"))? .lines() .map(|line| line.to_string()) .collect(); @@ -43,8 +43,13 @@ impl SVTR { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = - ops::resize_with_fixed_height(xs, self.height.opt as u32, self.width.opt as u32, 0.0)?; + let xs_ = ops::resize_with_fixed_height( + xs, + self.height.opt as u32, + self.width.opt as u32, + "bilinear", + Some(0), + )?; let xs_ = ops::normalize(xs_, 0.0, 255.0); let ys: Vec> = self.engine.run(&[xs_])?; let ys = ys[0].to_owned(); diff --git a/src/models/yolo.rs b/src/models/yolo.rs index 22396ff..ab4d2ec 100644 --- a/src/models/yolo.rs +++ b/src/models/yolo.rs @@ -40,16 +40,16 @@ pub struct YOLO { } impl YOLO { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), engine.width().to_owned(), ); - let task = match &options.yolo_task { - Some(task) => task.to_owned(), + let task = match options.yolo_task { + Some(task) => task, None => match engine .try_fetch("task") .unwrap_or("detect".to_string()) @@ -60,12 +60,12 @@ impl YOLO { "pose" => YOLOTask::Pose, "segment" => YOLOTask::Segment, "obb" => YOLOTask::Obb, - x => todo!("{:?} is not supported for now!", x), + x => todo!("Not supported: {x:?} "), }, }; // try from custom class names, and then model metadata - let mut names = options.names.to_owned().or(Self::fetch_names(&engine)); + let mut names = options.names.or(Self::fetch_names(&engine)); let nc = match options.nc { Some(nc) => { match &names { @@ -88,7 +88,7 @@ impl YOLO { }, }; - let names_kpt = options.names2.to_owned().or(None); + let names_kpt = options.names2.or(None); // try from model metadata let nk = engine @@ -131,10 +131,18 @@ impl YOLO { pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { let xs_ = match self.task { - YOLOTask::Classify => ops::resize(xs, self.height() as u32, self.width() as u32)?, - _ => ops::letterbox(xs, self.height() as u32, self.width() as u32, 114.0)?, + YOLOTask::Classify => { + ops::resize(xs, self.height() as u32, self.width() as u32, "bilinear")? + } + _ => ops::letterbox( + xs, + self.height() as u32, + self.width() as u32, + "catmullRom", + Some(114), + )?, }; - let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::normalize(xs_, 0., 255.); let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) } diff --git a/src/models/yolop.rs b/src/models/yolop.rs index c6e8dd8..51b1bf9 100644 --- a/src/models/yolop.rs +++ b/src/models/yolop.rs @@ -15,8 +15,8 @@ pub struct YOLOPv2 { } impl YOLOPv2 { - pub fn new(options: &Options) -> Result { - let mut engine = OrtEngine::new(options)?; + pub fn new(options: Options) -> Result { + let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), engine.height().to_owned(), @@ -37,8 +37,14 @@ impl YOLOPv2 { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 114.0)?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::letterbox( + xs, + self.height() as u32, + self.width() as u32, + "bilinear", + Some(114), + )?; + let xs_ = ops::normalize(xs_, 0., 255.); let ys = self.engine.run(&[xs_])?; self.postprocess(ys, xs) } diff --git a/src/ys/bbox.rs b/src/ys/bbox.rs index efc3d2d..df5fa7c 100644 --- a/src/ys/bbox.rs +++ b/src/ys/bbox.rs @@ -31,7 +31,7 @@ impl std::fmt::Debug for Bbox { f.debug_struct("Bbox") .field("xyxy", &[self.x, self.y, self.xmax(), self.ymax()]) .field("id", &self.id) - .field("id_born", &self.id_born) + // .field("id_born", &self.id_born) .field("name", &self.name) .field("confidence", &self.confidence) .finish()