open-mmlab · CescMessi · Oct 28, 2023 · Oct 29, 2023 · Oct 31, 2023 · Nov 1, 2023
diff --git a/mmengine/structures/pixel_data.py b/mmengine/structures/pixel_data.py
@@ -1,9 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import warnings
-from typing import List, Sequence, Union
+from typing import Any, List, Sequence, Union
 
 import numpy as np
 import torch
+from torch.nn.functional import interpolate, pad
 
 from .base_data_element import BaseDataElement
 
@@ -127,4 +128,86 @@ def shape(self):
         else:
             return None
 
-    # TODO padding, resize
+    def resize(self,
+               size: Sequence[int],
+               interpolation: str = 'bilinear') -> 'PixelData':
+        """Resize all values to the given `size`, and return a new `PixelData`.
+
+        Args:
+            size (Sequence[int]): Output spatial size,
+              should be (height, width)
+            interpolation (str, optional): The algorithm used in interpolation.
+              Available for resizing are: `nearest`, `bilinear`, `bicubic`,
+              `area`, `nearest-exact`. Defaults to 'bilinear'.
+
+        Returns:
+            PixelData: A new, resized `PixelData`. Each value keeps its
+            original container type (ndarray or Tensor) and dtype.
+        """
+        assert len(size) == 2, 'Size should be (height, width)'
+        # Nothing to interpolate when empty or already at the target size.
+        if self.shape is None or tuple(size) == tuple(self.shape):
+            return self.clone()
+        new_data = self.__class__(metainfo=self.metainfo)
+        for k, v in self.items():
+            is_numpy = isinstance(v, np.ndarray)
+            data = torch.from_numpy(v) if is_numpy else v
+            # `interpolate` needs a leading batch dim and does not support
+            # some dtypes, so compute in float32 and cast back afterwards.
+            data = data.unsqueeze(0).to(torch.float32)
+            # Drop only the batch dim: a bare `squeeze()` would also remove
+            # a single channel or a size-1 target height/width.
+            resized_data = interpolate(
+                data, size=size, mode=interpolation).squeeze(0)
+            if is_numpy:
+                resized_data = resized_data.numpy().astype(v.dtype)
+            else:
+                resized_data = resized_data.to(v.dtype)
+            setattr(new_data, k, resized_data)
+        return new_data
+
+    def padding(self,
+                pad_size: Sequence[int],
+                mode: str = 'constant',
+                value: Any = 0) -> 'PixelData':
+        """Pad all values with the given `pad_size`, and return a new
+        `PixelData`.
+
+        Args:
+            pad_size (Sequence[int]): Padding amounts, ordered from the last
+              dimension backwards as in `torch.nn.functional.pad`.
+              Length 2: (padding_left, padding_right)
+              Length 4:
+              (padding_left, padding_right, padding_top, padding_bottom)
+              Length 6:
+              (padding_left, padding_right,
+                padding_top, padding_bottom, padding_front, padding_back)
+            mode (str, optional): Padding mode.
+              'constant', 'reflect', 'replicate' or 'circular'.
+              Defaults to 'constant'.
+            value (Any, optional): Fill value. Defaults to 0.
+
+        Returns:
+            PixelData: A new, padded `PixelData`. Each value keeps its
+            original container type (ndarray or Tensor) and dtype.
+        """
+        assert len(pad_size) in (2, 4,
+                                 6), 'Pad size length should be 2, 4 or 6'
+        # Check every entry rather than the sum: offsetting negative and
+        # positive amounts such as (-5, 5) still change the data.
+        if all(p == 0 for p in pad_size):
+            return self.clone()
+        new_data = self.__class__(metainfo=self.metainfo)
+        for k, v in self.items():
+            is_numpy = isinstance(v, np.ndarray)
+            data = torch.from_numpy(v) if is_numpy else v
+            # Some pad modes do not support every dtype, so pad in float32
+            # and cast back to the original dtype afterwards.
+            data = data.to(torch.float32)
+            pad_data = pad(data, pad=pad_size, mode=mode, value=value)
+            if is_numpy:
+                pad_data = pad_data.numpy().astype(v.dtype)
+            else:
+                pad_data = pad_data.to(v.dtype)
+            setattr(new_data, k, pad_data)
+        return new_data
diff --git a/tests/test_structures/test_pixel_data.py b/tests/test_structures/test_pixel_data.py
@@ -81,3 +81,86 @@ def test_shape(self):
         assert pixel_data.shape == (20, 40)
         pixel_data = PixelData()
         assert pixel_data.shape is None
+
+    def test_resize(self):
+        """Check `PixelData.resize` output shapes and argument validation.
+
+        Covers every interpolation mode accepted for resizing, modes that
+        must be rejected, and malformed `size` arguments.
+        """
+        pixel_data = self.setup_data()
+        target_size = (40, 20)
+
+        # each supported mode must produce the requested (height, width)
+        supported_modes = (
+            'bilinear',
+            'nearest',
+            'bicubic',
+            'area',
+            'nearest-exact',
+        )
+        for supported_mode in supported_modes:
+            resized_pixel_data = pixel_data.resize(
+                target_size, interpolation=supported_mode)
+            assert resized_pixel_data.shape == target_size
+
+        # only the 5 interpolation modes above are supported
+        for rejected_mode in ('linear', 'trilinear', 'otherstr'):
+            with self.assertRaises(NotImplementedError):
+                pixel_data.resize(target_size, interpolation=rejected_mode)
+
+        # size should be (height, width): a bare int and a 3-tuple are
+        # both rejected
+        with self.assertRaises(TypeError):
+            pixel_data.resize(20)
+        with self.assertRaises(AssertionError):
+            pixel_data.resize((1, 20, 20))
+
+    def test_padding(self):
+        """Check `PixelData.padding` output shapes for each size and mode."""
+        pixel_data = self.setup_data()
+
+        # left=5, right=10
+        width_padded = pixel_data.padding((5, 10))
+        assert width_padded.shape == (20, 55)
+
+        # left=5, right=10, top=15, bottom=20
+        spatial_padded = pixel_data.padding((5, 10, 15, 20))
+        assert spatial_padded.shape == (55, 55)
+
+        # left=5, right=10, top=15, bottom=20, front=2, back=3
+        volume_padded = pixel_data.padding((5, 10, 15, 20, 2, 3))
+        assert volume_padded.shape == (55, 55)
+        assert volume_padded.image.shape == (9, 55, 55)
+        assert volume_padded.featmap.shape == (15, 55, 55)
+
+        with self.assertRaises(TypeError):
+            pixel_data.padding(5)
+
+        # different mode
+        width_only = (5, 10)
+        width_height = (5, 10, 2, 4)
+        width_height_depth = (5, 10, 2, 4, 6, 8)
+        # reflect support width, height
+        reflected = pixel_data.padding(width_only, mode='reflect')
+        assert reflected.shape == (20, 55)
+        reflected = pixel_data.padding(width_height, mode='reflect')
+        assert reflected.shape == (26, 55)
+        with self.assertRaises(RuntimeError):
+            pixel_data.padding(width_height_depth, mode='reflect')
+
+        # replicate support width, height
+        replicated = pixel_data.padding(width_only, mode='replicate')
+        assert replicated.shape == (20, 55)
+        replicated = pixel_data.padding(width_height, mode='replicate')
+        assert replicated.shape == (26, 55)
+        with self.assertRaises(RuntimeError):
+            pixel_data.padding(width_height_depth, mode='replicate')
+
+        # circular support width
+        wrapped = pixel_data.padding(width_only, mode='circular')
+        assert wrapped.shape == (20, 55)
+        with self.assertRaises(RuntimeError):
+            pixel_data.padding(width_height, mode='circular')
+        with self.assertRaises(RuntimeError):
+            pixel_data.padding(width_height_depth, mode='circular')