Skip to content

Commit

Permalink
Refactor and clean up the RAID code and code using the RAID code (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
pyrco committed Aug 1, 2024
1 parent de2aefd commit 18f600f
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 79 deletions.
40 changes: 24 additions & 16 deletions dissect/volume/ddf/ddf.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,51 @@
from __future__ import annotations

import io
from typing import BinaryIO, Union
from typing import TYPE_CHECKING, BinaryIO

from dissect.util import ts

from dissect.volume.ddf.c_ddf import DEFAULT_SECTOR_SIZE, c_ddf
from dissect.volume.exceptions import DDFError
from dissect.volume.raid.raid import RAID, Configuration, PhysicalDisk, VirtualDisk
from dissect.volume.raid.raid import (
RAID,
Configuration,
DiskMap,
PhysicalDisk,
VirtualDisk,
)
from dissect.volume.raid.stream import Layout, Level

if TYPE_CHECKING:
DDFPhysicalDiskDescriptor = BinaryIO | "DDFPhysicalDisk"

DECADE = 3600 * 24 * (365 * 10 + 2)


class DDF(RAID):
def __init__(self, fh: list[Union[BinaryIO, DDFPhysicalDisk]], sector_size: int = DEFAULT_SECTOR_SIZE):
def __init__(
self,
fh: list[DDFPhysicalDiskDescriptor] | DDFPhysicalDiskDescriptor,
sector_size: int = DEFAULT_SECTOR_SIZE,
):
fhs = [fh] if not isinstance(fh, list) else fh
self.disks = [DDFPhysicalDisk(f, sector_size) if not isinstance(f, DDFPhysicalDisk) else f for f in fhs]
self.sector_size = sector_size
physical_disks = [DDFPhysicalDisk(f, sector_size) if not isinstance(f, DDFPhysicalDisk) else f for f in fhs]

config_map = {}
for pd in self.disks:
for pd in physical_disks:
config_map.setdefault(pd.anchor.DDF_Header_GUID, []).append(pd)

super().__init__([DDFConfiguration(disks) for disks in config_map.values()])


class DDFConfiguration(Configuration):
def __init__(self, fh: list[Union[BinaryIO, PhysicalDisk]], sector_size: int = DEFAULT_SECTOR_SIZE):
fhs = [fh] if not isinstance(fh, list) else fh
self.disks = [DDFPhysicalDisk(f, sector_size) if not isinstance(f, DDFPhysicalDisk) else f for f in fhs]
self.sector_size = sector_size

def __init__(self, physical_disks: list[DDFPhysicalDisk]):
pd_map: dict[int, DDFPhysicalDisk] = {}
vde_map: dict[bytes, VirtualDiskRecord] = {}
vdcr_map: dict[bytes, VirtualDiskConfigurationRecord] = {}
vdcr_uniq: dict[tuple[bytes, int], VirtualDiskConfigurationRecord] = {}

for pd in self.disks:
for pd in physical_disks:
pd_map[pd.reference] = pd
vde_map.update({vde.guid: vde for vde in pd.virtual_disk_records})
vdcr_map.update({vdcr.guid: vdcr for vdcr in pd.virtual_disk_configuration_records})
Expand All @@ -61,22 +69,22 @@ def __init__(self, fh: list[Union[BinaryIO, PhysicalDisk]], sector_size: int = D
i += 1

virtual_disks = [DDFVirtualDisk(vdcr_map[guid], vde_map[guid], vd_map[guid]) for guid in vd_map.keys()]
super().__init__(self.disks, virtual_disks)
super().__init__(physical_disks, virtual_disks)


class DDFVirtualDisk(VirtualDisk):
def __init__(
self,
vdcr: VirtualDiskConfigurationRecord,
vdr: VirtualDiskRecord,
disks: dict[int, tuple[int, DDFPhysicalDisk]],
disks: DiskMap,
):
self.vdcr = vdcr
self.vdr = vdr
self.disks = disks

if (block_size := self.vdcr.block_size) == 0xFFFF:
block_size = list(self.disks.values())[0][1].block_size
block_size = list(disks.values())[0][1].block_size

level, layout, num_disks = _convert_raid_layout(
vdcr.primary_raid_level,
Expand Down Expand Up @@ -218,7 +226,7 @@ def __init__(self, fh: BinaryIO):
self.state = self.header.VD_State
self.init_state = self.header.Init_State
name = self.header.VD_Name.split(b"\x00")[0]
self.name = name.decode("utf-8") if self.type & 0x02 else name.decode()
self.name = name.decode(encoding="utf-8") if self.type & 0x02 else name.decode(encoding="ascii")

def __repr__(self) -> str:
return f"<VirtualDiskRecord guid={self.guid} number={self.number} type={self.type:#x} name={self.name!r}>"
Expand Down
4 changes: 2 additions & 2 deletions dissect/volume/md/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dissect.volume.md.md import MD, Device
from dissect.volume.md.md import MD, MDPhysicalDisk

__all__ = [
"MD",
"Device",
"MDPhysicalDisk",
]
74 changes: 38 additions & 36 deletions dissect/volume/md/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import io
import operator
import struct
from typing import BinaryIO, Union
from typing import TYPE_CHECKING, BinaryIO
from uuid import UUID

from dissect.util import ts
Expand All @@ -13,7 +13,8 @@
from dissect.volume.raid.raid import RAID, Configuration, PhysicalDisk, VirtualDisk
from dissect.volume.raid.stream import Level

DeviceDescriptor = Union[BinaryIO, "Device"]
if TYPE_CHECKING:
MDPhysicalDiskDescriptor = BinaryIO | "MDPhysicalDisk"


class MD(RAID):
Expand All @@ -22,62 +23,63 @@ class MD(RAID):
Use this class to read from a RAID set.
Args:
fh: A single file-like object or :class:`Device`, or a list of multiple belonging to the same RAID set.
fh: A single file-like object or :class:`MDPhysicalDisk`, or a list of multiple belonging to the same RAID set.
"""

def __init__(self, fh: Union[list[DeviceDescriptor], DeviceDescriptor]):
def __init__(self, fh: list[MDPhysicalDiskDescriptor] | MDPhysicalDiskDescriptor):
fhs = [fh] if not isinstance(fh, list) else fh
self.devices = [Device(fh) if not isinstance(fh, Device) else fh for fh in fhs]
physical_disks = [MDPhysicalDisk(fh) if not isinstance(fh, MDPhysicalDisk) else fh for fh in fhs]

config_map = {}
for dev in self.devices:
config_map.setdefault(dev.set_uuid, []).append(dev)
for disk in physical_disks:
config_map.setdefault(disk.set_uuid, []).append(disk)

super().__init__([MDConfiguration(devices) for devices in config_map.values()])
super().__init__([MDConfiguration(disks) for disks in config_map.values()])


class MDConfiguration(Configuration):
def __init__(self, devices: list[DeviceDescriptor]):
devices = [Device(fh) if not isinstance(fh, Device) else fh for fh in devices]
def __init__(self, physical_disks: list[MDPhysicalDisk]):
physical_disks = sorted(physical_disks, key=operator.attrgetter("raid_disk"))

self.devices = sorted(devices, key=operator.attrgetter("raid_disk"))
if len({dev.set_uuid for dev in self.devices}) != 1:
raise ValueError("Multiple MD sets detected, supply only the devices of a single set")
if len({disk.set_uuid for disk in physical_disks}) != 1:
raise ValueError("Multiple MD sets detected, supply only the disks of a single set")

virtual_disk = MDDisk(self)
super().__init__(self.devices, [virtual_disk])
virtual_disks = [MDVirtualDisk(physical_disks)]
super().__init__(physical_disks, virtual_disks)


class MDDisk(VirtualDisk):
def __init__(self, configuration: MDConfiguration):
self.configuration = configuration
reference_dev = sorted(configuration.devices, key=operator.attrgetter("events"), reverse=True)[0]
disks = {dev.raid_disk: (0, dev) for dev in self.configuration.devices if dev.raid_disk is not None}
class MDVirtualDisk(VirtualDisk):
def __init__(self, physical_disks: list[MDPhysicalDisk]):
reference_disk = sorted(physical_disks, key=operator.attrgetter("events"), reverse=True)[0]
disk_map = {disk.raid_disk: (0, disk) for disk in physical_disks if disk.raid_disk is not None}

if reference_dev.level == Level.LINEAR:
size = sum(disk.size for _, disk in disks.values())
elif reference_dev.level == Level.RAID0:
if reference_disk.level == Level.LINEAR:
size = sum(disk.size for _, disk in disk_map.values())
elif reference_disk.level == Level.RAID0:
size = 0
for _, disk in disks.values():
size += disk.size & ~(reference_dev.chunk_size - 1)
elif reference_dev.level in (Level.RAID1, Level.RAID4, Level.RAID5, Level.RAID6, Level.RAID10):
size = reference_dev.sb.size * SECTOR_SIZE
for _, disk in disk_map.values():
size += disk.size & ~(reference_disk.chunk_size - 1)
elif reference_disk.level in (Level.RAID1, Level.RAID4, Level.RAID5, Level.RAID6, Level.RAID10):
size = reference_disk.sb.size * SECTOR_SIZE
else:
raise ValueError("Invalid MD RAID configuration: No valid RAID level found for the reference disk")
# ValueError does not interpolate printf-style arguments (unlike logging calls),
# so build the complete message here instead of passing the level as a second argument.
raise ValueError(
"Invalid MD RAID configuration: No valid RAID level found for the reference disk, "
f"found: {reference_disk.level:d}"
)

super().__init__(
reference_dev.set_name,
reference_dev.set_uuid,
reference_disk.set_name,
reference_disk.set_uuid,
size,
reference_dev.level,
reference_dev.layout,
reference_dev.chunk_size,
reference_dev.raid_disks,
disks,
reference_disk.level,
reference_disk.layout,
reference_disk.chunk_size,
reference_disk.raid_disks,
disk_map,
)


class Device(PhysicalDisk):
class MDPhysicalDisk(PhysicalDisk):
"""Parse metadata from an MD device.
Supports 0.90 and 1.x metadata.
Expand Down
9 changes: 6 additions & 3 deletions dissect/volume/raid/raid.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
RAID456Stream,
)

DiskMap = dict[int, tuple[int, "PhysicalDisk"]]
"""A dict of the structure: {disk_idx: (data_offset, PhysicalDisk)}"""


class RAID:
def __init__(self, configurations: list[Configuration]):
Expand Down Expand Up @@ -45,7 +48,7 @@ def __init__(
layout: int,
stripe_size: int,
num_disks: int,
physical_disks: dict[int, tuple[int, PhysicalDisk]],
disk_map: DiskMap,
):
self.name = name
self.uuid = uuid
Expand All @@ -54,7 +57,7 @@ def __init__(
self.layout = layout
self.stripe_size = stripe_size
self.num_disks = num_disks
self.physical_disks = physical_disks
self.disk_map = disk_map

def open(self) -> BinaryIO:
"""Return a file-like object of the RAID volume in this set."""
Expand All @@ -64,7 +67,7 @@ def open(self) -> BinaryIO:
return RAID0Stream(self)
elif self.level == Level.RAID1:
# Don't really care which mirror to read from, so just open the first disk
return self.physical_disks[0][1].open()
return self.disk_map[0][1].open()
elif self.level in (Level.RAID4, Level.RAID5, Level.RAID6):
return RAID456Stream(self)
elif self.level == Level.RAID10:
Expand Down
42 changes: 20 additions & 22 deletions dissect/volume/raid/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dissect.volume.exceptions import RAIDError

if TYPE_CHECKING:
from dissect.volume.raid.raid import PhysicalDisk, VirtualDisk
from dissect.volume.raid.raid import DiskMap, PhysicalDisk, VirtualDisk


class Level(IntEnum):
Expand Down Expand Up @@ -54,49 +54,47 @@ class LinearStream(MappingStream):
def __init__(self, virtual_disk: VirtualDisk):
super().__init__()
self.virtual_disk = virtual_disk
self.disk_map: DiskMap = dict(sorted(virtual_disk.disk_map.items()))

physical_disks: dict[int, tuple[int, PhysicalDisk]] = dict(sorted(virtual_disk.physical_disks.items()))
if len(physical_disks) != virtual_disk.num_disks:
if len(self.disk_map) != virtual_disk.num_disks:
raise RAIDError(f"Missing disks in linear RAID set {virtual_disk.uuid} ({virtual_disk.name})")

offset = 0
for disk_offset, disk in physical_disks.values():
for disk_offset, disk in self.disk_map.values():
self.add(offset, disk.size, disk.open(), disk_offset)
offset += disk.size


class Zone(NamedTuple):
zone_end: int
dev_start: int
devices: list[tuple[int, PhysicalDisk]]
devices: DiskMap


class RAID0Stream(AlignedStream):
"""Implements a stream on a RAID0 set."""

def __init__(self, virtual_disk: VirtualDisk):
self.virtual_disk = virtual_disk
self.disk_map = dict(sorted(self.virtual_disk.disk_map.items()))

disks = self.virtual_disk.physical_disks
if len(disks) != virtual_disk.num_disks:
if len(self.disk_map) != virtual_disk.num_disks:
raise RAIDError(f"Missing disks in RAID0 set {virtual_disk.uuid} ({virtual_disk.name})")

# Determine how many strip zones we need to construct
# If a RAID0 set consists of devices with different sizes, additional strip zones
# may exist on the larger devices but not on the smaller ones
# Reference: create_strip_zones
disks: dict[int, tuple[int, PhysicalDisk]] = dict(sorted(disks.items()))
rounded_sizes = {}

rounded_sizes: dict[PhysicalDisk, int] = {}
stripe_size = virtual_disk.stripe_size
num_strip_zones = 0
for idx1, (_, dev1) in disks.items():
for idx1, (_, dev1) in self.disk_map.items():
rounded_sizes[dev1] = (dev1.size // stripe_size) * stripe_size

has_same_size = False
# Check if dev1 is unequal in size to the sizes of any of the previous devices
# If so, this means an extra strip zone is present
for idx2, (_, dev2) in disks.items():
for idx2, (_, dev2) in self.disk_map.items():
if idx1 == idx2:
break

Expand All @@ -109,25 +107,25 @@ def __init__(self, virtual_disk: VirtualDisk):

# Determine the smallest device
smallest = None
for _, dev in disks.values():
for _, dev in self.disk_map.values():
if not smallest or rounded_sizes[dev] < rounded_sizes[smallest]:
smallest = dev

# Construct the strip zones
zones = [Zone(rounded_sizes[smallest] * len(disks), 0, disks)]
zones = [Zone(rounded_sizes[smallest] * len(self.disk_map), 0, self.disk_map)]

cur_zone_end = zones[0].zone_end
for _ in range(1, num_strip_zones):
zone_devices = []
zone_devices = {}
dev_start = rounded_sizes[smallest]
smallest = None

# Look for the next smallest device, that is: the smallest device that is larger than the "dev_start" device
for _, dev in disks.values():
for disk_idx, (data_offset, dev) in self.disk_map.items():
if rounded_sizes[dev] <= dev_start:
continue

zone_devices.append(dev)
zone_devices[disk_idx] = (data_offset, dev)
if not smallest or rounded_sizes[dev] < rounded_sizes[smallest]:
smallest = dev

Expand Down Expand Up @@ -192,8 +190,8 @@ def __init__(self, virtual_disk: VirtualDisk):
self.algorithm = self.virtual_disk.layout
self.max_degraded = 2 if self.level == 6 else 1

self.disks = self.virtual_disk.physical_disks
if len(self.disks) < self.virtual_disk.num_disks - self.max_degraded:
self.disk_map = self.virtual_disk.disk_map
if len(self.disk_map) < self.virtual_disk.num_disks - self.max_degraded:
raise RAIDError(f"Missing disks in RAID{self.level} set {virtual_disk.uuid} ({virtual_disk.name})")

super().__init__(self.virtual_disk.size, self.virtual_disk.stripe_size)
Expand Down Expand Up @@ -362,7 +360,7 @@ def _read(self, offset: int, length: int) -> bytes:
while length:
stripe, offset_in_stripe, dd_idx, pd_idx, qd_idx, ddf_layout = self._get_stripe_read_info(offset)
offset_in_device = stripe * stripe_size + offset_in_stripe
dd_start, dd_dev = self.disks[dd_idx]
dd_start, dd_dev = self.disk_map[dd_idx]

stripe_remaining = stripe_size - offset_in_stripe
read_length = min(length, stripe_remaining)
Expand All @@ -383,7 +381,7 @@ class RAID10Stream(AlignedStream):
def __init__(self, virtual_disk: VirtualDisk):
self.virtual_disk = virtual_disk
self.raid_disks = self.virtual_disk.num_disks
self.devices = virtual_disk.physical_disks
self.disk_map = virtual_disk.disk_map

# Reference: setup_geo
layout = virtual_disk.layout
Expand Down Expand Up @@ -420,7 +418,7 @@ def _read(self, offset: int, length: int) -> bytes:

if self.far_offset:
stripe *= self.far_copies
device_start, device = self.devices[dev]
device_start, device = self.disk_map[dev]

stripe_remaining = stripe_size - offset_in_stripe
read_length = min(length, stripe_remaining)
Expand Down

0 comments on commit 18f600f

Please sign in to comment.