Skip to content

Commit

Permalink
Add support for drives connected by MegaRAID (#47)
Browse files Browse the repository at this point in the history
* Add support for drives connected by MegaRAID

* Fixed strict handling of errors generated by MegaRAID

* Separate MegaRAID-related logic to megaraid.py

* Add get_megaraid_device_type

* Added to README an example of a device connected by MegaRAID
  • Loading branch information
misodengaku authored Jan 20, 2024
1 parent 06fded2 commit 4be061a
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUN apk add --no-cache smartmontools \
&& rm -rf /root/.cache/ \
&& find / -name '*.pyc' -delete

COPY ./smartprom.py /smartprom.py
COPY ./smartprom.py /megaraid.py /

EXPOSE 9902
ENTRYPOINT ["/usr/local/bin/python", "-u", "/smartprom.py"]
Expand Down
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ _Note: You don't have to do this if you use the Docker image._
1. Copy the `smartprom.service` file into `/etc/systemd/system` folder.
2. Copy the `smartprom.py` file anywhere into your system.
3. Modify `ExecStart=` in the `smartprom.service` so that it points to `smartprom.py` in your system.
4. Run `chmod +x smartprom.py`
4. Run `chmod +x smartprom.py`
5. Install `prometheus_client` for the root user, example: `sudo -H python3 -m pip install prometheus_client`
6. Run `systemctl enable smartprom` and `systemctl start smartprom`
7. Your metrics will now be available at `http://localhost:9902`
Expand Down Expand Up @@ -56,6 +56,13 @@ smartprom_airflow_temperature_cel_raw{drive="/dev/sda",model_family="Seagate Bar
...
```

If you are using a MegaRAID card to connect the drives, the exported metrics will look like this:

```shell
smartprom_power_on_hours_raw{drive="megaraid,0",model_family="Western Digital Ultrastar He10/12",model_name="WDC WD80EMAZ-00M9AA0",serial_number="XXXXXXXX",type="sat"} 28522.0
smartprom_power_on_time_hours{drive="megaraid,1",model_family="Unknown",model_name="HGST HUH728080AL5200",serial_number="XXXXXXXX",type="scsi"} 37341.0
```

## Configuration

All configuration is done with environment variables.
Expand Down
89 changes: 89 additions & 0 deletions megaraid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import json
import re
from typing import Optional

import smartprom

# Regex for a smartctl device type string that addresses a drive behind a
# MegaRAID controller: an optional "sat+" prefix followed by "megaraid,<digits>"
# (e.g. "megaraid,0" or "sat+megaraid,0"). Group 2 captures the "megaraid,N" part.
MEGARAID_TYPE_PATTERN = r"(sat\+)?(megaraid,\d+)"


def get_megaraid_device_info(dev: str, typ: str) -> dict:
    """
    Look up identity information for a drive attached through MegaRAID
    and return it in the same shape that get_device_info produces.

    dev is the device path handed to smartctl and typ is the device type
    string that carries the "megaraid,N" ID.
    Returns a dict with the keys "model_family", "model_name" and
    "serial_number" (each falling back to "Unknown"), or an empty dict
    when typ contains no MegaRAID ID.
    """
    device_id = get_megaraid_device_id(typ)
    if device_id is None:
        return {}

    command = ["smartctl", "-i", "--json=c", "-d", device_id, dev]
    output, _ = smartprom.run_smartctl_cmd(command)
    info = json.loads(output)

    # When using SAS drive and smartmontools r5286 and later,
    # scsi_ prefix is added to model_name field.
    # https://sourceforge.net/p/smartmontools/code/5286/
    if "scsi_model_name" in info:
        model_name = info["scsi_model_name"]
    else:
        model_name = info.get("model_name", "Unknown")

    device_info = {}
    device_info["model_family"] = info.get("model_family", "Unknown")
    device_info["model_name"] = model_name
    device_info["serial_number"] = info.get("serial_number", "Unknown")
    return device_info


def get_megaraid_device_type(dev: str, typ: str) -> str:
    """
    Determine the type label of a drive attached through MegaRAID.

    Runs smartctl -i against dev using the "megaraid,N" ID taken from typ.
    Returns "sat" when the reported protocol is "ATA", "scsi" for any other
    reported protocol, and "unknown" when typ has no MegaRAID ID or the
    output carries no device protocol.
    """
    device_id = get_megaraid_device_id(typ)
    if device_id is None:
        return "unknown"

    command = ["smartctl", "-i", "--json=c", "-d", device_id, dev]
    output, _ = smartprom.run_smartctl_cmd(command)
    info = json.loads(output)

    if "device" in info and "protocol" in info["device"]:
        if info["device"]["protocol"] == "ATA":
            return "sat"
        return "scsi"
    return "unknown"


def get_megaraid_device_id(typ: str) -> Optional[str]:
    """
    Returns the device ID on the MegaRAID from the typ string

    typ is a smartctl device type such as "megaraid,0" or "sat+megaraid,0".
    The return value is the "megaraid,N" part, or None when typ does not
    contain one.
    """
    # Optional[str] is used instead of "str | None": the union operator in an
    # annotation is evaluated at definition time and raises TypeError on
    # Python versions older than 3.10, which would break importing this module.
    megaraid_match = re.search(MEGARAID_TYPE_PATTERN, typ)
    if megaraid_match is None:
        return None

    # Group 2 is the "megaraid,N" part without the optional "sat+" prefix.
    return megaraid_match.group(2)


def smart_megaraid(dev: str, megaraid_id: str) -> dict:
    """
    Collect SMART attributes for a drive attached through MegaRAID.

    Runs smartctl -A -H against dev with megaraid_id as the device type and
    converts the JSON output according to the reported protocol.
    Returns the attributes dict including "smart_passed" and "exit_code",
    or an empty dict when the protocol is neither "ATA" nor "SCSI".
    """
    command = ["smartctl", "-A", "-H", "-d", megaraid_id, "--json=c", dev]
    output, exit_code = smartprom.run_smartctl_cmd(command)
    report = json.loads(output)

    protocol = report["device"]["protocol"]

    if protocol == "ATA":
        # SATA device on MegaRAID: values are stored as (0, value) tuples,
        # the same form the ATA branch has always used for these two keys.
        table = report["ata_smart_attributes"]["table"]
        sat_attributes = smartprom.table_to_attributes_sat(table)
        sat_attributes["smart_passed"] = (0, smartprom.get_smart_status(report))
        sat_attributes["exit_code"] = (0, exit_code)
        return sat_attributes

    if protocol == "SCSI":
        # SAS device on MegaRAID: values are stored bare, without a tuple.
        scsi_attributes = smartprom.results_to_attributes_scsi(report)
        scsi_attributes["smart_passed"] = smartprom.get_smart_status(report)
        scsi_attributes["exit_code"] = exit_code
        return scsi_attributes

    return {}
70 changes: 57 additions & 13 deletions smartprom.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import os
import subprocess
import time
import re
from typing import Tuple

import prometheus_client

import megaraid

LABELS = ['drive', 'type', 'model_family', 'model_name', 'serial_number']
DRIVES = {}
METRICS = {}
Expand Down Expand Up @@ -42,12 +45,34 @@ def get_drives() -> dict:
disks = {}
result, _ = run_smartctl_cmd(['smartctl', '--scan-open', '--json=c'])
result_json = json.loads(result)

# Ignore devices that fail on open, such as Virtual Drives created by MegaRAID.
result_json["devices"] = list(
filter(
lambda x: (
x.get("open_error", "")
!= "DELL or MegaRaid controller, please try adding '-d megaraid,N'"
),
result_json["devices"],
)
)

if 'devices' in result_json:
devices = result_json['devices']
for device in devices:
dev = device["name"]
disk_attrs = get_device_info(dev)
disk_attrs["type"] = device["type"]
if re.match(megaraid.MEGARAID_TYPE_PATTERN, device["type"]):
# If drive is connected by MegaRAID, dev has a bus name like "/dev/bus/0".
# After retrieving the disk information using the bus name,
# replace dev with a disk ID such as "megaraid,0".
disk_attrs = megaraid.get_megaraid_device_info(dev, device["type"])
disk_attrs["type"] = megaraid.get_megaraid_device_type(dev, device["type"])
disk_attrs["bus_device"] = dev
disk_attrs["megaraid_id"] = megaraid.get_megaraid_device_id(device["type"])
dev = disk_attrs["megaraid_id"]
else:
disk_attrs = get_device_info(dev)
disk_attrs["type"] = device["type"]
disks[dev] = disk_attrs
print("Discovered device", dev, "with attributes", disk_attrs)
else:
Expand Down Expand Up @@ -85,11 +110,18 @@ def smart_sat(dev: str) -> dict:
results, exit_code = run_smartctl_cmd(['smartctl', '-A', '-H', '-d', 'sat', '--json=c', dev])
results = json.loads(results)

attributes = {
'smart_passed': (0, get_smart_status(results)),
'exit_code': (0, exit_code)
}
data = results['ata_smart_attributes']['table']
attributes = table_to_attributes_sat(results["ata_smart_attributes"]["table"])
attributes["smart_passed"] = (0, get_smart_status(results))
attributes["exit_code"] = (0, exit_code)
return attributes


def table_to_attributes_sat(data: dict) -> dict:
"""
Returns a results["ata_smart_attributes"]["table"]
processed into an attributes dict
"""
attributes = {}
for metric in data:
code = metric['id']
name = metric['name']
Expand Down Expand Up @@ -146,11 +178,19 @@ def smart_scsi(dev: str) -> dict:
results, exit_code = run_smartctl_cmd(['smartctl', '-A', '-H', '-d', 'scsi', '--json=c', dev])
results = json.loads(results)

attributes = {
'smart_passed': get_smart_status(results),
'exit_code': exit_code
}
for key, value in results.items():
attributes = results_to_attributes_scsi(results)
attributes["smart_passed"] = get_smart_status(results)
attributes["exit_code"] = exit_code
return attributes


def results_to_attributes_scsi(data: dict) -> dict:
"""
Returns the result of smartctl -i on the SCSI device
processed into an attributes dict
"""
attributes = {}
for key, value in data.items():
if type(value) == dict:
for _label, _value in value.items():
if type(_value) == int:
Expand All @@ -169,7 +209,11 @@ def collect():
for drive, drive_attrs in DRIVES.items():
typ = drive_attrs['type']
try:
if typ in SAT_TYPES:
if "megaraid_id" in drive_attrs:
attrs = megaraid.smart_megaraid(
drive_attrs["bus_device"], drive_attrs["megaraid_id"]
)
elif typ in SAT_TYPES:
attrs = smart_sat(drive)
elif typ in NVME_TYPES:
attrs = smart_nvme(drive)
Expand Down

0 comments on commit 4be061a

Please sign in to comment.