From 4be061a3422e7d2a229c82ce815c1d6d5e692df3 Mon Sep 17 00:00:00 2001 From: misodengaku Date: Sun, 21 Jan 2024 01:12:00 +0900 Subject: [PATCH] Add support for drives connected by MegaRAID (#47) * Add support for drives connected by MegaRAID * Fixed strict handling of errors generated by MegaRAID * Separate MegaRAID-related logic to megaraid.py * Add get_megaraid_device_type * Added to README an example of a device connected by MegaRAID --- Dockerfile | 2 +- README.md | 9 +++++- megaraid.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++ smartprom.py | 70 +++++++++++++++++++++++++++++++++-------- 4 files changed, 155 insertions(+), 15 deletions(-) create mode 100644 megaraid.py diff --git a/Dockerfile b/Dockerfile index 3e04617..f8b9ffb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN apk add --no-cache smartmontools \ && rm -rf /root/.cache/ \ && find / -name '*.pyc' -delete -COPY ./smartprom.py /smartprom.py +COPY ./smartprom.py /megaraid.py / EXPOSE 9902 ENTRYPOINT ["/usr/local/bin/python", "-u", "/smartprom.py"] diff --git a/README.md b/README.md index 39e8fd9..fe327d7 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ _Note: You don't have to do this if you use the Docker image._ 1. Copy the `smartprom.service` file into `/etc/systemd/system` folder. 2. Copy the `smartprom.py` file anywhere into your system. 3. Modify `ExecStart=` in the `smartprom.service` so that it points to `smartprom.py` in your system. -4. Run `chmod +x smartprom.py` +4. Run `chmod +x smartprom.py` 5. Install `prometheus_client` for the root user, example: `sudo -H python3 -m pip install prometheus_client` 6. Run `systemctl enable smartprom` and `systemctl start smartprom` 7. Your metrics will now be available at `http://localhost:9902` @@ -56,6 +56,13 @@ smartprom_airflow_temperature_cel_raw{drive="/dev/sda",model_family="Seagate Bar ... 
``` +If you are using a MegaRAID card to connect the drives, the exported metrics will look like this: + +```shell +smartprom_power_on_hours_raw{drive="megaraid,0",model_family="Western Digital Ultrastar He10/12",model_name="WDC WD80EMAZ-00M9AA0",serial_number="XXXXXXXX",type="sat"} 28522.0 +smartprom_power_on_time_hours{drive="megaraid,1",model_family="Unknown",model_name="HGST HUH728080AL5200",serial_number="XXXXXXXX",type="scsi"} 37341.0 +``` + ## Configuration All configuration is done with environment variables. diff --git a/megaraid.py b/megaraid.py new file mode 100644 index 0000000..01d15f2 --- /dev/null +++ b/megaraid.py @@ -0,0 +1,89 @@ +import json +import re + +import smartprom + +MEGARAID_TYPE_PATTERN = r"(sat\+)?(megaraid,\d+)" + + +def get_megaraid_device_info(dev: str, typ: str) -> dict: + """ + Get device information connected with MegaRAID, + and process the information into get_device_info compatible format. + """ + megaraid_id = get_megaraid_device_id(typ) + if megaraid_id is None: + return {} + + results, _ = smartprom.run_smartctl_cmd( + ["smartctl", "-i", "--json=c", "-d", megaraid_id, dev] + ) + results = json.loads(results) + serial_number = results.get("serial_number", "Unknown") + model_family = results.get("model_family", "Unknown") + + # When using SAS drive and smartmontools r5286 and later, + # scsi_ prefix is added to model_name field. 
+ # https://sourceforge.net/p/smartmontools/code/5286/ + model_name = results.get( + "scsi_model_name", + results.get("model_name", "Unknown"), + ) + + return { + "model_family": model_family, + "model_name": model_name, + "serial_number": serial_number, + } + + +def get_megaraid_device_type(dev: str, typ: str) -> str: + megaraid_id = get_megaraid_device_id(typ) + if megaraid_id is None: + return "unknown" + + results, _ = smartprom.run_smartctl_cmd( + ["smartctl", "-i", "--json=c", "-d", megaraid_id, dev] + ) + results = json.loads(results) + + if "device" not in results or "protocol" not in results["device"]: + return "unknown" + return "sat" if results["device"]["protocol"] == "ATA" else "scsi" + + +def get_megaraid_device_id(typ: str) -> str | None: + """ + Returns the device ID on the MegaRAID from the typ string + """ + megaraid_match = re.search(MEGARAID_TYPE_PATTERN, typ) + if not megaraid_match: + return None + + return megaraid_match.group(2) + + +def smart_megaraid(dev: str, megaraid_id: str) -> dict: + """ + Runs the smartctl command on device connected by MegaRAID + and processes its attributes + """ + results, exit_code = smartprom.run_smartctl_cmd( + ["smartctl", "-A", "-H", "-d", megaraid_id, "--json=c", dev] + ) + results = json.loads(results) + + if results["device"]["protocol"] == "ATA": + # SATA device on MegaRAID + data = results["ata_smart_attributes"]["table"] + attributes = smartprom.table_to_attributes_sat(data) + attributes["smart_passed"] = (0, smartprom.get_smart_status(results)) + attributes["exit_code"] = (0, exit_code) + return attributes + elif results["device"]["protocol"] == "SCSI": + # SAS device on MegaRAID + attributes = smartprom.results_to_attributes_scsi(results) + attributes["smart_passed"] = smartprom.get_smart_status(results) + attributes["exit_code"] = exit_code + return attributes + return {} diff --git a/smartprom.py b/smartprom.py index 7c0cfc5..e755ac6 100755 --- a/smartprom.py +++ b/smartprom.py @@ -3,10 +3,13 @@ 
import os import subprocess import time +import re from typing import Tuple import prometheus_client +import megaraid + LABELS = ['drive', 'type', 'model_family', 'model_name', 'serial_number'] DRIVES = {} METRICS = {} @@ -42,12 +45,34 @@ def get_drives() -> dict: disks = {} result, _ = run_smartctl_cmd(['smartctl', '--scan-open', '--json=c']) result_json = json.loads(result) + + # Ignore devices that fail on open, such as Virtual Drives created by MegaRAID. + result_json["devices"] = list( + filter( + lambda x: ( + x.get("open_error", "") + != "DELL or MegaRaid controller, please try adding '-d megaraid,N'" + ), + result_json["devices"], + ) + ) + if 'devices' in result_json: devices = result_json['devices'] for device in devices: dev = device["name"] - disk_attrs = get_device_info(dev) - disk_attrs["type"] = device["type"] + if re.match(megaraid.MEGARAID_TYPE_PATTERN, device["type"]): + # If drive is connected by MegaRAID, dev has a bus name like "/dev/bus/0". + # After retrieving the disk information using the bus name, + # replace dev with a disk ID such as "megaraid,0". 
+ disk_attrs = megaraid.get_megaraid_device_info(dev, device["type"]) + disk_attrs["type"] = megaraid.get_megaraid_device_type(dev, device["type"]) + disk_attrs["bus_device"] = dev + disk_attrs["megaraid_id"] = megaraid.get_megaraid_device_id(device["type"]) + dev = disk_attrs["megaraid_id"] + else: + disk_attrs = get_device_info(dev) + disk_attrs["type"] = device["type"] disks[dev] = disk_attrs print("Discovered device", dev, "with attributes", disk_attrs) else: @@ -85,11 +110,18 @@ def smart_sat(dev: str) -> dict: results, exit_code = run_smartctl_cmd(['smartctl', '-A', '-H', '-d', 'sat', '--json=c', dev]) results = json.loads(results) - attributes = { - 'smart_passed': (0, get_smart_status(results)), - 'exit_code': (0, exit_code) - } - data = results['ata_smart_attributes']['table'] + attributes = table_to_attributes_sat(results["ata_smart_attributes"]["table"]) + attributes["smart_passed"] = (0, get_smart_status(results)) + attributes["exit_code"] = (0, exit_code) + return attributes + + +def table_to_attributes_sat(data: dict) -> dict: + """ + Returns a results["ata_smart_attributes"]["table"] + processed into an attributes dict + """ + attributes = {} for metric in data: code = metric['id'] name = metric['name'] @@ -146,11 +178,19 @@ def smart_scsi(dev: str) -> dict: results, exit_code = run_smartctl_cmd(['smartctl', '-A', '-H', '-d', 'scsi', '--json=c', dev]) results = json.loads(results) - attributes = { - 'smart_passed': get_smart_status(results), - 'exit_code': exit_code - } - for key, value in results.items(): + attributes = results_to_attributes_scsi(results) + attributes["smart_passed"] = get_smart_status(results) + attributes["exit_code"] = exit_code + return attributes + + +def results_to_attributes_scsi(data: dict) -> dict: + """ + Returns the result of smartctl -i on the SCSI device + processed into an attributes dict + """ + attributes = {} + for key, value in data.items(): if type(value) == dict: for _label, _value in value.items(): if 
type(_value) == int: @@ -169,7 +209,11 @@ def collect(): for drive, drive_attrs in DRIVES.items(): typ = drive_attrs['type'] try: - if typ in SAT_TYPES: + if "megaraid_id" in drive_attrs: + attrs = megaraid.smart_megaraid( + drive_attrs["bus_device"], drive_attrs["megaraid_id"] + ) + elif typ in SAT_TYPES: attrs = smart_sat(drive) elif typ in NVME_TYPES: attrs = smart_nvme(drive)