From 28fac488ff79a480602813bc2bf884a894ba0a92 Mon Sep 17 00:00:00 2001
From: flightlessmango <flightlessmangoyt@gmail.com>
Date: Tue, 17 Sep 2024 05:54:47 +0200
Subject: [PATCH] Rework GPUs to allow for multiple

Using sysfs we iterate through the available GPUs and determine which one
is the active GPU through the fdinfo of the app.
gpu_stats is rewritten to display either all available GPUs and their stats
or only the currently active GPU, using the `active_gpu` parameter.
vram is likewise rewritten to display VRAM for all GPUs or the active GPU.
Throttling only displays data for the active GPU, as we don't expect an
idling GPU to give relevant throttling information.
---
 meson.build                     |   2 +-
 src/amdgpu.cpp                  | 295 +++++++++++++++++++----------
 src/amdgpu.h                    | 113 +++++++-----
 src/app/main.cpp                |  16 +-
 src/cpu.cpp                     |  26 ++-
 src/cpu.h                       |   1 +
 src/gl/gl_hud.cpp               |   1 -
 src/gpu.cpp                     | 277 +++++++++++-----------------
 src/gpu.h                       | 191 +++++++++++++++----
 src/{msm.cpp => gpu_fdinfo.cpp} |  87 ++++-----
 src/gpu_fdinfo.h                |  54 ++++++
 src/gpu_metrics_util.h          |  89 +++++++++
 src/hud_elements.cpp            | 317 ++++++++++++++++++--------------
 src/hud_elements.h              |   2 +
 src/intel.cpp                   | 132 -------------
 src/intel.h                     |  51 -----
 src/loaders/loader_nvctrl.h     |   3 +-
 src/loaders/loader_nvml.cpp     |  14 ++
 src/loaders/loader_nvml.h       |   1 +
 src/meson.build                 |  12 +-
 src/msm.h                       |  34 ----
 src/nvapi.cpp                   |   5 +-
 src/nvctrl.cpp                  | 183 ------------------
 src/nvctrl.h                    |  21 ---
 src/nvidia.cpp                  | 297 ++++++++++++++++++++++++++++++
 src/nvidia.h                    |  69 +++++++
 src/nvidia_info.h               |  19 --
 src/nvml.cpp                    |  79 --------
 src/overlay.cpp                 | 247 ++-----------------------
 src/overlay.h                   |   1 -
 src/overlay_params.h            |   1 +
 src/vulkan.cpp                  |   1 -
 32 files changed, 1320 insertions(+), 1321 deletions(-)
 rename src/{msm.cpp => gpu_fdinfo.cpp} (51%)
 create mode 100644 src/gpu_fdinfo.h
 create mode 100644 src/gpu_metrics_util.h
 delete mode 100644 src/intel.cpp
 delete mode 100644 src/intel.h
 delete mode 100644 src/msm.h
 delete mode 100644 src/nvctrl.cpp
 delete mode 100644 src/nvctrl.h
 create mode 100644 src/nvidia.cpp
 create mode 100644 src/nvidia.h
 delete mode 100644 src/nvidia_info.h
 delete mode 100644 src/nvml.cpp
diff --git a/meson.build b/meson.build
index 79c3d2d80c..a3cbad7df6 100644
--- a/meson.build
+++ b/meson.build
@@ -3,7 +3,7 @@ project('MangoHud',
   version : 'v0.7.2',
   license : 'MIT',
   meson_version: '>=0.60.0',
-  default_options : ['buildtype=release', 'c_std=c99', 'cpp_std=c++14', 'warning_level=2']
+  default_options : ['buildtype=release', 'c_std=c99', 'cpp_std=c++17', 'warning_level=2']
 )
 
 cc = meson.get_compiler('c')
diff --git a/src/amdgpu.cpp b/src/amdgpu.cpp
index c11e492ec7..eb8f4c764a 100644
--- a/src/amdgpu.cpp
+++ b/src/amdgpu.cpp
@@ -11,15 +11,7 @@
 #include "logging.h"
 #include "mesa/util/macros.h"
 
-std::string metrics_path = "";
-struct amdgpu_common_metrics amdgpu_common_metrics;
-std::mutex amdgpu_common_metrics_m;
-std::mutex amdgpu_m;
-std::condition_variable amdgpu_c;
-bool amdgpu_run_thread = true;
-std::unique_ptr<Throttling> throttling;
-
-bool amdgpu_verify_metrics(const std::string& path)
+bool AMDGPU::verify_metrics(const std::string& path)
 {
 	metrics_table_header header {};
 	FILE *f;
@@ -45,7 +37,8 @@ bool amdgpu_verify_metrics(const std::string& path)
 		case 2: // v2_1, v2_2, v2_3, v2_4
 			if(header.content_revision<=0 || header.content_revision>4)// v2_0, not naturally aligned
 				break;
-			cpuStats.cpu_type = "APU";
+
+			this->is_apu = true;
 			return true;
 		default:
 			break;
@@ -56,18 +49,18 @@ bool amdgpu_verify_metrics(const std::string& path)
 }
 
 #define IS_VALID_METRIC(FIELD) (FIELD != 0xffff)
-void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
+void AMDGPU::get_instant_metrics(struct amdgpu_common_metrics *metrics) {
 	FILE *f;
 	void *buf[MAX(sizeof(struct gpu_metrics_v1_3), sizeof(struct gpu_metrics_v2_4))/sizeof(void*)+1];
 	struct metrics_table_header* header = (metrics_table_header*)buf;
 
-	f = fopen(metrics_path.c_str(), "rb");
+	f = fopen(gpu_metrics_path.c_str(), "rb");
 	if (!f)
 		return;
 
 	// Read the whole file
 	if (fread(buf, sizeof(buf), 1, f) != 0) {
-		SPDLOG_DEBUG("amdgpu metrics file '{}' is larger than the buffer", metrics_path.c_str());
+		SPDLOG_DEBUG("amdgpu metrics file '{}' is larger than the buffer", gpu_metrics_path.c_str());
 		fclose(f);
 		return;
 	}
@@ -167,9 +160,11 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
 			do cpu_temp = MAX(cpu_temp, amdgpu_metrics->average_temperature_core[i]);
 			while (++i < ARRAY_SIZE(amdgpu_metrics->average_temperature_core) && IS_VALID_METRIC(amdgpu_metrics->average_temperature_core[i]));
 			metrics->apu_cpu_temp_c = cpu_temp / 100;
+#ifdef DETECT_OS_UNIX
 		} else if( cpuStats.ReadcpuTempFile(cpu_temp) ) {
 			// fallback 2: Try temp from file 'm_cpuTempFile' of 'cpu.cpp'
 			metrics->apu_cpu_temp_c = cpu_temp;
+#endif
 		} else {
 			// giving up
 			metrics->apu_cpu_temp_c = 0;
@@ -189,10 +149,12 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
 		throttling->indep_throttle_status = indep_throttle_status;
 }
 
-void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool &gpu_load_needs_dividing) {
+void AMDGPU::get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool &gpu_load_needs_dividing) {
+	while (!stop_thread) {
 		// Get all the samples
 		for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) {
-			amdgpu_get_instant_metrics(&metrics_buffer[cur_sample_id]);
+			if (gpu_metrics_is_valid)
+				get_instant_metrics(&metrics_buffer[cur_sample_id]);
 
 			// Detect and fix if the gpu load is reported in centipercent
 			if (gpu_load_needs_dividing || metrics_buffer[cur_sample_id].gpu_load_percent > 100){
@@ -203,34 +165,65 @@ void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[MET
 			usleep(METRICS_POLLING_PERIOD_MS * 1000);
 		}
 
-		// Copy the results from the different metrics to amdgpu_common_metrics
-		amdgpu_common_metrics_m.lock();
-		UPDATE_METRIC_AVERAGE(gpu_load_percent);
-		UPDATE_METRIC_AVERAGE_FLOAT(average_gfx_power_w);
-		UPDATE_METRIC_AVERAGE_FLOAT(average_cpu_power_w);
+		if (stop_thread) break;
+
+        std::unique_lock<std::mutex> lock(metrics_mutex);
+        cond_var.wait(lock, [this]() { return !paused || stop_thread; });
+		// do one pass of metrics from sysfs nodes
+		// then we replace with GPU metrics if it's available
+		get_sysfs_metrics();
+
+		if (gpu_metrics_is_valid) {
+			UPDATE_METRIC_AVERAGE(gpu_load_percent);
+			UPDATE_METRIC_AVERAGE_FLOAT(average_gfx_power_w);
+			UPDATE_METRIC_AVERAGE_FLOAT(average_cpu_power_w);
+
+			UPDATE_METRIC_AVERAGE(current_gfxclk_mhz);
+			UPDATE_METRIC_AVERAGE(current_uclk_mhz);
+
+			UPDATE_METRIC_AVERAGE(soc_temp_c);
+			UPDATE_METRIC_AVERAGE(gpu_temp_c);
+			UPDATE_METRIC_AVERAGE(apu_cpu_temp_c);
 
-		UPDATE_METRIC_AVERAGE(current_gfxclk_mhz);
-		UPDATE_METRIC_AVERAGE(current_uclk_mhz);
+			UPDATE_METRIC_MAX(is_power_throttled);
+			UPDATE_METRIC_MAX(is_current_throttled);
+			UPDATE_METRIC_MAX(is_temp_throttled);
+			UPDATE_METRIC_MAX(is_other_throttled);
 
-		UPDATE_METRIC_AVERAGE(soc_temp_c);
-		UPDATE_METRIC_AVERAGE(gpu_temp_c);
-		UPDATE_METRIC_AVERAGE(apu_cpu_temp_c);
+			UPDATE_METRIC_MAX(fan_speed);
+			metrics.fan_rpm = true;
 
-		UPDATE_METRIC_MAX(is_power_throttled);
-		UPDATE_METRIC_MAX(is_current_throttled);
-		UPDATE_METRIC_MAX(is_temp_throttled);
-		UPDATE_METRIC_MAX(is_other_throttled);
+			metrics.load = amdgpu_common_metrics.gpu_load_percent;
+			metrics.powerUsage = amdgpu_common_metrics.average_gfx_power_w;
+			metrics.MemClock = amdgpu_common_metrics.current_uclk_mhz;
 
-		UPDATE_METRIC_MAX(fan_speed);
-		amdgpu_common_metrics_m.unlock();
+			// Use hwmon instead, see gpu.cpp
+			if ( deviceID == 0x1435 || deviceID == 0x163f )
+			{
+				// If we are on VANGOGH (Steam Deck), then
+				// always use core clock from GPU metrics.
+				metrics.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz;
+			}
+			metrics.temp = amdgpu_common_metrics.gpu_temp_c;
+			metrics.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w;
+			metrics.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c;
+
+			metrics.is_power_throttled = amdgpu_common_metrics.is_power_throttled;
+			metrics.is_current_throttled = amdgpu_common_metrics.is_current_throttled;
+			metrics.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled;
+			metrics.is_other_throttled = amdgpu_common_metrics.is_other_throttled;
+
+			metrics.fan_speed = amdgpu_common_metrics.fan_speed;
+		}
+	}
 }
 
-void amdgpu_metrics_polling_thread() {
+void AMDGPU::metrics_polling_thread() {
 	struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT];
 	bool gpu_load_needs_dividing = false;  //some GPUs report load as centipercent
 
 	// Initial poll of the metrics, so that we have values to display as fast as possible
-	amdgpu_get_instant_metrics(&amdgpu_common_metrics);
+	get_instant_metrics(&amdgpu_common_metrics);
 	if (amdgpu_common_metrics.gpu_load_percent > 100){
 		gpu_load_needs_dividing = true;
 		amdgpu_common_metrics.gpu_load_percent /= 100;
@@ -240,48 +233,156 @@ void amdgpu_metrics_polling_thread() {
 	memset(metrics_buffer, 0, sizeof(metrics_buffer));
 
 	while (1) {
-		std::unique_lock<std::mutex> lock(amdgpu_m);
-		amdgpu_c.wait(lock, []{return amdgpu_run_thread;});
-		lock.unlock();
 #ifndef TEST_ONLY
 		if (HUDElements.params->no_display && !logger->is_active())
 			usleep(100000);
 		else
 #endif
-			amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing);
+			get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing);
 	}
 }
 
-void amdgpu_get_metrics(uint32_t deviceID){
-	static bool init = false;
-	if (!init){
-		std::thread(amdgpu_metrics_polling_thread).detach();
-		init = true;
+void AMDGPU::get_sysfs_metrics() {
+    int64_t value = 0;
+	if (sysfs_nodes.busy) {
+		rewind(sysfs_nodes.busy);
+		fflush(sysfs_nodes.busy);
+		int value = 0;
+		if (fscanf(sysfs_nodes.busy, "%d", &value) != 1)
+			value = 0;
+		metrics.load = value;
 	}
 
-	amdgpu_common_metrics_m.lock();
-	gpu_info.load = amdgpu_common_metrics.gpu_load_percent;
+	if (sysfs_nodes.memory_clock) {
+		rewind(sysfs_nodes.memory_clock);
+		fflush(sysfs_nodes.memory_clock);
+		if (fscanf(sysfs_nodes.memory_clock, "%" PRId64, &value) != 1)
+			value = 0;
 
-	gpu_info.powerUsage = amdgpu_common_metrics.average_gfx_power_w;
-	gpu_info.MemClock = amdgpu_common_metrics.current_uclk_mhz;
+		metrics.MemClock = value / 1000000;
+	}
 
-	// Use hwmon instead, see gpu.cpp
-	if ( deviceID == 0x1435 || deviceID == 0x163f )
-	{
-		// If we are on VANGOGH (Steam Deck), then
-		// always use use core clock from GPU metrics.
-		gpu_info.CoreClock = amdgpu_common_metrics.current_gfxclk_mhz;
+	// TODO: on some gpus this will use the power1_input instead
+	// this value is instantaneous and should be averaged over time
+	// probably just average everything in this function to be safe
+	if (sysfs_nodes.power_usage) {
+		rewind(sysfs_nodes.power_usage);
+		fflush(sysfs_nodes.power_usage);
+		if (fscanf(sysfs_nodes.power_usage, "%" PRId64, &value) != 1)
+			value = 0;
+
+		metrics.powerUsage = value / 1000000;
+	}
+
+	if (sysfs_nodes.fan) {
+		rewind(sysfs_nodes.fan);
+		fflush(sysfs_nodes.fan);
+		if (fscanf(sysfs_nodes.fan, "%" PRId64, &value) != 1)
+			value = 0;
+		metrics.fan_speed = value;
+		metrics.fan_rpm = true;
+	}
+
+	if (sysfs_nodes.vram_total) {
+		rewind(sysfs_nodes.vram_total);
+		fflush(sysfs_nodes.vram_total);
+		if (fscanf(sysfs_nodes.vram_total, "%" PRId64, &value) != 1)
+			value = 0;
+		metrics.memoryTotal = float(value) / (1024 * 1024 * 1024);
+	}
+
+	if (sysfs_nodes.vram_used) {
+		rewind(sysfs_nodes.vram_used);
+		fflush(sysfs_nodes.vram_used);
+		if (fscanf(sysfs_nodes.vram_used, "%" PRId64, &value) != 1)
+			value = 0;
+		metrics.memoryUsed = float(value) / (1024 * 1024 * 1024);
+	}
+	// On some GPUs SMU can sometimes return the wrong temperature.
+	// As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work
+	if (sysfs_nodes.core_clock) {
+		rewind(sysfs_nodes.core_clock);
+		fflush(sysfs_nodes.core_clock);
+		if (fscanf(sysfs_nodes.core_clock, "%" PRId64, &value) != 1)
+			value = 0;
+
+		metrics.CoreClock = value / 1000000;
 	}
-	// gpu_info.temp = amdgpu_common_metrics.gpu_temp_c;
-	gpu_info.apu_cpu_power = amdgpu_common_metrics.average_cpu_power_w;
-	gpu_info.apu_cpu_temp = amdgpu_common_metrics.apu_cpu_temp_c;
 
-	gpu_info.is_power_throttled = amdgpu_common_metrics.is_power_throttled;
-	gpu_info.is_current_throttled = amdgpu_common_metrics.is_current_throttled;
-	gpu_info.is_temp_throttled = amdgpu_common_metrics.is_temp_throttled;
-	gpu_info.is_other_throttled = amdgpu_common_metrics.is_other_throttled;
+	if (sysfs_nodes.temp){
+		rewind(sysfs_nodes.temp);
+		fflush(sysfs_nodes.temp);
+		int value = 0;
+		if (fscanf(sysfs_nodes.temp, "%d", &value) != 1)
+			value = 0;
+		metrics.temp = value / 1000;
+	}
 
-	gpu_info.fan_speed = amdgpu_common_metrics.fan_speed;
+	if (sysfs_nodes.junction_temp){
+		rewind(sysfs_nodes.junction_temp);
+		fflush(sysfs_nodes.junction_temp);
+		int value = 0;
+		if (fscanf(sysfs_nodes.junction_temp, "%d", &value) != 1)
+			value = 0;
+		metrics.junction_temp = value / 1000;
+	}
 
-	amdgpu_common_metrics_m.unlock();
+	if (sysfs_nodes.memory_temp){
+		rewind(sysfs_nodes.memory_temp);
+		fflush(sysfs_nodes.memory_temp);
+		int value = 0;
+		if (fscanf(sysfs_nodes.memory_temp, "%d", &value) != 1)
+			value = 0;
+		metrics.memory_temp = value / 1000;
+	}
+
+	if (sysfs_nodes.gtt_used) {
+		rewind(sysfs_nodes.gtt_used);
+		fflush(sysfs_nodes.gtt_used);
+		if (fscanf(sysfs_nodes.gtt_used, "%" PRId64, &value) != 1)
+			value = 0;
+		metrics.gtt_used = float(value) / (1024 * 1024 * 1024);
+	}
+
+	if (sysfs_nodes.gpu_voltage_soc) {
+		rewind(sysfs_nodes.gpu_voltage_soc);
+		fflush(sysfs_nodes.gpu_voltage_soc);
+		if (fscanf(sysfs_nodes.gpu_voltage_soc, "%" PRId64, &value) != 1)
+			value = 0;
+		metrics.voltage = value;
+	}
 }
+
+AMDGPU::AMDGPU(std::string pci_dev, uint32_t device_id, uint32_t vendor_id) { // opens this GPU's sysfs/hwmon nodes, then starts its polling thread
+	this->pci_dev = pci_dev;
+	this->device_id = device_id;
+	this->vendor_id = vendor_id;
+	const std::string device_path = "/sys/bus/pci/devices/" + pci_dev;
+	gpu_metrics_path = device_path + "/gpu_metrics";
+	gpu_metrics_is_valid = verify_metrics(gpu_metrics_path);
+	sysfs_nodes = {}; // start from all-NULL handles: the hwmon ones below are only opened when the hwmon directory exists
+	sysfs_nodes.busy = fopen((device_path + "/gpu_busy_percent").c_str(), "r");
+	sysfs_nodes.vram_total = fopen((device_path + "/mem_info_vram_total").c_str(), "r");
+	sysfs_nodes.vram_used = fopen((device_path + "/mem_info_vram_used").c_str(), "r");
+	sysfs_nodes.gtt_used = fopen((device_path + "/mem_info_gtt_used").c_str(), "r");
+	// A failed fopen() leaves NULL, which get_sysfs_metrics() checks before reading each node.
+	const std::string hwmon_path = device_path + "/hwmon/";
+	if (fs::exists(hwmon_path)){
+		const auto dirs = ls(hwmon_path.c_str(), "hwmon", LS_DIRS);
+		for (const auto& dir : dirs) {
+			sysfs_nodes.temp = fopen((hwmon_path + dir + "/temp1_input").c_str(), "r");
+			sysfs_nodes.junction_temp = fopen((hwmon_path + dir + "/temp2_input").c_str(), "r");
+			sysfs_nodes.memory_temp = fopen((hwmon_path + dir + "/temp3_input").c_str(), "r");
+			sysfs_nodes.core_clock = fopen((hwmon_path + dir + "/freq1_input").c_str(), "r");
+			sysfs_nodes.gpu_voltage_soc = fopen((hwmon_path + dir + "/in0_input").c_str(), "r");
+			sysfs_nodes.memory_clock = fopen((hwmon_path + dir + "/freq2_input").c_str(), "r");
+			sysfs_nodes.power_usage = fopen((hwmon_path + dir + "/power1_average").c_str(), "r");
+			if (!sysfs_nodes.power_usage) sysfs_nodes.power_usage = fopen((hwmon_path + dir + "/power1_input").c_str(), "r"); // fallback only: never clobber (and leak) the power1_average handle
+			sysfs_nodes.fan = fopen((hwmon_path + dir + "/fan1_input").c_str(), "r");
+		}
+	}
+	// NOTE(review): the detached thread below keeps using `this`, so the object must outlive it; confirm the owner's lifetime.
+	throttling = std::make_shared<Throttling>(0x1002);
+	std::thread thread(&AMDGPU::metrics_polling_thread, this);
+	thread.detach();
+}
\ No newline at end of file
diff --git a/src/amdgpu.h b/src/amdgpu.h
index 7cfcead1dc..6a7c9827e0 100644
--- a/src/amdgpu.h
+++ b/src/amdgpu.h
@@ -1,6 +1,4 @@
 #pragma once
-// #include <fstream>
-// #include <iostream>
 #include <stdio.h>
 #include <inttypes.h>
 #include <unistd.h>
@@ -11,12 +9,9 @@
 #include <vector>
 #include <sys/param.h>
 #include <algorithm>
+#include "gpu_metrics_util.h"
 
-#define METRICS_UPDATE_PERIOD_MS 500
-#define METRICS_POLLING_PERIOD_MS 25
-#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS)
 #define NUM_HBM_INSTANCES 4
-
 #define UPDATE_METRIC_AVERAGE(FIELD) do { int value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } amdgpu_common_metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0)
 #define UPDATE_METRIC_AVERAGE_FLOAT(FIELD) do { float value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } amdgpu_common_metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0)
 #define UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; amdgpu_common_metrics.FIELD = cur_max; } while(0)
@@ -232,6 +227,23 @@ struct gpu_metrics_v2_4 {
 	uint16_t			average_gfx_current;
 };
 
+struct amdgpu_files // open sysfs/hwmon handles for one AMD GPU; each stays NULL until successfully opened
+{
+    FILE *vram_total = nullptr;      // mem_info_vram_total
+    FILE *vram_used = nullptr;       // mem_info_vram_used
+    /* The following can be NULL, in that case we're using the gpu_metrics node */
+    FILE *busy = nullptr;            // gpu_busy_percent
+    FILE *temp = nullptr;            // hwmon temp1_input
+    FILE *junction_temp = nullptr;   // hwmon temp2_input
+    FILE *memory_temp = nullptr;     // hwmon temp3_input
+    FILE *core_clock = nullptr;      // hwmon freq1_input
+    FILE *memory_clock = nullptr;    // hwmon freq2_input
+    FILE *power_usage = nullptr;     // hwmon power1_average / power1_input
+    FILE *gtt_used = nullptr;        // mem_info_gtt_used
+    FILE *fan = nullptr;             // hwmon fan1_input
+    FILE *gpu_voltage_soc = nullptr; // hwmon in0_input
+};
+
 /* This structure is used to communicate the latest values of the amdgpu metrics.
  * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics().
  */
@@ -262,49 +274,50 @@ struct amdgpu_common_metrics {
 	uint16_t fan_speed;
 };
 
-bool amdgpu_verify_metrics(const std::string& path);
-void amdgpu_get_metrics(uint32_t deviceID);
 extern std::string metrics_path;
-extern std::condition_variable amdgpu_c;
-extern bool amdgpu_run_thread;
-void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics);
-void amdgpu_metrics_polling_thread();
-void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool &gpu_load_needs_dividing);
-void amdgpu_trottling_thread(std::vector<float> &power, std::vector<float> &thermal);
-
-class Throttling {
-	public:
-		std::vector<float> power;
-		std::vector<float> thermal;
-		int64_t indep_throttle_status;
-
-		Throttling()
-			: power(200, 0.0f),
-			thermal(200, 0.0f) {}
-
-		void update(){
-			if (((indep_throttle_status >> 0) & 0xFF) != 0)
-				power.push_back(0.1);
-			else
-				power.push_back(0);
-
 
-			if (((indep_throttle_status >> 32) & 0xFFFF) != 0)
-				thermal.push_back(0.1);
-			else
-				thermal.push_back(0);
-
-			power.erase(power.begin());
-			thermal.erase(thermal.begin());
-		}
-
-		bool power_throttling(){
-			return std::find(power.begin(), power.end(), 0.1f) != power.end();
-		}
-
-		bool thermal_throttling(){
-			return std::find(thermal.begin(), thermal.end(), 0.1f) != thermal.end();
-		}
-};
-
-extern std::unique_ptr<Throttling> throttling;
+class AMDGPU { // one instance per AMD GPU: holds its sysfs handles, latest metrics and the polling-thread controls
+	public:
+		bool is_apu = false; // set to true by verify_metrics() for v2_x gpu_metrics tables
+		std::shared_ptr<Throttling> throttling; // created in the constructor
+
+    	AMDGPU(std::string pci_dev, uint32_t device_id, uint32_t vendor_id);
+
+        gpu_metrics copy_metrics() { // returns a copy of the latest metrics, taken under metrics_mutex
+            std::lock_guard<std::mutex> lock(metrics_mutex);
+            return metrics;
+        };
+
+        void pause() { // the polling loop waits on cond_var while paused is true
+            paused = true;
+            cond_var.notify_one();
+        };
+
+        void resume() { // clears paused and wakes the polling loop
+            paused = false;
+            cond_var.notify_one();
+        }
+
+	private:
+		std::string pci_dev; // PCI address used to build /sys/bus/pci/devices/<pci_dev>
+		std::string gpu_metrics_path; // <device path>/gpu_metrics
+		uint32_t device_id;
+		uint32_t vendor_id;
+		std::condition_variable amdgpu_c; // NOTE(review): not referenced by the visible amdgpu.cpp code; cond_var is the one in use
+		std::thread thread; // NOTE(review): the constructor detaches a local std::thread of the same name; this member looks unused
+		struct amdgpu_files sysfs_nodes; // handles opened in the constructor, read by get_sysfs_metrics()
+		bool gpu_metrics_is_valid = false; // result of verify_metrics(gpu_metrics_path)
+		std::condition_variable cond_var; // signalled by pause()/resume()
+		std::atomic<bool> stop_thread = false; // ends the get_samples_and_copy() loop once set
+        std::atomic<bool> paused = false;
+		std::mutex metrics_mutex; // guards `metrics` between the polling thread and copy_metrics()
+		gpu_metrics metrics; // values handed out by copy_metrics()
+		struct amdgpu_common_metrics amdgpu_common_metrics; // averaged gpu_metrics samples
+	
+		void get_sysfs_metrics();
+		bool verify_metrics(const std::string& path);
+		void get_instant_metrics(struct amdgpu_common_metrics *metrics);
+		void get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT],
+								  bool &gpu_load_needs_dividing);
+		void metrics_polling_thread();
+};
\ No newline at end of file
diff --git a/src/app/main.cpp b/src/app/main.cpp
index 51370ed6cf..808da909ca 100644
--- a/src/app/main.cpp
+++ b/src/app/main.cpp
@@ -345,7 +345,6 @@ int main(int, char**)
     }
 
     HUDElements.vendorID = vendorID;
-    init_gpu_stats(vendorID, 0, params);
     init_system_info();
     sw_stats.engine = EngineTypes::GAMESCOPE;
     std::thread(msg_read_thread).detach();
@@ -364,10 +363,9 @@ int main(int, char**)
                 XChangeProperty(x11_display, x11_window, overlay_atom, XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&value, 1);
                 XSync(x11_display, 0);
                 mangoapp_paused = false;
-                {
-                    amdgpu_run_thread = true;
-                    amdgpu_c.notify_one();
-                }
+                // resume all GPU threads
+                for (auto gpu : HUDElements.gpus->available_gpus)
+                    gpu->resume();
             }
             {
                 std::unique_lock<std::mutex> lk(mangoapp_m);
@@ -407,10 +405,10 @@ int main(int, char**)
             XChangeProperty(x11_display, x11_window, overlay_atom, XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&value, 1);
             XSync(x11_display, 0);
             mangoapp_paused = true;
-            {
-                amdgpu_run_thread = false;
-                amdgpu_c.notify_one();
-            }
+            // pause all GPU threads
+            for (auto gpu : HUDElements.gpus->available_gpus)
+                gpu->pause();
+
             std::unique_lock<std::mutex> lk(mangoapp_m);
             mangoapp_cv.wait(lk, []{return !params.no_display;});
         }
diff --git a/src/cpu.cpp b/src/cpu.cpp
index 2798f5419f..6d23a5812f 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -281,16 +281,21 @@ bool CPUStats::ReadcpuTempFile(int& temp) {
 }
 
+// Updates m_cpuDataTotal.temp from an APU's GPU metrics when an APU is present, otherwise from the CPU temperature file.
 bool CPUStats::UpdateCpuTemp() {
-	if (cpu_type == "APU"){
-        m_cpuDataTotal.temp = gpu_info.apu_cpu_temp;
-        return true;
-    } else {
-        int temp = 0;
-		bool ret = ReadcpuTempFile(temp);
-		m_cpuDataTotal.temp = temp;
-
-        return ret;
-    }
+    // Prefer the APU reading when one of the detected GPUs is an APU.
+    if (HUDElements.gpus)
+        for (auto gpu : HUDElements.gpus->available_gpus)
+            if (gpu->is_apu()) {
+                m_cpuDataTotal.temp = gpu->metrics.apu_cpu_temp;
+                return true;
+            }
+
+    // No APU (or no GPU list yet): read the sensor file rather than returning false, so dGPU-only systems still get a value.
+    int temp = 0;
+    bool ret = ReadcpuTempFile(temp);
+    m_cpuDataTotal.temp = temp;
+
+    return ret;
 }
 
 static bool get_cpu_power_k10temp(CPUPowerData* cpuPowerData, float& power) {
@@ -419,8 +424,15 @@ static bool get_cpu_power_rapl(CPUPowerData* cpuPowerData, float& power) {
 }
 
 static bool get_cpu_power_amdgpu(float& power) {
-    power = gpu_info.apu_cpu_power;
-    return true;
+    if (HUDElements.gpus)
+        for (auto gpu : HUDElements.gpus->available_gpus)
+            if (gpu->is_apu()) {
+                power = gpu->metrics.apu_cpu_power;
+                return true;
+            }
+
+
+    return false;
 }
 
 bool CPUStats::UpdateCpuPower() {
diff --git a/src/cpu.h b/src/cpu.h
index 4f4999bf84..9d04887d4e 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -11,6 +11,7 @@
 #include <windows.h>
 #endif
 #include "timing.hpp"
+#include "hud_elements.h"
 
 typedef struct CPUData_ {
    unsigned long long int totalTime;
diff --git a/src/gl/gl_hud.cpp b/src/gl/gl_hud.cpp
index 83b962d6fc..2d03698314 100644
--- a/src/gl/gl_hud.cpp
+++ b/src/gl/gl_hud.cpp
@@ -152,7 +152,6 @@ void imgui_create(void *ctx, const gl_wsi plat)
         glx_mesa_queryInteger(GLX_RENDERER_DEVICE_ID_MESA, &device_id);
 
     SPDLOG_DEBUG("GL device id: {:04X}", device_id);
-    init_gpu_stats(vendorID, device_id, params);
     sw_stats.gpuName = gpu = remove_parentheses(deviceName);
     SPDLOG_DEBUG("gpu: {}", gpu);
     // Setup Dear ImGui context
diff --git a/src/gpu.cpp b/src/gpu.cpp
index 9a8c8f6750..0684828d4a 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -4,198 +4,143 @@
 #include <functional>
 #include <thread>
 #include <cstring>
+#include <unistd.h>
+#include <fstream>
 #include <spdlog/spdlog.h>
-#ifdef HAVE_XNVCTRL
-#include "nvctrl.h"
-#endif
 #include "timing.hpp"
-#ifdef HAVE_NVML
-#include "nvidia_info.h"
-#endif
-
 #include "amdgpu.h"
 
+#include "file_utils.h"
 using namespace std::chrono_literals;
 
-struct gpuInfo gpu_info {};
-amdgpu_files amdgpu {};
+#include <iostream>
+#include <filesystem>
+#include <string>
+namespace fs = std::filesystem;
 
-bool checkNvidia(const char *pci_dev){
-    bool nvSuccess = false;
-#ifdef HAVE_NVML
-    nvSuccess = checkNVML(pci_dev) && getNVMLInfo({});
-#endif
-#ifdef HAVE_XNVCTRL
-    if (!nvSuccess)
-        nvSuccess = checkXNVCtrl();
-#endif
-#ifdef _WIN32
-    if (!nvSuccess)
-        nvSuccess = checkNVAPI();
-#endif
-    return nvSuccess;
-}
+// Discovers the DRM card nodes in /sys/class/drm, builds one GPU object per cardN
+// (ordered by N) and finally calls find_active_gpu().
+GPUS::GPUS() {
+    std::vector<std::string> gpu_entries;
 
-void getNvidiaGpuInfo(const struct overlay_params& params){
-#ifdef HAVE_NVML
-    if (nvmlSuccess){
-        getNVMLInfo(params);
-        gpu_info.load = nvidiaUtilization.gpu;
-        gpu_info.temp = nvidiaTemp;
-        gpu_info.memoryUsed = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
-        gpu_info.CoreClock = nvidiaCoreClock;
-        gpu_info.MemClock = nvidiaMemClock;
-        gpu_info.powerUsage = nvidiaPowerUsage / 1000;
-        gpu_info.fan_rpm = false;
-        gpu_info.memoryTotal = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
-        gpu_info.fan_speed = nvidiaFanSpeed;
-        if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status]){
-            gpu_info.is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0;
-            gpu_info.is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0;
-            gpu_info.is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0;
-        }
-        #ifdef HAVE_XNVCTRL
-            static bool nvctrl_available = checkXNVCtrl();
-            if (nvctrl_available) {
-                gpu_info.fan_rpm = true;
-                gpu_info.fan_speed = getNvctrlFanSpeed();
-            }
-        #endif
-
-        return;
-    }
-#endif
-#ifdef HAVE_XNVCTRL
-    if (nvctrlSuccess) {
-        getNvctrlInfo();
-        gpu_info.load = nvctrl_info.load;
-        gpu_info.temp = nvctrl_info.temp;
-        gpu_info.memoryUsed = nvctrl_info.memoryUsed / (1024.f);
-        gpu_info.CoreClock = nvctrl_info.CoreClock;
-        gpu_info.MemClock = nvctrl_info.MemClock;
-        gpu_info.powerUsage = 0;
-        gpu_info.memoryTotal = nvctrl_info.memoryTotal;
-        gpu_info.fan_rpm = true;
-        gpu_info.fan_speed = nvctrl_info.fan_speed;
-        return;
-    }
-#endif
-#ifdef _WIN32
-nvapi_util();
-#endif
-}
+    // Collect all relevant GPU entries (e.g., card0, card1, etc.)
+    for (const auto& entry : fs::directory_iterator("/sys/class/drm")) {
+        if (entry.is_directory()) {
+            std::string node_name = entry.path().filename().string();
 
-void getAmdGpuInfo(){
-#ifdef __linux__
-    int64_t value = 0;
-    if (metrics_path.empty()){
-        if (amdgpu.busy) {
-            rewind(amdgpu.busy);
-            fflush(amdgpu.busy);
-            int value = 0;
-            if (fscanf(amdgpu.busy, "%d", &value) != 1)
-                value = 0;
-            gpu_info.load = value;
+            // Only "card" followed by exactly one digit is accepted (card0 .. card9)
+            if (node_name.find("card") == 0 && node_name.length() == 5 && isdigit(node_name[4])) {
+                gpu_entries.push_back(node_name);  // Store the card entry
+            }
         }
+    }
 
-        if (amdgpu.memory_clock) {
-            rewind(amdgpu.memory_clock);
-            fflush(amdgpu.memory_clock);
-            if (fscanf(amdgpu.memory_clock, "%" PRId64, &value) != 1)
-                value = 0;
+    // Sort the entries based on the numeric value of the card number
+    std::sort(gpu_entries.begin(), gpu_entries.end(), [](const std::string& a, const std::string& b) {
+        return std::stoi(a.substr(4)) < std::stoi(b.substr(4));
+    });
 
-            gpu_info.MemClock = value / 1000000;
-        }
+    // Now process the sorted GPU entries
+    for (const auto& node_name : gpu_entries) {
+        uint32_t vendor_id = std::stoul(read_line("/sys/class/drm/" + node_name + "/device/vendor"), nullptr, 16);
+        uint32_t device_id = std::stoul(read_line("/sys/class/drm/" + node_name + "/device/device"), nullptr, 16);
+        // Own the address: calling .c_str() on the returned temporary would leave a dangling pointer.
+        const std::string pci_dev = get_pci_device_address("/sys/class/drm/" + node_name);
 
-        // TODO: on some gpus this will use the power1_input instead
-        // this value is instantaneous and should be averaged over time
-        // probably just average everything in this function to be safe
-        if (amdgpu.power_usage) {
-            rewind(amdgpu.power_usage);
-            fflush(amdgpu.power_usage);
-            if (fscanf(amdgpu.power_usage, "%" PRId64, &value) != 1)
-                value = 0;
+        available_gpus.emplace_back(std::make_shared<GPU>(node_name, vendor_id, device_id, pci_dev.c_str()));
 
-            gpu_info.powerUsage = value / 1000000;
-        }
+        SPDLOG_DEBUG("GPU Found: node_name: {}, vendor_id: {:x} device_id: {:x} pci_dev: {}", node_name, vendor_id, device_id, pci_dev);
     }
 
-    if (amdgpu.fan) {
-        rewind(amdgpu.fan);
-        fflush(amdgpu.fan);
-        if (fscanf(amdgpu.fan, "%" PRId64, &value) != 1)
-            value = 0;
-        gpu_info.fan_speed = value;
-        gpu_info.fan_rpm = true;
-    }
+    find_active_gpu();
+}
 
-    if (amdgpu.vram_total) {
-        rewind(amdgpu.vram_total);
-        fflush(amdgpu.vram_total);
-        if (fscanf(amdgpu.vram_total, "%" PRId64, &value) != 1)
-            value = 0;
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
-    }
+// Returns the last PCI address (e.g. 0000:0a:00.0) in the resolved <drm_card_path>/device path, or "" if there is none.
+std::string GPUS::get_pci_device_address(const std::string& drm_card_path) {
+    // Resolve the symbolic link to get the actual device path
+    fs::path device_path = fs::canonical(fs::path(drm_card_path) / "device");
 
-    if (amdgpu.vram_used) {
-        rewind(amdgpu.vram_used);
-        fflush(amdgpu.vram_used);
-        if (fscanf(amdgpu.vram_used, "%" PRId64, &value) != 1)
-            value = 0;
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
-    }
-    // On some GPUs SMU can sometimes return the wrong temperature.
-    // As HWMON is way more visible than the SMU metrics, let's always trust it as it is the most likely to work
-    if (amdgpu.core_clock) {
-        rewind(amdgpu.core_clock);
-        fflush(amdgpu.core_clock);
-        if (fscanf(amdgpu.core_clock, "%" PRId64, &value) != 1)
-            value = 0;
-
-        gpu_info.CoreClock = value / 1000000;
-    }
+    // Convert the resolved device path to a string
+    std::string path_str = device_path.string();
 
-    if (amdgpu.temp){
-        rewind(amdgpu.temp);
-        fflush(amdgpu.temp);
-        int value = 0;
-        if (fscanf(amdgpu.temp, "%d", &value) != 1)
-            value = 0;
-        gpu_info.temp = value / 1000;
-    }
+    // Extract the last PCI address from the path using a regular expression.
+    // Domain, bus and device are hexadecimal, so \d alone would skip e.g. bus 0a.
+    std::regex pci_address_regex(R"(([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.\d))");
+    std::string pci_address;
 
-    if (amdgpu.junction_temp){
-        rewind(amdgpu.junction_temp);
-        fflush(amdgpu.junction_temp);
-        int value = 0;
-        if (fscanf(amdgpu.junction_temp, "%d", &value) != 1)
-            value = 0;
-        gpu_info.junction_temp = value / 1000;
+    // Search for all matches and store the last one
+    auto it = std::sregex_iterator(path_str.begin(), path_str.end(), pci_address_regex);
+    auto end = std::sregex_iterator();
+    for (std::sregex_iterator i = it; i != end; ++i) {
+        pci_address = (*i).str();
     }
 
-    if (amdgpu.memory_temp){
-        rewind(amdgpu.memory_temp);
-        fflush(amdgpu.memory_temp);
-        int value = 0;
-        if (fscanf(amdgpu.memory_temp, "%d", &value) != 1)
-            value = 0;
-        gpu_info.memory_temp = value / 1000;
+    if (!pci_address.empty()) {
+        return pci_address;  // Return the last matched PCI address
+    } else {
+        SPDLOG_DEBUG("PCI address not found in the path: {}", path_str);
+        return "";
     }
+}
+
+void GPUS::find_active_gpu() {  // Sets is_active on the first GPU this process is found to be using.
+    pid_t pid = getpid();
+    std::string fdinfo_dir = "/proc/" + std::to_string(pid) + "/fdinfo/";
+    bool active_gpu_found = false;  // NOTE(review): never set to true; every success path returns directly
+
+    for (const auto& entry : fs::directory_iterator(fdinfo_dir)) {
+        if (entry.is_regular_file()) {
+            std::ifstream file(entry.path());
+            std::string line;
+            std::string drm_pdev;  // value of the "drm-pdev:" line, compared with GPU::pci_dev below
+            bool has_drm_driver = false;
+            bool has_drm_engine_gfx = false;  // set once a non-zero "drm-engine-gfx:" counter is seen
+
+            while (std::getline(file, line)) {
+                if (line.find("drm-driver:") != std::string::npos) {
+                    has_drm_driver = true;
+                }
+                if (line.find("drm-pdev:") != std::string::npos) {
+                    drm_pdev = line.substr(line.find(":") + 1);
+                    drm_pdev.erase(0, drm_pdev.find_first_not_of(" \t"));  // strip leading blanks/tabs
+                }
+                if (line.find("drm-engine-gfx:") != std::string::npos) {
+                    uint64_t gfx_time = std::stoull(line.substr(line.find(":") + 1));  // throws if no number follows
+                    if (gfx_time > 0) {
+                        has_drm_engine_gfx = true;
+                    }
+                }
+            }
 
-    if (amdgpu.gtt_used) {
-        rewind(amdgpu.gtt_used);
-        fflush(amdgpu.gtt_used);
-        if (fscanf(amdgpu.gtt_used, "%" PRId64, &value) != 1)
-            value = 0;
-        gpu_info.gtt_used = float(value) / (1024 * 1024 * 1024);
+            if (has_drm_driver && has_drm_engine_gfx) {  // a DRM fd with gfx time: find the GPU with that address
+                for (const auto& gpu : available_gpus) {
+                    if (gpu->pci_dev == drm_pdev) {
+                        gpu->is_active = true;
+                        SPDLOG_DEBUG("Active GPU Found: node_name: {}, pci_dev: {}", gpu->name, gpu->pci_dev);
+                        return;
+                    }
+                }
+            }
+        }
     }
 
-    if (amdgpu.gpu_voltage_soc) {
-        rewind(amdgpu.gpu_voltage_soc);
-        fflush(amdgpu.gpu_voltage_soc);
-        if (fscanf(amdgpu.gpu_voltage_soc, "%" PRId64, &value) != 1)
-            value = 0;
-        gpu_info.voltage = value;
+    // The fdinfo scan matched nothing. NVIDIA GPUs will not show up in fdinfo, so NVML is used instead:
+    // a GPU is active when this process is in its NVML pid list. This will not work for older NVIDIA GPUs.
+#ifdef HAVE_NVML
+    if (!active_gpu_found) {
+        for (const auto& gpu : available_gpus) {
+            // NVIDIA vendor ID is 0x10de
+            if (gpu->vendor_id == 0x10de) { 
+                for (auto& pid : gpu->nvidia_pids()) {
+                    if (pid == getpid()) {
+                        gpu->is_active = true;
+                        SPDLOG_DEBUG("Active GPU Found: node_name: {}, pci_dev: {}", gpu->name, gpu->pci_dev);
+                        return;
+                    }
+                }
+
+            }
+        }
     }
 #endif
 }
diff --git a/src/gpu.h b/src/gpu.h
index c27a36621c..0af6583050 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -5,49 +5,162 @@
 #include <cstdio>
 #include <cstdint>
 #include "overlay_params.h"
+#include <thread>
+#include <mutex>
+#include <atomic>
+#include <chrono>
+#include <regex>
+#include <iostream>
+#ifdef HAVE_NVML
+#include "loaders/loader_nvml.h"
+#endif
+#ifdef HAVE_XNVCTRL
+#include "loaders/loader_nvctrl.h"
+#include "loaders/loader_x11.h"
+#endif
+#include "amdgpu.h"
+#include "nvidia.h"
+#include "gpu_metrics_util.h"
+#include "gpu_fdinfo.h"
 
-struct amdgpu_files
-{
-    FILE *vram_total;
-    FILE *vram_used;
-    /* The following can be NULL, in that case we're using the gpu_metrics node */
-    FILE *busy;
-    FILE *temp;
-    FILE *junction_temp;
-    FILE *memory_temp;
-    FILE *core_clock;
-    FILE *memory_clock;
-    FILE *power_usage;
-    FILE *gtt_used;
-    FILE *fan;
-    FILE *gpu_voltage_soc;
-};
+// One DRM card: owns the vendor-specific backend object and the last metrics snapshot copied from it.
+class GPU {
+    public:
+        gpu_metrics metrics;
+        std::string name;
+        std::unique_ptr<NVIDIA> nvidia = nullptr;
+        std::unique_ptr<AMDGPU> amdgpu = nullptr;
+        std::unique_ptr<GPU_fdinfo> fdinfo = nullptr;
+        bool is_active = false;  // only GPUS::find_active_gpu() sets this; it must not start indeterminate
+        std::string pci_dev;
+        uint32_t vendor_id;
+
+        GPU(std::string name, uint32_t vendor_id, uint32_t device_id, const char* pci_dev)
+            : name(name), pci_dev(pci_dev), vendor_id(vendor_id), device_id(device_id) {
+                if (vendor_id == 0x10de)
+                    nvidia = std::make_unique<NVIDIA>(this->pci_dev.c_str());  // member copy outlives the caller's buffer
+
+                if (vendor_id == 0x1002)
+                    amdgpu = std::make_unique<AMDGPU>(this->pci_dev.c_str(), device_id, vendor_id);
+
+                // For now we're only accepting one of these modules at once
+                // Might be possible that multiple can exist on a system in the future?
+                if (vendor_id == 0x8086)
+                    fdinfo = std::make_unique<GPU_fdinfo>("i915");
+
+                if (vendor_id == 0x5143)
+                    fdinfo = std::make_unique<GPU_fdinfo>("msm");
+        }
+
+        gpu_metrics get_metrics() {  // copies the backend's metrics into `metrics` and returns them
+            if (nvidia)
+                this->metrics = nvidia->copy_metrics();
+
+            if (amdgpu)
+                this->metrics = amdgpu->copy_metrics();
+
+            if (fdinfo)
+                this->metrics = fdinfo->copy_metrics();
+
+            return metrics;
+        };
+
+        std::vector<int> nvidia_pids() {  // empty unless built with HAVE_NVML and an NVIDIA backend exists
+#ifdef HAVE_NVML
+            if (nvidia)
+                return nvidia->pids();
+#endif
+            return std::vector<int>();
+        }
+
+        void pause() {
+            if (nvidia)
+                nvidia->pause();
+
+            if (amdgpu)
+                amdgpu->pause();
+
+            if (fdinfo)
+                fdinfo->pause();
+        }
+
+        void resume() {
+            if (nvidia)
+                nvidia->resume();
+
+            if (amdgpu)
+                amdgpu->resume();
 
-extern amdgpu_files amdgpu;
-
-struct gpuInfo{
-    int load;
-    int temp;
-    int junction_temp {-1};
-    int memory_temp {-1};
-    float memoryUsed;
-    float memoryTotal;
-    int MemClock;
-    int CoreClock;
-    float powerUsage;
-    float apu_cpu_power;
-    int apu_cpu_temp;
-    bool is_power_throttled;
-    bool is_current_throttled;
-    bool is_temp_throttled;
-    bool is_other_throttled;
-    float gtt_used;
-    int fan_speed;
-    int voltage;
-    bool fan_rpm;
+            if (fdinfo)
+                fdinfo->resume();
+        }
+
+        bool is_apu() {
+            if (amdgpu)
+                return amdgpu->is_apu;
+            else
+                return false;
+        }
+
+        std::shared_ptr<Throttling> throttling() {
+            if (nvidia)
+                return nvidia->throttling;
+
+            if (amdgpu)
+                return amdgpu->throttling;
+
+            return nullptr;
+        }
+
+    private:
+        uint32_t device_id;
+        std::thread thread;
 };
 
-extern struct gpuInfo gpu_info;
+// Owns every detected GPU and fans the per-GPU operations out to all of them.
+class GPUS {
+    public:
+        std::vector<std::shared_ptr<GPU>> available_gpus;
+        std::mutex metrics_mutex;
+
+        void find_active_gpu();
+        GPUS();
+
+        void pause() {
+            for (const auto& entry : available_gpus)
+                entry->pause();
+        }
+
+        void resume() {
+            for (const auto& entry : available_gpus)
+                entry->resume();
+        }
+
+        // First GPU whose is_active flag is set, or nullptr when there is none.
+        std::shared_ptr<GPU> active_gpu() {
+            for (const auto& entry : available_gpus)
+                if (entry->is_active)
+                    return entry;
+            return nullptr;
+        }
+
+        // Runs Throttling::update() for each GPU that provides a Throttling object.
+        void update_throttling() {
+            for (const auto& entry : available_gpus)
+                if (const auto state = entry->throttling())
+                    state->update();
+        }
+
+        // Refreshes the metrics snapshot of every GPU while holding metrics_mutex.
+        void get_metrics() {
+            std::lock_guard<std::mutex> lock(metrics_mutex);
+            for (const auto& entry : available_gpus)
+                entry->get_metrics();
+        }
+
+    private:
+        std::string get_pci_device_address(const std::string& drm_card_path);
+};
 
 void getNvidiaGpuInfo(const struct overlay_params& params);
 void getAmdGpuInfo(void);
diff --git a/src/msm.cpp b/src/gpu_fdinfo.cpp
similarity index 51%
rename from src/msm.cpp
rename to src/gpu_fdinfo.cpp
index c21127f5da..0bc37c2e0b 100644
--- a/src/msm.cpp
+++ b/src/gpu_fdinfo.cpp
@@ -1,30 +1,8 @@
-#include <filesystem.h>
-#include <mesa/util/os_time.h>
-#include <inttypes.h>
-
-#include "msm.h"
-std::unique_ptr<MSM> msm;
+#include "gpu_fdinfo.h"
 namespace fs = ghc::filesystem;
 
-uint64_t MSM::get_gpu_time() {
-    char line[256];
-    uint64_t total_val = 0;
-    for (auto fd : fdinfo) {
-        rewind(fd);
-        fflush(fd);
-        uint64_t val = 0;
-        while (fgets(line, sizeof(line), fd)){
-            if (sscanf(line, "drm-engine-gpu: %" SCNu64 " ns", &val) == 1) {
-                total_val += val;
-                break;
-            }
-        }
-    }
-
-    return total_val;
-}
-
-void MSM::find_fd() {
+void GPU_fdinfo::find_fd() {
+#if DETECT_OS_UNIX
     DIR* dir = opendir("/proc/self/fdinfo");
     if (!dir) {
         perror("Failed to open directory");
@@ -38,7 +16,7 @@ void MSM::find_fd() {
         char line[256];
         bool found_driver = false;
         while (fgets(line, sizeof(line), file)) {
-            if (strstr(line, "msm") != NULL)
+            if (strstr(line, module) != NULL)
                 found_driver = true;
 
             if (found_driver) {
@@ -54,26 +32,49 @@ void MSM::find_fd() {
     }
 
     closedir(dir);
+#endif
+}
+
+// Sums the "drm-engine-gpu" nanosecond counter over every tracked fdinfo stream.
+uint64_t GPU_fdinfo::get_gpu_time() {
+    uint64_t busy_ns_sum = 0;
+    char buf[256];
+    for (FILE* stream : fdinfo) {
+        rewind(stream);
+        fflush(stream);
+        uint64_t busy_ns = 0;
+        while (fgets(buf, sizeof(buf), stream)) {
+            if (sscanf(buf, "drm-engine-gpu: %" SCNu64 " ns", &busy_ns) != 1)
+                continue;
+            busy_ns_sum += busy_ns;  // only the first matching line of a stream is counted
+            break;
+        }
+    }
+    return busy_ns_sum;
 }
 
-void MSM::get_fdinfo() {
-    static uint64_t previous_gpu_time, previous_time, now, gpu_time_now;
-    gpu_time_now = get_gpu_time();
-    now = os_time_get_nano();
+// Worker-thread body: recomputes metrics.load from the fdinfo busy-time delta until stop_thread is set.
+void GPU_fdinfo::get_load() {
+    uint64_t previous_gpu_time = 0, previous_time = 0;  // per worker, not function statics shared by all instances
+    while (!stop_thread) {
+        std::unique_lock<std::mutex> lock(metrics_mutex);
+        cond_var.wait(lock, [this]() { return !paused || stop_thread; });
+        const uint64_t gpu_time_now = get_gpu_time();
+        const uint64_t now = os_time_get_nano();
 
-    if (previous_time && previous_gpu_time && gpu_time_now > previous_gpu_time){
-        float time_since_last = now - previous_time;
-        float gpu_since_last = gpu_time_now - previous_gpu_time;
-        auto result = int((gpu_since_last / time_since_last) * 100);
-        if (result > 100)
-            result = 100;
+        if (previous_time && previous_gpu_time && gpu_time_now > previous_gpu_time){
+            float time_since_last = now - previous_time;
+            float gpu_since_last = gpu_time_now - previous_gpu_time;
+            auto result = int((gpu_since_last / time_since_last) * 100);
+            metrics.load = result > 100 ? 100 : result;
+        }
 
-        gpu_info_msm.load = result;
-        previous_gpu_time = gpu_time_now;
-        previous_time = now;
-    } else {
-        previous_gpu_time = gpu_time_now;
-        previous_time = now;
+        previous_gpu_time = gpu_time_now;
+        previous_time = now;
+        lock.unlock();
+
+        // Pace the loop: without a pause this thread spins on one core and the
+        // sampling window is too short for the busy-time ratio to mean anything.
+        std::this_thread::sleep_for(std::chrono::milliseconds(METRICS_UPDATE_PERIOD_MS));
     }
 }
-
diff --git a/src/gpu_fdinfo.h b/src/gpu_fdinfo.h
new file mode 100644
index 0000000000..9673a6818f
--- /dev/null
+++ b/src/gpu_fdinfo.h
@@ -0,0 +1,54 @@
+#pragma once
+#include <sys/stat.h>
+#include <thread>
+#include <filesystem.h>
+#include <inttypes.h>
+#include <mesa/util/os_time.h>
+#include <spdlog/spdlog.h>
+#include "gpu_metrics_util.h"
+#include <atomic>
+
+class GPU_fdinfo {  // Computes GPU load for one kernel driver from this process's fdinfo entries.
+    private:
+        bool init = false;
+        struct gpu_metrics metrics;  // written by get_load(), handed out by copy_metrics()
+        std::vector<FILE*> fdinfo;  // streams collected by find_fd(); closed in the destructor
+        const char* module;  // driver name searched for in fdinfo ("i915" / "msm" at the visible call sites)
+        void find_fd();
+        std::thread thread;  // NOTE(review): unused - the constructor's local of the same name shadows it
+        std::condition_variable cond_var;
+        std::atomic<bool> stop_thread = false;  // NOTE(review): nothing visible in this class ever sets it to true
+        std::atomic<bool> paused = false;
+        mutable std::mutex metrics_mutex;
+
+        uint64_t get_gpu_time();
+        void get_load();
+
+    public:
+        GPU_fdinfo(const char* module) : module(module) {
+            find_fd();
+            std::thread thread(&GPU_fdinfo::get_load, this);  // local handle, shadows the member above
+            thread.detach();  // NOTE(review): the detached worker keeps using `this`; the destructor does not stop it
+        }
+
+        gpu_metrics copy_metrics() const {  // NOTE(review): reads metrics without taking metrics_mutex
+            return metrics;
+        };
+
+        void pause() {  // get_load() blocks in cond_var.wait() while paused is true
+            paused = true;
+            cond_var.notify_one();
+        }
+
+        void resume() {
+            paused = false;
+            cond_var.notify_one();
+        }
+
+        ~GPU_fdinfo() {  // closes every stream opened by find_fd()
+            for (size_t i = 0; i < fdinfo.size(); i++) {
+                fclose(fdinfo[i]);
+            }
+            fdinfo.clear();
+        }
+};
diff --git a/src/gpu_metrics_util.h b/src/gpu_metrics_util.h
new file mode 100644
index 0000000000..0dad2a7bc6
--- /dev/null
+++ b/src/gpu_metrics_util.h
@@ -0,0 +1,89 @@
+#pragma once
+
+struct gpu_metrics {  // one snapshot of the values shown for a GPU
+    int load;
+    int temp;
+    int junction_temp {-1};  // -1 = no reading; the HUD draws it only when > -1
+    int memory_temp {-1};    // same -1 sentinel as junction_temp (assumed - confirm against the HUD code)
+    float memoryUsed;
+    float memoryTotal;
+    int MemClock;
+    int CoreClock;
+    float powerUsage;
+    float apu_cpu_power;
+    int apu_cpu_temp;
+    bool is_power_throttled;
+    bool is_current_throttled;
+    bool is_temp_throttled;
+    bool is_other_throttled;
+    float gtt_used;
+    int fan_speed;
+    int voltage;
+    bool fan_rpm;
+    // Zero everything except the two optional temperatures, which must keep their -1 sentinel.
+    gpu_metrics()
+        : load(0), temp(0), junction_temp(-1), memory_temp(-1),
+          memoryUsed(0.0f), memoryTotal(0.0f), MemClock(0), CoreClock(0),
+          powerUsage(0.0f), apu_cpu_power(0.0f), apu_cpu_temp(0),
+          is_power_throttled(false), is_current_throttled(false),
+          is_temp_throttled(false), is_other_throttled(false),
+          gtt_used(0.0f), fan_speed(0), voltage(0), fan_rpm(false) {}
+};
+
+#define METRICS_UPDATE_PERIOD_MS 500
+#define METRICS_POLLING_PERIOD_MS 25
+#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS)
+
+#define GPU_UPDATE_METRIC_AVERAGE(FIELD) do { int value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0)
+#define GPU_UPDATE_METRIC_AVERAGE_FLOAT(FIELD) do { float value_sum = 0; for (size_t s=0; s < METRICS_SAMPLE_COUNT; s++) { value_sum += metrics_buffer[s].FIELD; } metrics.FIELD = value_sum / METRICS_SAMPLE_COUNT; } while(0)
+#define GPU_UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; metrics.FIELD = cur_max; } while(0)
+#define GPU_UPDATE_METRIC_LAST(FIELD) do { metrics.FIELD = metrics_buffer[METRICS_SAMPLE_COUNT - 1].FIELD; } while(0)
+
+// Fixed-length history of power/thermal throttle samples for one GPU.
+// The vendor id selects how update() decodes the bitmask (0x1002 and 0x10de are handled).
+class Throttling {
+	public:
+		std::vector<float> power;
+		std::vector<float> thermal;
+		// Raw throttle bitmask decoded by update(); starts at 0 (nothing throttled).
+		int64_t indep_throttle_status = 0;
+		uint32_t vendor_id;
+
+		// Both histories start with 200 "not throttled" samples.
+		Throttling(uint32_t vendor_id)
+			: power(200, 0.0f),
+			thermal(200, 0.0f), vendor_id(vendor_id) {}
+
+		// Appends one sample to each history (0.1 = throttled, 0 = not) and drops
+		// the oldest one, so both vectors always keep their initial length.
+		void update(){
+			bool power_hit = false;
+			bool thermal_hit = false;
+
+			if (vendor_id == 0x1002) {
+				power_hit = ((indep_throttle_status >> 0) & 0xFF) != 0;
+				thermal_hit = ((indep_throttle_status >> 32) & 0xFFFF) != 0;
+			} else if (vendor_id == 0x10de) {
+				power_hit = (indep_throttle_status & 0x000000000000008CLL) != 0;
+				thermal_hit = (indep_throttle_status & 0x0000000000000060LL) != 0;
+			}
+			// Any other vendor records "not throttled": erasing without pushing
+			// would shrink the histories and eventually erase from an empty vector.
+
+			power.push_back(power_hit ? 0.1f : 0.0f);
+			thermal.push_back(thermal_hit ? 0.1f : 0.0f);
+
+			power.erase(power.begin());
+			thermal.erase(thermal.begin());
+		}
+
+		// True while at least one throttled sample is still in the power history.
+		bool power_throttling(){
+			return std::find(power.begin(), power.end(), 0.1f) != power.end();
+		}
+
+		// True while at least one throttled sample is still in the thermal history.
+		bool thermal_throttling(){
+			return std::find(thermal.begin(), thermal.end(), 0.1f) != thermal.end();
+		}
+};
\ No newline at end of file
diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp
index dc35589ff8..8a478f1d38 100644
--- a/src/hud_elements.cpp
+++ b/src/hud_elements.cpp
@@ -184,118 +184,129 @@ void HudElements::version(){
 }
 
 void HudElements::gpu_stats(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
-        ImguiNextColumnFirstItem();
-        const char* gpu_text;
-        if (HUDElements.params->gpu_text.empty())
-            gpu_text = "GPU";
-        else
-            gpu_text = HUDElements.params->gpu_text.c_str();
-        HUDElements.TextColored(HUDElements.colors.gpu, "%s", gpu_text);        ImguiNextColumnOrNewRow();
-        auto text_color = HUDElements.colors.text;
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
-            struct LOAD_DATA gpu_data = {
-                HUDElements.colors.gpu_load_low,
-                HUDElements.colors.gpu_load_med,
-                HUDElements.colors.gpu_load_high,
-                HUDElements.params->gpu_load_value[0],
-                HUDElements.params->gpu_load_value[1]
-            };
+    if (!HUDElements.gpus)
+        HUDElements.gpus = std::make_unique<GPUS>();
 
-            auto load_color = change_on_load_temp(gpu_data, gpu_info.load);
-            right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            HUDElements.TextColored(load_color,"%%");
-        }
-        else {
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            HUDElements.TextColored(text_color,"%%");
-            // ImGui::SameLine(150);
-            // ImGui::Text("%s", "%");
-        }
+    std::lock_guard<std::mutex> lock(HUDElements.gpus->metrics_mutex);
+    int i = 0;
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
+        for (auto gpu : HUDElements.gpus->available_gpus) {
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_active_gpu] && !gpu->is_active)
+                continue;
 
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
+            ImguiNextColumnFirstItem();
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_active_gpu]) {
+                HUDElements.TextColored(HUDElements.colors.gpu, "%s", "GPU");
+            } else {
+                HUDElements.TextColored(HUDElements.colors.gpu, "%s", ("GPU" + to_string(i)).c_str());
+            }
             ImguiNextColumnOrNewRow();
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
-                right_aligned_text(text_color, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu_info.temp));
-            else
-                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.temp);
-            ImGui::SameLine(0, 1.0f);
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_hud_compact])
-                HUDElements.TextColored(HUDElements.colors.text, "°");
-            else
+            auto text_color = HUDElements.colors.text;
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
+                struct LOAD_DATA gpu_data = {
+                    HUDElements.colors.gpu_load_low,
+                    HUDElements.colors.gpu_load_med,
+                    HUDElements.colors.gpu_load_high,
+                    HUDElements.params->gpu_load_value[0],
+                    HUDElements.params->gpu_load_value[1]
+                };
+
+                auto load_color = change_on_load_temp(gpu_data, gpu->metrics.load);
+                right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu->metrics.load);
+                ImGui::SameLine(0, 1.0f);
+                HUDElements.TextColored(load_color,"%%");
+            }
+            else {
+                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.load);
+                ImGui::SameLine(0, 1.0f);
+                HUDElements.TextColored(text_color,"%%");
+                // ImGui::SameLine(150);
+                // ImGui::Text("%s", "%");
+            }
+
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
+                ImguiNextColumnOrNewRow();
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu->metrics.temp));
+                else
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.temp);
+                ImGui::SameLine(0, 1.0f);
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_hud_compact])
+                    HUDElements.TextColored(HUDElements.colors.text, "°");
+                else
+                    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
+                        HUDElements.TextColored(HUDElements.colors.text, "°F");
+                    else
+                        HUDElements.TextColored(HUDElements.colors.text, "°C");
+            }
+
+            if (gpu->metrics.junction_temp > -1 && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_junction_temp]) {
+                ImguiNextColumnOrNewRow();
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu->metrics.junction_temp));
+                else
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.junction_temp);
+                ImGui::SameLine(0, 1.0f);
                 if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
                     HUDElements.TextColored(HUDElements.colors.text, "°F");
                 else
                     HUDElements.TextColored(HUDElements.colors.text, "°C");
-        }
+                ImGui::SameLine(0, 1.0f);
+                ImGui::PushFont(HUDElements.sw_stats->font1);
+                HUDElements.TextColored(HUDElements.colors.text, "Jnc");
+                ImGui::PopFont();
+            }
 
-        if (gpu_info.junction_temp > -1 && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_junction_temp]) {
-            ImguiNextColumnOrNewRow();
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
-                right_aligned_text(text_color, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu_info.junction_temp));
-            else
-                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.junction_temp);
-            ImGui::SameLine(0, 1.0f);
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
-                HUDElements.TextColored(HUDElements.colors.text, "°F");
-            else
-                HUDElements.TextColored(HUDElements.colors.text, "°C");
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "Jnc");
-            ImGui::PopFont();
-        }
+            if (HUDElements.vendorID == 0x1002 || HUDElements.vendorID == 0x10de){
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_fan] && cpuStats.cpu_type != "APU"){
+                    ImguiNextColumnOrNewRow();
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.fan_speed);
+                    ImGui::SameLine(0, 1.0f);
+                    if (gpu->metrics.fan_rpm) {
+                        ImGui::PushFont(HUDElements.sw_stats->font1);
+                        HUDElements.TextColored(HUDElements.colors.text, "RPM");
+                    } else {
+                        HUDElements.TextColored(HUDElements.colors.text, "%%");
+                        ImGui::PushFont(HUDElements.sw_stats->font1);
+                        ImGui::SameLine(0, 1.0f);
+                        HUDElements.TextColored(HUDElements.colors.text, "FAN");
+                    }
+                    ImGui::PopFont();
+                }
+            }
 
-        if (HUDElements.vendorID == 0x1002 || HUDElements.vendorID == 0x10de){
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_fan] && cpuStats.cpu_type != "APU"){
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
                 ImguiNextColumnOrNewRow();
-                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.fan_speed);
+                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.CoreClock);
                 ImGui::SameLine(0, 1.0f);
-                if (gpu_info.fan_rpm) {
-                    ImGui::PushFont(HUDElements.sw_stats->font1);
-                    HUDElements.TextColored(HUDElements.colors.text, "RPM");
-                } else {
-                    HUDElements.TextColored(HUDElements.colors.text, "%%");
-                    ImGui::PushFont(HUDElements.sw_stats->font1);
-                    ImGui::SameLine(0, 1.0f);
-                    HUDElements.TextColored(HUDElements.colors.text, "FAN");
-                }
+                ImGui::PushFont(HUDElements.sw_stats->font1);
+                HUDElements.TextColored(HUDElements.colors.text, "MHz");
                 ImGui::PopFont();
             }
-        }
 
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
-            ImguiNextColumnOrNewRow();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.CoreClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "MHz");
-            ImGui::PopFont();
-        }
-
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
-            ImguiNextColumnOrNewRow();
-            char str[16];
-            snprintf(str, sizeof(str), "%.1f", gpu_info.powerUsage);
-            if (strlen(str) > 4)
-                right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu_info.powerUsage);
-            else
-                right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu_info.powerUsage);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "W");
-            ImGui::PopFont();
-        }
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
+                ImguiNextColumnOrNewRow();
+                char str[16];
+                snprintf(str, sizeof(str), "%.1f", gpu->metrics.powerUsage);
+                if (strlen(str) > 4)
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%.0f", gpu->metrics.powerUsage);
+                else
+                    right_aligned_text(text_color, HUDElements.ralign_width, "%.1f", gpu->metrics.powerUsage);
+                ImGui::SameLine(0, 1.0f);
+                ImGui::PushFont(HUDElements.sw_stats->font1);
+                HUDElements.TextColored(HUDElements.colors.text, "W");
+                ImGui::PopFont();
+            }
 
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_voltage]) {
-            ImguiNextColumnOrNewRow();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.voltage);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "mV");
-            ImGui::PopFont();
+            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_voltage]) {
+                ImguiNextColumnOrNewRow();
+                right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->metrics.voltage);
+                ImGui::SameLine(0, 1.0f);
+                ImGui::PushFont(HUDElements.sw_stats->font1);
+                HUDElements.TextColored(HUDElements.colors.text, "mV");
+                ImGui::PopFont();
+            }
+            i++;
         }
     }
 }
@@ -490,42 +501,62 @@ void HudElements::io_stats(){
 }
 
 void HudElements::vram(){
+    if (!HUDElements.gpus)
+        HUDElements.gpus = std::make_unique<GPUS>();
+
     if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]){
-        ImguiNextColumnFirstItem();
-        HUDElements.TextColored(HUDElements.colors.vram, "VRAM");
-        ImguiNextColumnOrNewRow();
-        // Add gtt_used to vram usage for APUs
-        if (cpuStats.cpu_type == "APU")
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed + gpu_info.gtt_used);
-        else
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed);
-        if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_hud_compact]){
-            ImGui::SameLine(0,1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "GiB");
-            ImGui::PopFont();
-        }
+        std::lock_guard<std::mutex> lock(HUDElements.gpus->metrics_mutex); // held for the rest of this block, which reads gpu->metrics
+        int i = 0; // index appended to the "VRAM" label when all GPUs are listed
+        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){ // NOTE(review): the pre-patch vram() was not gated on gpu_stats - confirm this dependency is intended
+            for (const auto& gpu : HUDElements.gpus->available_gpus) { // bind by reference: no per-iteration copy of the element
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_active_gpu] && !gpu->is_active)
+                    continue;
+
+                ImGui::TableNextRow();
+                ImGui::TableNextColumn();
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_active_gpu]) {
+                    HUDElements.TextColored(HUDElements.colors.vram, "VRAM");
+                } else {
+                    HUDElements.TextColored(HUDElements.colors.vram, ("VRAM" + to_string(i)).c_str());
+                }
 
-        if (gpu_info.memory_temp > -1 && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_temp]) {
-            ImguiNextColumnOrNewRow();
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu_info.memory_temp));
-            else
-                right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.memory_temp);
-            ImGui::SameLine(0, 1.0f);
-            if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
-                HUDElements.TextColored(HUDElements.colors.text, "°F");
-            else
-                HUDElements.TextColored(HUDElements.colors.text, "°C");
-        }
+                
+                ImguiNextColumnOrNewRow();
+                // Add gtt_used to vram usage for APUs
+                if (cpuStats.cpu_type == "APU")
+                    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu->metrics.memoryUsed + gpu->metrics.gtt_used);
+                else
+                    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu->metrics.memoryUsed);
+                if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_hud_compact]){
+                    ImGui::SameLine(0,1.0f);
+                    ImGui::PushFont(HUDElements.sw_stats->font1);
+                    HUDElements.TextColored(HUDElements.colors.text, "GiB");
+                    ImGui::PopFont();
+                }
 
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
-            ImguiNextColumnOrNewRow();
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.MemClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            HUDElements.TextColored(HUDElements.colors.text, "MHz");
-            ImGui::PopFont();
+                if (gpu->metrics.memory_temp > -1 && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_temp]) {
+                    ImguiNextColumnOrNewRow();
+                    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
+                        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", HUDElements.convert_to_fahrenheit(gpu->metrics.memory_temp));
+                    else
+                        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu->metrics.memory_temp);
+                    ImGui::SameLine(0, 1.0f);
+                    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_temp_fahrenheit])
+                        HUDElements.TextColored(HUDElements.colors.text, "°F");
+                    else
+                        HUDElements.TextColored(HUDElements.colors.text, "°C");
+                }
+
+                if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
+                    ImguiNextColumnOrNewRow();
+                    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu->metrics.MemClock);
+                    ImGui::SameLine(0, 1.0f);
+                    ImGui::PushFont(HUDElements.sw_stats->font1);
+                    HUDElements.TextColored(HUDElements.colors.text, "MHz");
+                    ImGui::PopFont();
+                }
+                i++;
+            }
         }
     }
 }
@@ -815,11 +846,12 @@ void HudElements::frame_timing(){
                         ImPlot::SetupAxesLimits(0, 200, min_time, max_time);
                         ImPlot::SetNextLineStyle(HUDElements.colors.frametime, 1.5);
                         ImPlot::PlotLine("frametime line", frametime_data.data(), frametime_data.size());
-                        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status_graph] && throttling){
+                        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status_graph] && 
+                            HUDElements.gpus->active_gpu() && HUDElements.gpus->active_gpu()->throttling()){
                             ImPlot::SetNextLineStyle(ImVec4(1.0f, 1.0f, 0.0f, 1.0f), 1.5);
-                            ImPlot::PlotLine("power line", throttling->power.data(), throttling->power.size());
+                            ImPlot::PlotLine("power line", HUDElements.gpus->active_gpu()->throttling()->power.data(), HUDElements.gpus->active_gpu()->throttling()->power.size());
                             ImPlot::SetNextLineStyle(ImVec4(1.0f, 0.0f, 0.0f, 1.0f), 1.5);
-                            ImPlot::PlotLine("thermal line", throttling->thermal.data(), throttling->thermal.size());
+                            ImPlot::PlotLine("thermal line", HUDElements.gpus->active_gpu()->throttling()->thermal.data(), HUDElements.gpus->active_gpu()->throttling()->thermal.size());
                         }
                         ImPlot::EndPlot();
                     }
@@ -829,10 +861,10 @@ void HudElements::frame_timing(){
         }
         ImGui::EndChild();
 #ifdef __linux__
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status_graph] && throttling){
+        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status_graph] && HUDElements.gpus->active_gpu() && HUDElements.gpus->active_gpu()->throttling()){ // same active_gpu() null guard as the plot branch
             ImGui::Dummy(ImVec2(0.0f, real_font_size.y / 2));
 
-            if (throttling->power_throttling()) {
+            if (HUDElements.gpus->active_gpu()->throttling()->power_throttling()) {
                 ImGui::TextColored(ImVec4(1.0f, 1.0f, 0.0f, 1.0f), "%s", ICON_FK_SQUARE);
                 ImGui::SameLine();
                 ImGui::Text("Power throttling");
@@ -840,7 +872,7 @@ void HudElements::frame_timing(){
 
             ImGui::Dummy(ImVec2(0.0f, real_font_size.y / 2));
 
-            if (throttling->thermal_throttling()) {
+            if (HUDElements.gpus->active_gpu()->throttling()->thermal_throttling()) {
                 ImGui::TextColored(ImVec4(1.0f, 0.0f, 0.0f, 1.0f), "%s", ICON_FK_SQUARE);
                 ImGui::SameLine();
                 ImGui::Text("Thermal throttling");
@@ -1224,18 +1256,19 @@ void HudElements::fan(){
 
 void HudElements::throttling_status(){
     if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_throttling_status]){
-        if (gpu_info.is_power_throttled || gpu_info.is_current_throttled || gpu_info.is_temp_throttled || gpu_info.is_other_throttled){
+        auto gpu = HUDElements.gpus ? HUDElements.gpus->active_gpu() : nullptr; // null when GPUS is not created yet or it reports no active GPU
+        if (gpu && (gpu->metrics.is_power_throttled || gpu->metrics.is_current_throttled || gpu->metrics.is_temp_throttled || gpu->metrics.is_other_throttled)){
             ImguiNextColumnFirstItem();
             HUDElements.TextColored(HUDElements.colors.engine, "%s", "Throttling");
             ImguiNextColumnOrNewRow();
             ImguiNextColumnOrNewRow();
-            if (gpu_info.is_power_throttled)
+            if (gpu->metrics.is_power_throttled)
                 right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Power");
-            if (gpu_info.is_current_throttled)
+            if (gpu->metrics.is_current_throttled)
                 right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Current");
-            if (gpu_info.is_temp_throttled)
+            if (gpu->metrics.is_temp_throttled)
                 right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Temp");
-            if (gpu_info.is_other_throttled)
+            if (gpu->metrics.is_other_throttled)
                 right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "Other");
         }
     }
@@ -1339,7 +1372,7 @@ void HudElements::graphs(){
             arr.push_back(float(it.gpu_vram_used));
         }
 
-        HUDElements.max = gpu_info.memoryTotal;
+        HUDElements.max = HUDElements.gpus->active_gpu() ? HUDElements.gpus->active_gpu()->metrics.memoryTotal : 0; // no active GPU: use 0 instead of dereferencing null
         HUDElements.min = 0;
         HUDElements.TextColored(HUDElements.colors.engine, "%s", "VRAM");
     }
diff --git a/src/hud_elements.h b/src/hud_elements.h
index 1b6622aded..e745ecf105 100644
--- a/src/hud_elements.h
+++ b/src/hud_elements.h
@@ -12,6 +12,7 @@
 #include "net.h"
 #include "overlay_params.h"
 #include "shell.h"
+#include "gpu.h"
 
 struct Function {
     std::function<void()> run;  // Using std::function instead of a raw function pointer for more flexibility
@@ -65,6 +66,7 @@ class HudElements{
         display_servers display_server = UNKNOWN;
         std::unique_ptr<WineSync> winesync_ptr = nullptr;
         std::unique_ptr<Net> net = nullptr;
+        std::unique_ptr<GPUS> gpus = nullptr;
 #ifdef __linux__
         std::unique_ptr<Shell> shell = nullptr;
 #endif
diff --git a/src/intel.cpp b/src/intel.cpp
deleted file mode 100644
index 7e3cd9b925..0000000000
--- a/src/intel.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-#include "intel.h"
-std::unique_ptr<Intel> intel;
-
-void Intel::intel_gpu_thread(){
-    init = true;
-    static char stdout_buffer[1024];
-    static FILE* intel_gpu_top;
-    if (runtime)
-        intel_gpu_top = popen("steam-runtime-launch-client --alongside-steam --host -- intel_gpu_top -J -s 500", "r");
-    else
-        intel_gpu_top = popen("intel_gpu_top -J -s 500", "r");
-
-    int num_line = 0;
-    std::string buf;
-    int num_iterations = 0;
-    while (fgets(stdout_buffer, sizeof(stdout_buffer), intel_gpu_top)) {
-        if (num_line > 0)
-            buf += stdout_buffer;
-
-        num_line++;
-        if (strlen(stdout_buffer) < 4 && !strchr(stdout_buffer, '{') && !strchr(stdout_buffer, ',')) {
-            if (buf[0] != '{')
-                buf = "{\n" + buf;
-
-            if (num_iterations > 0){
-                buf += "\n}";
-                json j = json::parse(buf);
-                if  (j.contains("engines"))
-                    if (j["engines"].contains("Render/3D/0"))
-                        if (j["engines"]["Render/3D/0"].contains("busy"))
-                            gpu_info_intel.load = j["engines"]["Render/3D/0"]["busy"].get<int>();
-
-                if  (j.contains("engines"))
-                    if (j["engines"].contains("Render/3D"))
-                        if (j["engines"]["Render/3D"].contains("busy"))
-                            gpu_info_intel.load = j["engines"]["Render/3D"]["busy"].get<int>();
-
-                if (j.contains("frequency"))
-                    if (j["frequency"].contains("actual"))
-                        gpu_info_intel.CoreClock = j["frequency"]["actual"].get<int>();
-                if (j.contains("power")){
-                    if (j["power"].contains("GPU"))
-                        gpu_info_intel.powerUsage = j["power"]["GPU"].get<float>();
-                    if (j["power"].contains("Package"))
-                        gpu_info_intel.apu_cpu_power = j["power"]["Package"].get<float>();
-                }
-
-            }
-            buf = "";
-            num_line = 0;
-        }
-        num_iterations++;
-        if (stop)
-            break;
-    }
-
-    int exitcode = pclose(intel_gpu_top) / 256;
-    if (exitcode > 0){
-        if (exitcode == 127)
-        SPDLOG_INFO("Failed to open '{}'", "intel_gpu_top");
-
-        if (exitcode == 1)
-        SPDLOG_INFO("Missing permissions for '{}'", "intel_gpu_top");
-
-    }
-}
-
-uint64_t Intel::get_gpu_time() {
-    rewind(fdinfo);
-    fflush(fdinfo);
-    char line[256];
-    uint64_t val;
-    while (fgets(line, sizeof(line), fdinfo)){
-        if(strstr(line, "drm-engine-render"))
-            sscanf(line, "drm-engine-render: %" SCNu64 " ns", &val);
-    }
-
-    return val;
-}
-
-FILE* Intel::find_fd() {
-    DIR* dir = opendir("/proc/self/fdinfo");
-    if (!dir) {
-        perror("Failed to open directory");
-        return NULL;
-    }
-
-    static uint64_t val;
-    static bool found_driver;
-
-    for (const auto& entry : fs::directory_iterator("/proc/self/fdinfo")){
-        FILE* file = fopen(entry.path().string().c_str(), "r");
-        if (file) {
-            char line[256];
-            while (fgets(line, sizeof(line), file)) {
-                if (strstr(line, "i915") != NULL)
-                    found_driver = true;
-
-                if (found_driver){
-                    if(strstr(line, "drm-engine-render")){
-                        sscanf(line, "drm-engine-render: %" SCNu64 " ns", &val);
-                            return file;
-                    }
-                }
-            }
-        }
-        fclose(file);
-    }
-
-    return NULL;  // Return NULL if no matching file is found
-}
-
-void Intel::get_fdinfo(){
-    static uint64_t previous_gpu_time, previous_time, now, gpu_time_now;
-    gpu_time_now = get_gpu_time();
-    now = os_time_get_nano();
-
-    if (previous_time && previous_gpu_time && gpu_time_now > previous_gpu_time){
-        float time_since_last = now - previous_time;
-        float gpu_since_last = gpu_time_now - previous_gpu_time;
-        auto result = int((gpu_since_last / time_since_last) * 100);
-        if (result > 100)
-            result = 100;
-
-        gpu_info_intel.load = result;
-        previous_gpu_time = gpu_time_now;
-        previous_time = now;
-    } else {
-        previous_gpu_time = gpu_time_now;
-        previous_time = now;
-    }
-}
diff --git a/src/intel.h b/src/intel.h
deleted file mode 100644
index 1fb4be6372..0000000000
--- a/src/intel.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <sys/stat.h>
-#include <thread>
-#include <nlohmann/json.hpp>
-#include <filesystem.h>
-#include <inttypes.h>
-#include <mesa/util/os_time.h>
-#include <spdlog/spdlog.h>
-#include "gpu.h"
-#include "hud_elements.h"
-
-using json = nlohmann::json;
-namespace fs = ghc::filesystem;
-
-class Intel {
-    private:
-        bool init = false;
-        bool runtime = false;
-        bool stop = false;
-        struct gpuInfo gpu_info_intel {};
-        FILE* fdinfo;
-        struct stat stat_buffer;
-        std::thread thread;
-
-        FILE* find_fd();
-        void intel_gpu_thread();
-        uint64_t get_gpu_time();
-        void get_fdinfo();
-
-    public:
-        Intel() {
-            if (stat("/run/pressure-vessel", &stat_buffer) == 0)
-                runtime = true;
-
-            fdinfo = find_fd();
-            // thread = std::thread(&Intel::intel_gpu_thread, this);
-        }
-
-        void update() {
-            if (fdinfo)
-                get_fdinfo();
-
-            gpu_info = gpu_info_intel;
-        }
-
-        // ~Intel(){
-        //     stop = true;
-        //     thread.join();
-        // }
-};
-
-extern std::unique_ptr<Intel> intel;
diff --git a/src/loaders/loader_nvctrl.h b/src/loaders/loader_nvctrl.h
index 81bc4359cd..61855c4be9 100644
--- a/src/loaders/loader_nvctrl.h
+++ b/src/loaders/loader_nvctrl.h
@@ -1,6 +1,7 @@
+#pragma once
 #ifndef LIBRARY_LOADER_NVCTRL_H
 #define LIBRARY_LOADER_NVCTRL_H
-#define Bool bool
+// #define Bool bool
 #include <X11/Xlib.h>
 #include "NVCtrl/NVCtrlLib.h"
 #define LIBRARY_LOADER_NVCTRL_H_DLOPEN
diff --git a/src/loaders/loader_nvml.cpp b/src/loaders/loader_nvml.cpp
index 5b8501173c..694a43350a 100644
--- a/src/loaders/loader_nvml.cpp
+++ b/src/loaders/loader_nvml.cpp
@@ -253,6 +253,19 @@ bool libnvml_loader::Load(const std::string& library_name) {
     return false;
   }
 
+#if defined(LIBRARY_LOADER_NVML_H_DLOPEN)
+  nvmlDeviceGetComputeRunningProcesses =
+      reinterpret_cast<decltype(this->nvmlDeviceGetComputeRunningProcesses)>(
+          dlsym(library_, "nvmlDeviceGetComputeRunningProcesses"));
+#endif
+#if defined(LIBRARY_LOADER_NVML_H_DT_NEEDED)
+  nvmlDeviceGetComputeRunningProcesses = &::nvmlDeviceGetComputeRunningProcesses;
+#endif
+  if (!nvmlDeviceGetComputeRunningProcesses) {
+    CleanUp(true);
+    return false;
+  }
+
   loaded_ = true;
   return true;
 }
@@ -278,4 +291,5 @@ void libnvml_loader::CleanUp(bool unload) {
   nvmlUnitGetFanSpeedInfo = NULL;
   nvmlUnitGetHandleByIndex = NULL;
   nvmlDeviceGetFanSpeed = NULL;
+  nvmlDeviceGetComputeRunningProcesses = NULL;
 }
diff --git a/src/loaders/loader_nvml.h b/src/loaders/loader_nvml.h
index 4277ddf8c7..460873799a 100644
--- a/src/loaders/loader_nvml.h
+++ b/src/loaders/loader_nvml.h
@@ -41,6 +41,7 @@ class libnvml_loader {
   decltype(&::nvmlUnitGetFanSpeedInfo) nvmlUnitGetFanSpeedInfo;
   decltype(&::nvmlUnitGetHandleByIndex) nvmlUnitGetHandleByIndex;
   decltype(&::nvmlDeviceGetFanSpeed) nvmlDeviceGetFanSpeed;
+  decltype(&::nvmlDeviceGetComputeRunningProcesses) nvmlDeviceGetComputeRunningProcesses;
 
  private:
   void CleanUp(bool unload);
diff --git a/src/meson.build b/src/meson.build
index 1395678bed..4d961fefa2 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -59,7 +59,10 @@ vklayer_files = files(
   'config.cpp',
   'gpu.cpp',
   'blacklist.cpp',
-  'file_utils.cpp'
+  'file_utils.cpp',
+  'nvidia.cpp',
+  'gpu_fdinfo.cpp',
+  'amdgpu.cpp'
 )
 
 opengl_files  = []
@@ -89,9 +92,6 @@ if is_unixy
     'battery.cpp',
     'control.cpp',
     'device.cpp',
-    'amdgpu.cpp',
-    'intel.cpp',
-    'msm.cpp',
     'net.cpp',
     'shell.cpp'
   )
@@ -115,7 +115,6 @@ if is_unixy
   if nvml_h_found
     pre_args += '-DHAVE_NVML'
     vklayer_files += files(
-      'nvml.cpp',
       'loaders/loader_nvml.cpp',
     )
   endif
@@ -133,8 +132,7 @@ if is_unixy
 
     pre_args += '-DHAVE_XNVCTRL'
     vklayer_files += files(
-      'loaders/loader_nvctrl.cpp',
-      'nvctrl.cpp',
+      'loaders/loader_nvctrl.cpp'
     )
   endif
 
diff --git a/src/msm.h b/src/msm.h
deleted file mode 100644
index e8c2a1e173..0000000000
--- a/src/msm.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <memory>
-#include <vector>
-
-#include "gpu.h"
-
-class MSM {
-    private:
-        struct gpuInfo gpu_info_msm {};
-        std::vector<FILE*> fdinfo;
-        void find_fd();
-        uint64_t get_gpu_time();
-        void get_fdinfo();
-
-    public:
-        MSM() {
-            find_fd();
-        }
-
-        ~MSM() {
-            for (size_t i = 0; i < fdinfo.size(); i++) {
-                fclose(fdinfo[i]);
-            }
-            fdinfo.clear();
-        }
-
-        void update() {
-            if (!fdinfo.empty())
-                get_fdinfo();
-
-            gpu_info = gpu_info_msm;
-        }
-};
-
-extern std::unique_ptr<MSM> msm;
diff --git a/src/nvapi.cpp b/src/nvapi.cpp
index 19e909e139..71952f2d5d 100644
--- a/src/nvapi.cpp
+++ b/src/nvapi.cpp
@@ -1,6 +1,5 @@
 #include <windows.h>
 #include <iostream>
-#include "nvidia_info.h"
 #include "gpu.h"
 
 // magic numbers, do not change them
@@ -62,6 +61,8 @@ void nvapi_util()
     gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
     (*NvAPI_EnumPhysicalGPUs)(gpuHandles, &gpuCount);
     (*NvAPI_GPU_GetUsages)(gpuHandles[0], gpuUsages);
-    gpu_info.load = gpuUsages[3];
+    // TODO: create a GPU class for nvapi
+    // otherwise we can't display information
+    // gpu_info.load = gpuUsages[3];
 
 }
\ No newline at end of file
diff --git a/src/nvctrl.cpp b/src/nvctrl.cpp
deleted file mode 100644
index 196ed2b4f3..0000000000
--- a/src/nvctrl.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <iostream>
-#include <cstring>
-#include <sstream>
-#include <unordered_map>
-#include <memory>
-#include <functional>
-#include <spdlog/spdlog.h>
-#include "nvctrl.h"
-#include "loaders/loader_nvctrl.h"
-#include "loaders/loader_x11.h"
-#include "string_utils.h"
-#include "overlay.h"
-
-typedef std::unordered_map<std::string, std::string> string_map;
-static std::unique_ptr<Display, std::function<void(Display*)>> display;
-
-struct nvctrlInfo nvctrl_info;
-bool nvctrlSuccess = false;
-int num_coolers = 0;
-
-static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy)
-{
-    char buf[8] {};
-    auto libx11 = get_libx11();
-    for (int i = 0; i < 16; i++) {
-        snprintf(buf, sizeof(buf), ":%d", i);
-        Display *d = libx11->XOpenDisplay(buf);
-        if (d) {
-            int s = libx11->XDefaultScreen(d);
-            if (nvctrl.XNVCTRLIsNvScreen(d, s)) {
-                dpy = d;
-                SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
-                return true;
-            }
-            libx11->XCloseDisplay(d);
-        }
-    }
-    return false;
-}
-
-bool checkXNVCtrl()
-{
-    if (!get_libx11()->IsLoaded())
-        return false;
-
-    auto& nvctrl = get_libnvctrl_loader();
-    if (!nvctrl.IsLoaded()) {
-        SPDLOG_DEBUG("XNVCtrl loader failed to load");
-        return false;
-    }
-
-    Display *dpy;
-    nvctrlSuccess = find_nv_x11(nvctrl, dpy);
-
-    if (!nvctrlSuccess) {
-        SPDLOG_DEBUG("XNVCtrl didn't find the correct display");
-        return false;
-    }
-
-    auto local_x11 = get_libx11();
-    display = { dpy,
-        [local_x11](Display *dpy) {
-            local_x11->XCloseDisplay(dpy);
-        }
-    };
-    // get device id at init
-    int64_t pci_id;
-    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
-                    NV_CTRL_TARGET_TYPE_GPU,
-                    0,
-                    0,
-                    NV_CTRL_PCI_ID,
-                    &pci_id);
-    deviceID = (pci_id & 0xFFFF);
-
-    // get number of coolers at init
-    nvctrl.XNVCTRLQueryTargetCount(display.get(),
-                    NV_CTRL_TARGET_TYPE_COOLER,
-                    &num_coolers);
-
-    return true;
-}
-
-static void parse_token(std::string token, string_map& options) {
-    std::string param, value;
-
-    size_t equal = token.find("=");
-    if (equal == std::string::npos)
-        return;
-
-    value = token.substr(equal+1);
-
-    param = token.substr(0, equal);
-    trim(param);
-    trim(value);
-    if (!param.empty())
-        options[param] = value;
-}
-
-static char* get_attr_target_string(libnvctrl_loader& nvctrl, int attr, int target_type, int target_id) {
-    char* c = nullptr;
-    if (!nvctrl.XNVCTRLQueryTargetStringAttribute(display.get(), target_type, target_id, 0, attr, &c)) {
-        SPDLOG_ERROR("Failed to query attribute '{}'", attr);
-    }
-    return c;
-}
-
-void getNvctrlInfo(){
-    string_map params;
-    std::string token;
-    auto& nvctrl = get_libnvctrl_loader();
-
-    if (!display)
-        return;
-
-    int enums[] = {
-        NV_CTRL_STRING_GPU_UTILIZATION,
-        NV_CTRL_STRING_GPU_CURRENT_CLOCK_FREQS,
-        0 // keep null
-    };
-
-    for (size_t i=0; enums[i]; i++) {
-        char* str = get_attr_target_string(nvctrl, enums[i], NV_CTRL_TARGET_TYPE_GPU, 0);
-        if (!str)
-            continue;
-
-        std::stringstream ss (str);
-        while (std::getline(ss, token, ',')) {
-            parse_token(token, params);
-        }
-        free(str);
-    }
-
-    if (!try_stoi(nvctrl_info.load, params["graphics"]))
-        nvctrl_info.load = 0;
-    if (!try_stoi(nvctrl_info.CoreClock, params["nvclock"]))
-        nvctrl_info.CoreClock = 0;
-    if (!try_stoi(nvctrl_info.MemClock, params["memclock"]))
-        nvctrl_info.MemClock = 0;
-
-    int64_t temp = 0;
-    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
-                        NV_CTRL_TARGET_TYPE_GPU,
-                        0,
-                        0,
-                        NV_CTRL_GPU_CORE_TEMPERATURE,
-                        &temp);
-    nvctrl_info.temp = temp;
-
-    int64_t memtotal = 0;
-    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
-                        NV_CTRL_TARGET_TYPE_GPU,
-                        0,
-                        0,
-                        NV_CTRL_TOTAL_DEDICATED_GPU_MEMORY,
-                        &memtotal);
-    nvctrl_info.memoryTotal = memtotal;
-
-    int64_t memused = 0;
-    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
-                        NV_CTRL_TARGET_TYPE_GPU,
-                        0,
-                        0,
-                        NV_CTRL_USED_DEDICATED_GPU_MEMORY,
-                        &memused);
-    nvctrl_info.memoryUsed = memused;
-
-    nvctrl_info.fan_speed = getNvctrlFanSpeed();
-}
-
-int64_t getNvctrlFanSpeed(){
-    int64_t fan_speed = 0;
-    if (num_coolers >= 1) {
-        auto& nvctrl = get_libnvctrl_loader();
-        nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
-                            NV_CTRL_TARGET_TYPE_COOLER,
-                            0,
-                            0,
-                            NV_CTRL_THERMAL_COOLER_SPEED,
-                            &fan_speed);
-    }
-    return fan_speed;
-}
diff --git a/src/nvctrl.h b/src/nvctrl.h
deleted file mode 100644
index 2335a5a085..0000000000
--- a/src/nvctrl.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-#ifndef MANGOHUD_NVCTRL_H
-#define MANGOHUD_NVCTRL_H
-
-struct nvctrlInfo{
-    int load;
-    int temp;
-    float memoryUsed;
-    float memoryTotal;
-    int MemClock;
-    int CoreClock;
-    int fan_speed;
-};
-
-extern struct nvctrlInfo nvctrl_info;
-extern bool nvctrlSuccess;
-bool checkXNVCtrl(void);
-void getNvctrlInfo(void);
-int64_t getNvctrlFanSpeed();
-
-#endif //MANGOHUD_NVCTRL_H
diff --git a/src/nvidia.cpp b/src/nvidia.cpp
new file mode 100644
index 0000000000..a839eb3a4f
--- /dev/null
+++ b/src/nvidia.cpp
@@ -0,0 +1,297 @@
+#include "nvml.h"
+#include "hud_elements.h"
+#include "logging.h"
+#include "gpu.h"
+#include "string_utils.h"
+#include <thread>
+#include <chrono>
+#include "mesa/util/macros.h"
+
+#ifdef HAVE_XNVCTRL
+void NVIDIA::parse_token(std::string token, std::unordered_map<std::string, std::string>& options) {
+    // Split "key=value" at the first '='; a token without one is ignored.
+    const size_t sep = token.find("=");
+    if (sep == std::string::npos)
+        return;
+
+    std::string key = token.substr(0, sep);
+    std::string val = token.substr(sep + 1);
+    trim(key);
+    trim(val);
+    // A key that is empty after trimming is dropped instead of stored under "".
+    if (key.empty())
+        return;
+    options[key] = val;
+}
+#endif
+
+// Probe NVML/XNVCtrl for the GPU at pciBusId; start sampling if either works.
+NVIDIA::NVIDIA(const char* pciBusId) {
+    nvml_available = false;
+    nvctrl_available = false;
+#ifdef HAVE_NVML
+    auto& nvml = get_libnvml_loader();
+    if (nvml.IsLoaded()) {
+        nvmlReturn_t result = nvml.nvmlInit();
+        if (result != NVML_SUCCESS)
+            SPDLOG_ERROR("Nvidia module not loaded");
+        else if (!pciBusId)
+            SPDLOG_ERROR("Getting device handle failed: no PCI bus ID given");
+        else if ((result = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &device)) != NVML_SUCCESS)
+            SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(result));
+        else
+            nvml_available = true; // only a valid device handle enables NVML
+    }
+    if (nvml_available) {
+        // The first call only reports the needed entry count; resize, then fetch.
+        nvml.nvmlDeviceGetComputeRunningProcesses(device, &infoCount, process_info);
+        delete[] process_info;
+        process_info = new nvmlProcessInfo_t[infoCount];
+        if (nvml.nvmlDeviceGetComputeRunningProcesses(device, &infoCount, process_info) != NVML_SUCCESS)
+            infoCount = 0; // never advertise entries that were not written
+    }
+#endif
+#ifdef HAVE_XNVCTRL
+    num_coolers = 0;
+    auto& nvctrl = get_libnvctrl_loader();
+    if (!get_libx11()->IsLoaded()) {
+        SPDLOG_DEBUG("XNVCtrl: X11 not loaded");
+    } else if (!nvctrl.IsLoaded()) {
+        SPDLOG_DEBUG("XNVCtrl loader failed to load");
+    } else {
+        Display *dpy = nullptr;
+        nvctrl_available = find_nv_x11(nvctrl, dpy);
+        if (!nvctrl_available) {
+            SPDLOG_DEBUG("XNVCtrl didn't find the correct display");
+        } else {
+            // Own the connection so later queries use a live display.
+            display = { dpy, [](Display *d) { get_libx11()->XCloseDisplay(d); } };
+            nvctrl.XNVCTRLQueryTargetCount(display.get(), NV_CTRL_TARGET_TYPE_COOLER, &num_coolers);
+        }
+    }
+#endif
+    if (nvml_available || nvctrl_available) {
+        throttling = std::make_shared<Throttling>(0x10de);
+        // NOTE(review): still detached; owning it needs ~NVIDIA to wake a paused sampler.
+        std::thread sampler(&NVIDIA::get_samples_and_copy, this);
+        sampler.detach();
+    }
+}
+
+#ifdef HAVE_NVML
+// Takes one NVML sample into *metrics; XNVCtrl supplies fan RPM when available.
+void NVIDIA::get_instant_metrics_nvml(struct gpu_metrics *metrics) {
+    if (!nvml_available)
+        return;
+    auto params = HUDElements.params;
+    auto& nvml = get_libnvml_loader();
+    // Out-parameters are zero-initialised so a failed query yields 0, not garbage.
+    struct nvmlUtilization_st nvml_utilization {};
+    nvmlReturn_t response = nvml.nvmlDeviceGetUtilizationRates(device, &nvml_utilization);
+    if (response == NVML_ERROR_NOT_SUPPORTED) {
+        SPDLOG_ERROR("nvmlDeviceGetUtilizationRates failed, disabling nvml metrics");
+        nvml_available = false;
+    }
+    metrics->load = nvml_utilization.gpu;
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp] || logger->is_active()) {
+        unsigned int temp = 0;
+        nvml.nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp);
+        metrics->temp = temp;
+    }
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_vram] || logger->is_active()) {
+        struct nvmlMemory_st nvml_memory {};
+        nvml.nvmlDeviceGetMemoryInfo(device, &nvml_memory);
+        metrics->memoryTotal = nvml_memory.total / (1024.f * 1024.f * 1024.f);
+        metrics->memoryUsed = nvml_memory.used / (1024.f * 1024.f * 1024.f);
+    }
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || logger->is_active()) {
+        unsigned int core_clock = 0;
+        nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &core_clock);
+        metrics->CoreClock = core_clock;
+    }
+    if (params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock] || logger->is_active()) {
+        unsigned int memory_clock = 0;
+        nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &memory_clock);
+        metrics->MemClock = memory_clock;
+    }
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_gpu_power] || logger->is_active()) {
+        unsigned int power = 0;
+        nvml.nvmlDeviceGetPowerUsage(device, &power);
+        // Scale by 1/1000 in floating point so the fractional part is not truncated.
+        metrics->powerUsage = power / 1000.f;
+    }
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_throttling_status]) {
+        unsigned long long nvml_throttle_reasons = 0;
+        nvml.nvmlDeviceGetCurrentClocksThrottleReasons(device, &nvml_throttle_reasons);
+        metrics->is_temp_throttled = (nvml_throttle_reasons & 0x0000000000000060LL) != 0;
+        metrics->is_power_throttled = (nvml_throttle_reasons & 0x000000000000008CLL) != 0;
+        metrics->is_other_throttled = (nvml_throttle_reasons & 0x0000000000000112LL) != 0;
+        if (throttling)
+            throttling->indep_throttle_status = nvml_throttle_reasons;
+    }
+
+    if (params->enabled[OVERLAY_PARAM_ENABLED_gpu_fan] || logger->is_active()) {
+        unsigned int fan_speed = 0;
+        nvml.nvmlDeviceGetFanSpeed(device, &fan_speed);
+        metrics->fan_speed = fan_speed;
+        metrics->fan_rpm = false;
+    }
+#ifdef HAVE_XNVCTRL
+    if (nvctrl_available) {
+        metrics->fan_rpm = true;
+        metrics->fan_speed = NVIDIA::get_nvctrl_fan_speed();
+    }
+#endif
+}
+#endif
+
+#ifdef HAVE_XNVCTRL
+// Takes one sample through XNVCtrl; does nothing while NVML is providing data.
+void NVIDIA::get_instant_metrics_xnvctrl(struct gpu_metrics *metrics) {
+    if (!display)
+        nvctrl_available = false;
+    if (!nvctrl_available || nvml_available)
+        return;
+
+    auto& nvctrl = get_libnvctrl_loader();
+    std::unordered_map<std::string, std::string> xnvctrl_params;
+    std::string token;
+    const int enums[] = {
+        NV_CTRL_STRING_GPU_UTILIZATION,
+        NV_CTRL_STRING_GPU_CURRENT_CLOCK_FREQS,
+    };
+
+    // Each attribute string is split on ',' and every piece parsed as key=value.
+    for (int attr : enums) {
+        char* str = get_attr_target_string(nvctrl, attr, NV_CTRL_TARGET_TYPE_GPU, 0);
+        if (!str)
+            continue;
+        std::stringstream ss (str);
+        while (std::getline(ss, token, ','))
+            parse_token(token, xnvctrl_params);
+        free(str);
+    }
+
+    if (!try_stoi(metrics->load, xnvctrl_params["graphics"]))
+        metrics->load = 0;
+    if (!try_stoi(metrics->CoreClock, xnvctrl_params["nvclock"]))
+        metrics->CoreClock = 0;
+    if (!try_stoi(metrics->MemClock, xnvctrl_params["memclock"]))
+        metrics->MemClock = 0;
+
+    int64_t temp = 0;
+    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
+                        NV_CTRL_TARGET_TYPE_GPU,
+                        0,
+                        0,
+                        NV_CTRL_GPU_CORE_TEMPERATURE,
+                        &temp);
+    metrics->temp = temp;
+
+    // XNVCtrl reports dedicated memory in MB; store GiB like the NVML path does.
+    int64_t memtotal = 0;
+    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
+                        NV_CTRL_TARGET_TYPE_GPU,
+                        0,
+                        0,
+                        NV_CTRL_TOTAL_DEDICATED_GPU_MEMORY,
+                        &memtotal);
+    metrics->memoryTotal = memtotal / 1024.f;
+
+    int64_t memused = 0;
+    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
+                        NV_CTRL_TARGET_TYPE_GPU,
+                        0,
+                        0,
+                        NV_CTRL_USED_DEDICATED_GPU_MEMORY,
+                        &memused);
+    metrics->memoryUsed = memused / 1024.f;
+
+    // Same flag the NVML path sets whenever it takes the fan reading from XNVCtrl.
+    metrics->fan_rpm = true;
+    metrics->fan_speed = NVIDIA::get_nvctrl_fan_speed();
+}
+#endif
+
+// Sampling loop: gathers METRICS_SAMPLE_COUNT samples, then publishes aggregates.
+void NVIDIA::get_samples_and_copy() {
+    struct gpu_metrics metrics_buffer[METRICS_SAMPLE_COUNT] {};
+    while(!stop_thread) {
+        for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) {
+#ifdef HAVE_NVML
+            NVIDIA::get_instant_metrics_nvml(&metrics_buffer[cur_sample_id]);
+#endif
+#ifdef HAVE_XNVCTRL
+            NVIDIA::get_instant_metrics_xnvctrl(&metrics_buffer[cur_sample_id]);
+#endif
+            std::this_thread::sleep_for(std::chrono::milliseconds(METRICS_POLLING_PERIOD_MS));
+        }
+        if (stop_thread) break;
+        std::unique_lock<std::mutex> lock(metrics_mutex);
+        cond_var.wait(lock, [this]() { return !paused || stop_thread; });
+        GPU_UPDATE_METRIC_AVERAGE(load);
+        GPU_UPDATE_METRIC_AVERAGE_FLOAT(powerUsage);
+        GPU_UPDATE_METRIC_AVERAGE(CoreClock);
+        GPU_UPDATE_METRIC_AVERAGE(MemClock);
+        GPU_UPDATE_METRIC_AVERAGE(temp);
+        // VRAM figures and the fan unit flag are sampled above, so publish them too.
+        GPU_UPDATE_METRIC_AVERAGE_FLOAT(memoryUsed);
+        GPU_UPDATE_METRIC_AVERAGE_FLOAT(memoryTotal);
+        GPU_UPDATE_METRIC_MAX(is_power_throttled);
+        GPU_UPDATE_METRIC_MAX(is_current_throttled);
+        GPU_UPDATE_METRIC_MAX(is_temp_throttled);
+        GPU_UPDATE_METRIC_MAX(is_other_throttled);
+        GPU_UPDATE_METRIC_MAX(fan_speed);
+        GPU_UPDATE_METRIC_MAX(fan_rpm);
+    }
+}
+
+#ifdef HAVE_XNVCTRL
+// Returns cooler 0's speed from XNVCtrl, or 0 when no display/cooler is usable.
+int64_t NVIDIA::get_nvctrl_fan_speed(){
+    int64_t fan_speed = 0;
+    // Skip the query rather than hand XNVCtrl a null Display.
+    if (!display || num_coolers < 1)
+        return fan_speed;
+
+    auto& nvctrl = get_libnvctrl_loader();
+    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(), NV_CTRL_TARGET_TYPE_COOLER, 0, 0,
+                                         NV_CTRL_THERMAL_COOLER_SPEED, &fan_speed);
+
+    return fan_speed;
+}
+#endif
+
+#ifdef HAVE_XNVCTRL
+char* NVIDIA::get_attr_target_string(libnvctrl_loader& nvctrl, int attr, int target_type, int target_id) {
+    char* result = nullptr; // stays nullptr when the query fails
+    const bool ok = nvctrl.XNVCTRLQueryTargetStringAttribute(display.get(), target_type, target_id, 0, attr, &result);
+    if (!ok)
+        SPDLOG_ERROR("Failed to query attribute '{}'", attr);
+    return result;
+}
+#endif
+
+#ifdef HAVE_XNVCTRL
+// Opens $DISPLAY; if its default screen is NVIDIA-driven, returns it via dpy.
+bool NVIDIA::find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy)
+{
+    const char *name = getenv("DISPLAY");
+    auto libx11 = get_libx11();
+    Display *d = libx11->XOpenDisplay(name);
+    if (!d)
+        return false;
+    if (nvctrl.XNVCTRLIsNvScreen(d, libx11->XDefaultScreen(d))) {
+        dpy = d;
+        SPDLOG_DEBUG("XNVCtrl is using display {}", name ? name : "(default)");
+        return true;
+    }
+    libx11->XCloseDisplay(d);
+    return false;
+}
+#endif
\ No newline at end of file
diff --git a/src/nvidia.h b/src/nvidia.h
new file mode 100644
index 0000000000..7e8d8b9583
--- /dev/null
+++ b/src/nvidia.h
@@ -0,0 +1,69 @@
+#pragma once
+
+// Samples one NVIDIA GPU through NVML and/or XNVCtrl on a background thread.
+class NVIDIA {
+    public:
+        std::shared_ptr<Throttling> throttling;
+
+        // Returns a copy of the most recently published metrics.
+        gpu_metrics copy_metrics() {
+            std::lock_guard<std::mutex> lock(metrics_mutex);
+            return metrics;
+        };
+        void get_samples_and_copy();
+        NVIDIA(const char* pciBusId);
+        ~NVIDIA() {
+            set_flag(stop_thread, true); // also wakes a paused sampler
+            if (thread.joinable())
+                thread.join();
+#ifdef HAVE_NVML
+            delete[] process_info;
+#endif
+        };
+
+#ifdef HAVE_NVML
+        std::vector<int> pids() {
+            std::vector<int> vec;
+            for (size_t i = 0; i < infoCount; i++)
+                vec.push_back(static_cast<int> (process_info[i].pid));
+            return vec;
+        };
+#endif
+
+        void pause() { set_flag(paused, true); }
+        void resume() { set_flag(paused, false); }
+
+    private:
+        // Flip a flag under metrics_mutex so the waiting sampler cannot miss the wake-up.
+        void set_flag(std::atomic<bool>& flag, bool value) {
+            { std::lock_guard<std::mutex> lock(metrics_mutex); flag = value; }
+            cond_var.notify_all();
+        }
+#ifdef HAVE_XNVCTRL
+        std::unique_ptr<Display, std::function<void(Display*)>> display;
+        int num_coolers = 0;
+        int64_t get_nvctrl_fan_speed();
+#endif
+#ifdef HAVE_NVML
+        // Declared ahead of process_info because its initializer reads it.
+        unsigned int infoCount = 0;
+        nvmlProcessInfo_t *process_info = new nvmlProcessInfo_t[infoCount];
+        nvmlDevice_t device {};
+        void get_instant_metrics_nvml(struct gpu_metrics *metrics);
+#endif
+        bool nvml_available = false;
+        bool nvctrl_available = false;
+        bool failed = false;
+        std::mutex metrics_mutex;
+        gpu_metrics metrics {};
+        std::thread thread;
+        std::condition_variable cond_var;
+        std::atomic<bool> stop_thread = false;
+        std::atomic<bool> paused = false;
+#ifdef HAVE_XNVCTRL
+        void get_instant_metrics_xnvctrl(struct gpu_metrics *metrics);
+        void parse_token(std::string token, std::unordered_map<std::string, std::string>& options);
+        bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy);
+        char* get_attr_target_string(libnvctrl_loader& nvctrl, int attr, int target_type, int target_id);
+#endif
+};
\ No newline at end of file
diff --git a/src/nvidia_info.h b/src/nvidia_info.h
deleted file mode 100644
index e74943c9cb..0000000000
--- a/src/nvidia_info.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-#ifndef MANGOHUD_NVIDIA_INFO_H
-#define MANGOHUD_NVIDIA_INFO_H
-
-#include <nvml.h>
-#include "overlay_params.h"
-
-extern nvmlReturn_t result;
-extern unsigned int nvidiaTemp, processSamplesCount, *vgpuInstanceSamplesCount, nvidiaCoreClock, nvidiaMemClock, nvidiaPowerUsage, nvidiaFanSpeed;
-extern nvmlDevice_t nvidiaDevice;
-extern struct nvmlUtilization_st nvidiaUtilization;
-extern struct nvmlMemory_st nvidiaMemory;
-extern bool nvmlSuccess;
-extern unsigned long long nvml_throttle_reasons;
-
-bool checkNVML(const char* pciBusId);
-bool getNVMLInfo(const struct overlay_params& params);
-
-#endif //MANGOHUD_NVIDIA_INFO_H
diff --git a/src/nvml.cpp b/src/nvml.cpp
deleted file mode 100644
index 4a21aec619..0000000000
--- a/src/nvml.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <spdlog/spdlog.h>
-#include "loaders/loader_nvml.h"
-#include "nvidia_info.h"
-#include <iostream>
-#include "overlay.h"
-#include "overlay_params.h"
-#include "nvctrl.h"
-#include "logging.h"
-
-nvmlReturn_t result;
-nvmlDevice_t nvidiaDevice;
-nvmlPciInfo_t nvidiaPciInfo;
-bool nvmlSuccess = false;
-unsigned int nvidiaTemp = 0, nvidiaCoreClock = 0, nvidiaMemClock = 0, nvidiaPowerUsage = 0, nvidiaFanSpeed = 0;
-unsigned long long nvml_throttle_reasons;
-struct nvmlUtilization_st nvidiaUtilization;
-struct nvmlMemory_st nvidiaMemory {};
-struct nvmlUnit_st* nvidiaUnit {};
-
-bool checkNVML(const char* pciBusId){
-    auto& nvml = get_libnvml_loader();
-    if (nvml.IsLoaded()){
-        result = nvml.nvmlInit();
-        if (NVML_SUCCESS != result) {
-            SPDLOG_ERROR("Nvidia module not loaded");
-        } else {
-            nvmlReturn_t ret = NVML_ERROR_UNKNOWN;
-            if (pciBusId && ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &nvidiaDevice)) != NVML_SUCCESS)) {
-                SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
-                SPDLOG_ERROR("Using index 0.");
-            }
-
-            if (ret != NVML_SUCCESS)
-                ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvidiaDevice);
-
-            if (ret != NVML_SUCCESS)
-                SPDLOG_ERROR("Getting device handle failed: {}", nvml.nvmlErrorString(ret));
-
-            nvmlSuccess = (ret == NVML_SUCCESS);
-            if (ret == NVML_SUCCESS)
-                nvml.nvmlDeviceGetPciInfo_v3(nvidiaDevice, &nvidiaPciInfo);
-
-            return nvmlSuccess;
-        }
-    } else {
-        SPDLOG_ERROR("Failed to load NVML");
-    }
-
-    return false;
-}
-
-bool getNVMLInfo(const struct overlay_params& params){
-    nvmlReturn_t response;
-    auto& nvml = get_libnvml_loader();
-    response = nvml.nvmlDeviceGetUtilizationRates(nvidiaDevice, &nvidiaUtilization);
-    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_temp] || logger->is_active())
-        nvml.nvmlDeviceGetTemperature(nvidiaDevice, NVML_TEMPERATURE_GPU, &nvidiaTemp);
-    if (params.enabled[OVERLAY_PARAM_ENABLED_vram] || logger->is_active())
-        nvml.nvmlDeviceGetMemoryInfo(nvidiaDevice, &nvidiaMemory);
-    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || logger->is_active())
-        nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
-    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock] || logger->is_active())
-        nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_MEM, &nvidiaMemClock);
-    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_power] || logger->is_active())
-        nvml.nvmlDeviceGetPowerUsage(nvidiaDevice, &nvidiaPowerUsage);
-    deviceID = nvidiaPciInfo.pciDeviceId >> 16;
-    if (params.enabled[OVERLAY_PARAM_ENABLED_throttling_status])
-        nvml.nvmlDeviceGetCurrentClocksThrottleReasons(nvidiaDevice, &nvml_throttle_reasons);
-
-    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_fan] || logger->is_active())
-        nvml.nvmlDeviceGetFanSpeed(nvidiaDevice, &nvidiaFanSpeed);
-
-    if (response == NVML_ERROR_NOT_SUPPORTED) {
-        if (nvmlSuccess)
-            SPDLOG_ERROR("nvmlDeviceGetUtilizationRates failed");
-        nvmlSuccess = false;
-    }
-    return nvmlSuccess;
-}
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 6f21dd6493..4976c02cc6 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -24,8 +24,6 @@
 #include "iostats.h"
 #include "amdgpu.h"
 #include "fps_metrics.h"
-#include "intel.h"
-#include "msm.h"
 #include "net.h"
 
 #ifdef __linux__
@@ -128,20 +126,8 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
 #endif
    }
    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] || logger->is_active()) {
-      if (vendorID == 0x1002)
-         getAmdGpuInfo();
-#ifdef __linux__
-      if (gpu_metrics_exists)
-         amdgpu_get_metrics(deviceID);
-#endif
-      if (vendorID == 0x10de)
-         getNvidiaGpuInfo(params);
-#ifdef __linux__
-      if (vendorID== 0x8086)
-         if (intel) intel->update();
-      if (vendorID == 0x5143)
-         if (msm) msm->update();
-#endif
+      if (HUDElements.gpus)
+         HUDElements.gpus->get_metrics();
    }
 
 #ifdef __linux__
@@ -160,13 +146,14 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
    if (params.enabled[OVERLAY_PARAM_ENABLED_io_read] || params.enabled[OVERLAY_PARAM_ENABLED_io_write])
       getIoStats(g_io_stats);
 #endif
-
-   currentLogData.gpu_load = gpu_info.load;
-   currentLogData.gpu_temp = gpu_info.temp;
-   currentLogData.gpu_core_clock = gpu_info.CoreClock;
-   currentLogData.gpu_mem_clock = gpu_info.MemClock;
-   currentLogData.gpu_vram_used = gpu_info.memoryUsed;
-   currentLogData.gpu_power = gpu_info.powerUsage;
+   if (HUDElements.gpus && HUDElements.gpus->active_gpu()) {
+      currentLogData.gpu_load = HUDElements.gpus->active_gpu()->metrics.load;
+      currentLogData.gpu_temp = HUDElements.gpus->active_gpu()->metrics.temp;
+      currentLogData.gpu_core_clock = HUDElements.gpus->active_gpu()->metrics.CoreClock;
+      currentLogData.gpu_mem_clock = HUDElements.gpus->active_gpu()->metrics.MemClock;
+      currentLogData.gpu_vram_used = HUDElements.gpus->active_gpu()->metrics.memoryUsed;
+      currentLogData.gpu_power = HUDElements.gpus->active_gpu()->metrics.powerUsage;
+   }
 #ifdef __linux__
    currentLogData.ram_used = memused;
    currentLogData.swap_used = swapused;
@@ -257,8 +244,8 @@ void update_hud_info_with_frametime(struct swapchain_stats& sw_stats, const stru
       frametime_data.erase(frametime_data.begin());
    }
 #ifdef __linux__
-   if (throttling)
-      throttling->update();
+   if (HUDElements.gpus)
+      HUDElements.gpus->update_throttling();
 #endif
    frametime = frametime_ms;
    fps = double(1000 / frametime_ms);
@@ -765,216 +752,6 @@ struct pci_bus {
    int func;
 };
 
-void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_params& params)
-{
-   //if (!params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
-   //   return;
-
-   pci_bus pci;
-   bool pci_bus_parsed = false;
-   const char *pci_dev = nullptr;
-   if (!params.pci_dev.empty())
-      pci_dev = params.pci_dev.c_str();
-
-   // for now just checks if pci bus parses correctly, if at all necessary
-   if (pci_dev) {
-      if (sscanf(pci_dev, "%04x:%02x:%02x.%x",
-               &pci.domain, &pci.bus,
-               &pci.slot, &pci.func) == 4) {
-         pci_bus_parsed = true;
-         // reformat back to sysfs file name's and nvml's expected format
-         // so config file param's value format doesn't have to be as strict
-         std::stringstream ss;
-         ss << std::hex
-            << std::setw(4) << std::setfill('0') << pci.domain << ":"
-            << std::setw(2) << pci.bus << ":"
-            << std::setw(2) << pci.slot << "."
-            << std::setw(1) << pci.func;
-         params.pci_dev = ss.str();
-         pci_dev = params.pci_dev.c_str();
-         SPDLOG_DEBUG("PCI device ID: '{}'", pci_dev);
-      } else {
-         SPDLOG_ERROR("Failed to parse PCI device ID: '{}'", pci_dev);
-         SPDLOG_ERROR("Specify it as 'domain:bus:slot.func'");
-      }
-   }
-
-#ifdef __linux__
-   // NVIDIA
-   if (vendorID == 0x10de)
-      if(checkNvidia(pci_dev))
-         vendorID = 0x10de;
-
-   string path;
-   string drm = "/sys/class/drm/";
-
-   if (vendorID==0x8086){
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         if (dir.find("-") != std::string::npos)
-             continue; // filter display adapters
-
-         FILE *fp;
-         string device = path + "/device/device";
-         if ((fp = fopen(device.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) == 1) {
-               if (temp != reported_deviceID){
-                  fclose(fp);
-                  SPDLOG_DEBUG("DeviceID does not match vulkan report {:X}", reported_deviceID);
-                  continue;
-               }
-               deviceID = temp;
-            }
-            fclose(fp);
-         }
-
-         string vendor = path + "/device/vendor";
-         if ((fp = fopen(vendor.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) != 1 || temp != 0x8086) {
-               fclose(fp);
-               continue;
-            }
-            fclose(fp);
-         }
-         path = drm + dir;
-         drm_dev = dir;
-         SPDLOG_DEBUG("Intel: using drm device {}", drm_dev);
-         intel = std::make_unique<Intel>();
-         break;
-      }
-   }
-
-   if (vendorID == 0x5143) {
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         if (dir.find("-") != std::string::npos) {
-             continue; // filter display adapters
-         }
-         path = drm + dir;
-         drm_dev = dir;
-         SPDLOG_DEBUG("msm: using drm device {}", drm_dev);
-         msm = std::make_unique<MSM>();
-      }
-   }
-
-   if (vendorID == 0x1002
-       || gpu.find("Radeon") != std::string::npos
-       || gpu.find("AMD") != std::string::npos) {
-      string path;
-      string drm = "/sys/class/drm/";
-
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         if (dir.find("-") != std::string::npos) {
-             continue; // filter display adapters
-         }
-         path = drm + dir;
-
-         SPDLOG_DEBUG("drm path check: {}", path);
-         if (pci_bus_parsed && pci_dev) {
-            string pci_device = read_symlink((path + "/device").c_str());
-            SPDLOG_DEBUG("PCI device symlink: '{}'", pci_device);
-            if (!ends_with(pci_device, pci_dev)) {
-               SPDLOG_DEBUG("skipping GPU, no PCI ID match");
-               continue;
-            }
-         }
-
-         FILE *fp;
-         string device = path + "/device/device";
-         if ((fp = fopen(device.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) == 1) {
-               if (!pci_bus_parsed && reported_deviceID && temp != reported_deviceID){
-                  fclose(fp);
-                  SPDLOG_DEBUG("DeviceID does not match vulkan report {:X}", reported_deviceID);
-                  continue;
-               }
-               deviceID = temp;
-            }
-            fclose(fp);
-         }
-
-         string vendor = path + "/device/vendor";
-         if ((fp = fopen(vendor.c_str(), "r"))){
-            uint32_t temp = 0;
-            if (fscanf(fp, "%x", &temp) != 1 || temp != 0x1002) {
-               fclose(fp);
-               continue;
-            }
-            fclose(fp);
-         }
-
-         const std::string device_path = path + "/device";
-         const std::string gpu_metrics_path = device_path + "/gpu_metrics";
-         if (amdgpu_verify_metrics(gpu_metrics_path)) {
-            gpu_info.fan_rpm = true;
-            gpu_metrics_exists = true;
-            metrics_path = gpu_metrics_path;
-            throttling = std::make_unique<Throttling>();
-            SPDLOG_DEBUG("Using gpu_metrics of {}", gpu_metrics_path);
-         }
-
-         if (!amdgpu.vram_total)
-            amdgpu.vram_total = fopen((device_path + "/mem_info_vram_total").c_str(), "r");
-         if (!amdgpu.vram_used)
-            amdgpu.vram_used = fopen((device_path + "/mem_info_vram_used").c_str(), "r");
-         if (!amdgpu.gtt_used)
-            amdgpu.gtt_used = fopen((device_path + "/mem_info_gtt_used").c_str(), "r");
-
-         const std::string hwmon_path = device_path + "/hwmon/";
-         if (fs::exists(hwmon_path)){
-            const auto dirs = ls(hwmon_path.c_str(), "hwmon", LS_DIRS);
-            for (const auto& dir : dirs) {
-               if (!amdgpu.temp)
-                  amdgpu.temp = fopen((hwmon_path + dir + "/temp1_input").c_str(), "r");
-               if (!amdgpu.junction_temp)
-                  amdgpu.junction_temp = fopen((hwmon_path + dir + "/temp2_input").c_str(), "r");
-               if (!amdgpu.memory_temp)
-                  amdgpu.memory_temp = fopen((hwmon_path + dir + "/temp3_input").c_str(), "r");
-               if (!amdgpu.core_clock)
-                  amdgpu.core_clock = fopen((hwmon_path + dir + "/freq1_input").c_str(), "r");
-               if (!amdgpu.gpu_voltage_soc)
-                  amdgpu.gpu_voltage_soc = fopen((hwmon_path + dir + "/in0_input").c_str(), "r");
-            }
-
-            if (!metrics_path.empty())
-               break;
-
-            // The card output nodes - cardX-output, will point to the card node
-            // As such the actual metrics nodes will be missing.
-            amdgpu.busy = fopen((device_path + "/gpu_busy_percent").c_str(), "r");
-            if (!amdgpu.busy)
-               continue;
-
-            SPDLOG_DEBUG("using amdgpu path: {}", device_path);
-
-            for (const auto& dir : dirs) {
-               if (!amdgpu.memory_clock)
-                  amdgpu.memory_clock = fopen((hwmon_path + dir + "/freq2_input").c_str(), "r");
-               if (!amdgpu.power_usage)
-                  amdgpu.power_usage = fopen((hwmon_path + dir + "/power1_average").c_str(), "r");
-               if (!amdgpu.power_usage)
-                  amdgpu.power_usage = fopen((hwmon_path + dir + "/power1_input").c_str(), "r");
-               if (!amdgpu.fan)
-                  amdgpu.fan = fopen((hwmon_path + dir + "/fan1_input").c_str(), "r");
-            }
-         }
-         break;
-      }
-
-      // don't bother then
-      if (metrics_path.empty() && !amdgpu.busy && vendorID != 0x8086) {
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
-      }
-   }
-#endif
-   if (!params.permit_upload)
-      SPDLOG_DEBUG("Uploading is disabled (permit_upload = 0)");
-}
-
 void init_system_info(){
    #ifdef __linux__
       const char* ld_preload = getenv("LD_PRELOAD");
diff --git a/src/overlay.h b/src/overlay.h
index 8b53285c09..f97a6c8bf2 100644
--- a/src/overlay.h
+++ b/src/overlay.h
@@ -101,7 +101,6 @@ void render_imgui(swapchain_stats& data, struct overlay_params& params, ImVec2&
 void update_hud_info(struct swapchain_stats& sw_stats, const struct overlay_params& params, uint32_t vendorID);
 void update_hud_info_with_frametime(struct swapchain_stats& sw_stats, const struct overlay_params& params, uint32_t vendorID, uint64_t frametime_ns);
 void update_hw_info(const struct overlay_params& params, uint32_t vendorID);
-void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_params& params);
 void init_cpu_stats(overlay_params& params);
 void check_keybinds(overlay_params& params, uint32_t vendorID);
 void init_system_info(void);
diff --git a/src/overlay_params.h b/src/overlay_params.h
index 1ed33ed046..8378b4238b 100644
--- a/src/overlay_params.h
+++ b/src/overlay_params.h
@@ -115,6 +115,7 @@ typedef unsigned long KeySym;
    OVERLAY_PARAM_BOOL(present_mode)                  \
    OVERLAY_PARAM_BOOL(time_no_label)                 \
    OVERLAY_PARAM_BOOL(display_server)                \
+   OVERLAY_PARAM_BOOL(active_gpu)                    \
    OVERLAY_PARAM_CUSTOM(fps_sampling_period)         \
    OVERLAY_PARAM_CUSTOM(output_folder)               \
    OVERLAY_PARAM_CUSTOM(output_file)                 \
diff --git a/src/vulkan.cpp b/src/vulkan.cpp
index 2db5310029..e9f896cd03 100644
--- a/src/vulkan.cpp
+++ b/src/vulkan.cpp
@@ -1859,7 +1859,6 @@ static VkResult overlay_CreateDevice(
       gpu = device_data->properties.deviceName;
       SPDLOG_DEBUG("gpu: {}", gpu);
 #endif
-      init_gpu_stats(device_data->properties.vendorID, device_data->properties.deviceID, device_data->instance->params);
    }
 
    return result;