Skip to content

Commit

Permalink
Refactor NVML, allow unavailable items to disappear, make thermal dis…
Browse files Browse the repository at this point in the history
…play color thresholds configurable both compile-time and runtime
  • Loading branch information
Spudz76 committed Oct 20, 2019
1 parent 69af502 commit 625ea00
Show file tree
Hide file tree
Showing 13 changed files with 198 additions and 37 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ Use [config.xmrig.com](https://config.xmrig.com/nvidia) to generate, edit or sha
--cuda-bfactor=[0-12] run CryptoNight core kernel in smaller pieces
--cuda-bsleep=N insert a delay of N microseconds between kernel launches
--cuda-affinity=N affine GPU threads to a CPU
--temp-low=N comma-separated list of Celsius temperatures below which the display is green
--temp-high=N comma-separated list of Celsius temperatures above which the display is red
--no-color disable colored output
--variant algorithm PoW variant
--donate-level=N donate level, default 5% (5 minutes in 100 minutes)
Expand Down
2 changes: 2 additions & 0 deletions src/common/interfaces/IConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class IConfig
CudaLaunchKey = 1204,
CudaAffinityKey = 1205,
CudaMaxUsageKey = 1206,
NvmlTempL = 1207,
NvmlTempH = 1208,
};

virtual ~IConfig() = default;
Expand Down
10 changes: 10 additions & 0 deletions src/core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const
doc.AddMember("cuda-bfactor", m_cudaCLI.bfactor(), allocator);
doc.AddMember("cuda-bsleep", m_cudaCLI.bsleep(), allocator);
doc.AddMember("cuda-max-threads", m_maxGpuThreads, allocator);
doc.AddMember("temp-low", m_cudaCLI.temp_low(), allocator);
doc.AddMember("temp-high", m_cudaCLI.temp_high(), allocator);
doc.AddMember("donate-level", donateLevel(), allocator);
doc.AddMember("log-file", logFile() ? Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator);
doc.AddMember("pools", m_pools.toJSON(doc), allocator);
Expand Down Expand Up @@ -181,6 +183,14 @@ bool xmrig::Config::parseString(int key, const char *arg)
case CudaMaxUsageKey:
return parseUint64(key, strtoul(arg, nullptr, 10));

case NvmlTempL: /* --temp-low */
m_cudaCLI.parseTempLow(arg);
break;

case NvmlTempH: /* --temp-high */
m_cudaCLI.parseTempHigh(arg);
break;

default:
break;
}
Expand Down
4 changes: 4 additions & 0 deletions src/core/ConfigLoader_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ static struct option const options[] = {
{ "cuda-max-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey },
{ "max-gpu-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, // deprecated, use --cuda-max-threads instead.
{ "max-gpu-usage", 1, nullptr, xmrig::IConfig::CudaMaxUsageKey }, // deprecated.
{ "temp-low", 1, nullptr, xmrig::IConfig::NvmlTempL },
{ "temp-high", 1, nullptr, xmrig::IConfig::NvmlTempH },
{ "config", 1, nullptr, xmrig::IConfig::ConfigKey },
{ "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey },
{ "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey },
Expand Down Expand Up @@ -106,6 +108,8 @@ static struct option const config_options[] = {
{ "cuda-max-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey },
{ "max-gpu-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, // deprecated, use --cuda-max-threads instead.
{ "max-gpu-usage", 1, nullptr, xmrig::IConfig::CudaMaxUsageKey }, // deprecated.
{ "temp-low", 1, nullptr, xmrig::IConfig::NvmlTempL },
{ "temp-high", 1, nullptr, xmrig::IConfig::NvmlTempH },
{ nullptr, 0, nullptr, 0 }
};

Expand Down
2 changes: 2 additions & 0 deletions src/core/usage.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Options:\n\
--cuda-bfactor=[0-12] run CryptoNight core kernel in smaller pieces\n\
--cuda-bsleep=N insert a delay of N microseconds between kernel launches\n\
--cuda-affinity=N affine GPU threads to a CPU\n\
--temp-low=N list of celsius temperature below which is green\n\
--temp-high=N list of celsius temperature above which is red\n\
--no-color disable colored output\n\
--variant algorithm PoW variant\n\
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\
Expand Down
31 changes: 31 additions & 0 deletions src/defaults.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* XMRig
* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef __DEFAULTS_H__
#define __DEFAULTS_H__

// Default temperature display thresholds, in degrees Celsius
// (below L is green, between L and H is yellow, above H is red).
//
// Each default is wrapped in #ifndef so that it can be overridden from the
// build command line (e.g. -DDFL_nvmlTempL=50) without triggering a macro
// redefinition; when no override is given the values below are used.
#ifndef DFL_nvmlTempL
#define DFL_nvmlTempL 45
#endif

#ifndef DFL_nvmlTempH
#define DFL_nvmlTempH 65
#endif

#endif /* __DEFAULTS_H__ */
28 changes: 28 additions & 0 deletions src/nvidia/CudaCLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,34 @@ void CudaCLI::parseLaunch(const char *arg)
}


/**
 * Parse the --temp-low argument: a comma-separated list of integers that is
 * appended to m_temp_low in the order given.
 *
 * Empty items (leading, trailing or repeated commas) are skipped. An item
 * with no leading digits is stored as 0, because that is what strtoul
 * returns when it performs no conversion.
 *
 * @param arg  NUL-terminated option value; a null pointer is ignored.
 */
void CudaCLI::parseTempLow(const char *arg)
{
    if (arg == nullptr) {
        return;
    }

    // Scan the argument in place. strtoul stops at the first character that
    // cannot be part of a number, so it never reads past the ',' that ends
    // the current item; no temporary copy or strtok state is needed.
    const char *cursor = arg;
    while (*cursor != '\0') {
        if (*cursor == ',') {
            ++cursor;
            continue;
        }

        m_temp_low.push_back(static_cast<int>(strtoul(cursor, nullptr, 10)));

        // Advance to the end of this item (next comma or end of string).
        while (*cursor != '\0' && *cursor != ',') {
            ++cursor;
        }
    }
}


/**
 * Parse the --temp-high argument: a comma-separated list of integers that is
 * appended to m_temp_high in the order given.
 *
 * Empty items (leading, trailing or repeated commas) are skipped. An item
 * with no leading digits is stored as 0, because that is what strtoul
 * returns when it performs no conversion.
 *
 * @param arg  NUL-terminated option value; a null pointer is ignored.
 */
void CudaCLI::parseTempHigh(const char *arg)
{
    if (arg == nullptr) {
        return;
    }

    // Scan the argument in place. strtoul stops at the first character that
    // cannot be part of a number, so it never reads past the ',' that ends
    // the current item; no temporary copy or strtok state is needed.
    const char *cursor = arg;
    while (*cursor != '\0') {
        if (*cursor == ',') {
            ++cursor;
            continue;
        }

        m_temp_high.push_back(static_cast<int>(strtoul(cursor, nullptr, 10)));

        // Advance to the end of this item (next comma or end of string).
        while (*cursor != '\0' && *cursor != ',') {
            ++cursor;
        }
    }
}


int CudaCLI::get(const std::vector<int> &vector, int index, int defaultValue) const
{
if (vector.empty()) {
Expand Down
8 changes: 8 additions & 0 deletions src/nvidia/CudaCLI.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <vector>


#include "defaults.h"
#include "common/xmrig.h"


Expand All @@ -46,6 +47,8 @@ class CudaCLI
void autoConf(std::vector<xmrig::IThread *> &threads, xmrig::Algo algo, bool isCNv2);
void parseDevices(const char *arg);
void parseLaunch(const char *arg);
void parseTempLow(const char *arg);
void parseTempHigh(const char *arg);

inline void addBFactor(int bfactor) { m_bfactors.push_back(bfactor); }
inline void addBSleep(int bsleep) { m_bsleeps.push_back(bsleep); }
Expand All @@ -71,6 +74,9 @@ class CudaCLI
# endif
}

inline int temp_low(int index = 0) const { return get(m_temp_low, index, DFL_nvmlTempL); }
inline int temp_high(int index = 0) const { return get(m_temp_high, index, DFL_nvmlTempH); }

private:
inline int affinity(int index) const { return get(m_affinity, index, -1); }
inline int blocks(int index) const { return get(m_blocks, index, -1); }
Expand All @@ -87,6 +93,8 @@ class CudaCLI
std::vector<int> m_bsleeps;
std::vector<int> m_devices;
std::vector<int> m_threads;
std::vector<int> m_temp_low;
std::vector<int> m_temp_high;
};


Expand Down
1 change: 1 addition & 0 deletions src/nvidia/Health.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include <stdint.h>

#define PROBED_UNSUPPORTED 0x8675309

class Health
{
Expand Down
52 changes: 35 additions & 17 deletions src/nvidia/NvmlApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


static uv_lib_t nvmlLib;
static char nvmlVerion[80] = { 0 };
static char nvmlVersion[NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE] = { 0 };


bool NvmlApi::m_available = false;
Expand All @@ -50,8 +50,8 @@ static nvmlReturn_t(*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t *
bool NvmlApi::init()
{
# ifdef _WIN32
char tmp[512];
ExpandEnvironmentStringsA("%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll", tmp, sizeof(tmp));
char tmp[261]; //LoadLibrary calls are still "260 char" limited
ExpandEnvironmentStringsA(R"(%ProgramFiles%\NVIDIA Corporation\NVSMI\nvml.dll)", tmp, sizeof(tmp));
if (uv_dlopen(tmp, &nvmlLib) == -1 && uv_dlopen("nvml.dll", &nvmlLib) == -1) {
return false;
}
Expand All @@ -78,7 +78,7 @@ bool NvmlApi::init()
m_available = pNvmlInit() == NVML_SUCCESS;

if (pNvmlSystemGetNVMLVersion) {
pNvmlSystemGetNVMLVersion(nvmlVerion, sizeof(nvmlVerion));
pNvmlSystemGetNVMLVersion(nvmlVersion, sizeof(nvmlVersion));
}

return m_available;
Expand All @@ -95,34 +95,52 @@ void NvmlApi::release()
}


bool NvmlApi::health(int id, Health &health)
bool NvmlApi::health(int i, Health &health)
{
if (id == -1 || !isAvailable()) {
const auto id = static_cast<unsigned int>(i);
nvmlDevice_t device;

if (i == -1 || !isAvailable()
||
(pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS)
) {
return false;
}

health.reset();
// cache items previously pegged as unavailable via function call failure
// this has to happen before the reset or we don't see the previous value
const bool hasPowerUsage = PROBED_UNSUPPORTED != health.power;
const bool hasFanSpeed = PROBED_UNSUPPORTED != health.fanSpeed;
const bool hasClockInfo = PROBED_UNSUPPORTED != health.clock;

nvmlDevice_t device;
if (pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS) {
return false;
}
health.reset();

if (pNvmlDeviceGetTemperature) {
pNvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &health.temperature);
}

if (pNvmlDeviceGetPowerUsage) {
pNvmlDeviceGetPowerUsage(device, &health.power);
if (!hasPowerUsage || pNvmlDeviceGetPowerUsage(device, &health.power) != NVML_SUCCESS){
health.power = PROBED_UNSUPPORTED;
}
}

if (pNvmlDeviceGetFanSpeed) {
pNvmlDeviceGetFanSpeed(device, &health.fanSpeed);
if (!hasFanSpeed || pNvmlDeviceGetFanSpeed(device, &health.fanSpeed) != NVML_SUCCESS){
health.fanSpeed = PROBED_UNSUPPORTED;
}
}

if (pNvmlDeviceGetClockInfo) {
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock);
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock);
if (!hasClockInfo
||
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock) != NVML_SUCCESS
||
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock) != NVML_SUCCESS
) {
health.clock = PROBED_UNSUPPORTED;
health.memClock = PROBED_UNSUPPORTED;
}
}

return true;
Expand All @@ -131,7 +149,7 @@ bool NvmlApi::health(int id, Health &health)

const char *NvmlApi::version()
{
return nvmlVerion;
return nvmlVersion;
}


Expand All @@ -158,7 +176,7 @@ void NvmlApi::bind(const std::vector<xmrig::IThread*> &threads)
}

for (xmrig::IThread *t : threads) {
auto thread = static_cast<CudaThread *>(t);
auto thread = dynamic_cast<CudaThread *>(t);
if (thread->pciBusID() == pci.bus && thread->pciDeviceID() == pci.device && thread->pciDomainID() == pci.domain) {
thread->setNvmlId(i);
break;
Expand Down
17 changes: 17 additions & 0 deletions src/workers/CudaThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <string.h>


#include "defaults.h"
#include "rapidjson/document.h"
#include "workers/CudaThread.h"

Expand All @@ -38,6 +39,8 @@ CudaThread::CudaThread() :
m_clockRate(0),
m_memoryClockRate(0),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(0),
m_threads(0),
m_affinity(-1),
Expand All @@ -63,6 +66,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori
m_clockRate(ctx.device_clockRate),
m_memoryClockRate(ctx.device_memoryClockRate),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(ctx.device_mpcount),
m_threads(ctx.device_threads),
m_affinity(affinity),
Expand All @@ -88,6 +93,8 @@ CudaThread::CudaThread(const rapidjson::Value &object) :
m_clockRate(0),
m_memoryClockRate(0),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(0),
m_threads(0),
m_affinity(-1),
Expand Down Expand Up @@ -117,6 +124,16 @@ CudaThread::CudaThread(const rapidjson::Value &object) :
if (affinity.IsInt()) {
setAffinity(affinity.GetInt());
}

const rapidjson::Value &tempL = object["temp_low"];
if (tempL.IsInt()) {
setNvmlTempL(static_cast<uint32_t>(tempL.GetInt()));
}

const rapidjson::Value &tempH = object["temp_high"];
if (tempH.IsInt()) {
setNvmlTempH(static_cast<uint32_t>(tempH.GetInt()));
}
}


Expand Down
6 changes: 6 additions & 0 deletions src/workers/CudaThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class CudaThread : public xmrig::IThread
inline size_t memoryTotal() const { return m_memoryTotal; }
inline size_t memoryFree() const { return m_memoryFree; }
inline int nvmlId() const { return m_nvmlId; }
inline uint32_t nvmlTempL() const { return m_nvmlTempL; }
inline uint32_t nvmlTempH() const { return m_nvmlTempH; }
inline int smx() const { return m_smx; }
inline int threads() const { return m_threads; }
inline size_t threadId() const { return m_threadId; }
Expand All @@ -74,6 +76,8 @@ class CudaThread : public xmrig::IThread
inline void setBSleep(int bsleep) { m_bsleep = bsleep; }
inline void setIndex(size_t index) { m_index = index; }
inline void setNvmlId(int id) { m_nvmlId = id; }
inline void setNvmlTempL(uint32_t temp) { m_nvmlTempL = temp; }
inline void setNvmlTempH(uint32_t temp) { m_nvmlTempH = temp; }
inline void setThreadId(size_t threadId) { m_threadId = threadId; }
inline void setThreads(int threads) { m_threads = threads; }
inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; }
Expand All @@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread
int m_clockRate;
int m_memoryClockRate;
int m_nvmlId;
uint32_t m_nvmlTempL;
uint32_t m_nvmlTempH;
int m_smx;
int m_threads;
int64_t m_affinity;
Expand Down
Loading

0 comments on commit 625ea00

Please sign in to comment.