Skip to content

Commit

Permalink
Merge branch 'Spudz76-dev-fixCUDA8' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
xmrig committed Mar 21, 2019
2 parents b535699 + 8913809 commit f0469b5
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 76 deletions.
16 changes: 13 additions & 3 deletions cmake/CUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50")

# Fermi GPUs are only supported with CUDA < 9.0
if (CUDA_VERSION VERSION_LESS 9.0)
list(APPEND DEFAULT_CUDA_ARCH "20")
list(APPEND DEFAULT_CUDA_ARCH "20 21")
endif()

# add Pascal support for CUDA >= 8.0
Expand Down Expand Up @@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
"Use '20' (for compute architecture 2.0) or higher.")
endif()
endforeach()
list(SORT CUDA_ARCH)

option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
Expand Down Expand Up @@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
if (CUDA_VERSION VERSION_LESS 8.0)
add_definitions(-D_FORCE_INLINES)
add_definitions(-D_MWAITXINTRIN_H_INCLUDED)
elseif(CUDA_VERSION VERSION_LESS 9.0)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets")
endif()
foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
# set flags to create device code for the given architecture
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
if("${CUDA_ARCH_ELEM}" STREQUAL "21")
# sm_21 ("2.1") actually does run faster when compiled for itself than in "2.0"-compatible mode.
# There is no compute_21 virtual architecture: sm_21 is built on top of compute_20, so the generic rule in the else() branch fails for it.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"--generate-code arch=compute_20,code=sm_21")
else()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
"--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
endif()
endforeach()

# give each thread an independent default stream
Expand Down
13 changes: 9 additions & 4 deletions src/Summary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -65,10 +67,11 @@ static void print_algo(xmrig::Config *config)

static void print_gpu(xmrig::Config *config)
{
constexpr size_t byteToMiB = 1024u * 1024u;
for (const xmrig::IThread *t : config->threads()) {
auto thread = static_cast<const CudaThread *>(t);
Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d"
: " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d",
Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB"
: " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB",
thread->index(),
thread->pciDomainID(),
thread->pciBusID(),
Expand All @@ -82,7 +85,9 @@ static void print_gpu(xmrig::Config *config)
thread->bsleep(),
thread->arch()[0],
thread->arch()[1],
thread->smx()
thread->smx(),
thread->memoryFree() / byteToMiB,
thread->memoryTotal() / byteToMiB
);
}
}
Expand Down
48 changes: 26 additions & 22 deletions src/nvidia/cryptonight.h
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
/* XMRig
* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/


#pragma once
Expand Down Expand Up @@ -50,6 +52,8 @@ typedef struct {
int device_bsleep;
int device_clockRate;
int device_memoryClockRate;
size_t device_memoryTotal;
size_t device_memoryFree;
uint32_t device_pciBusID;
uint32_t device_pciDeviceID;
uint32_t device_pciDomainID;
Expand Down
89 changes: 46 additions & 43 deletions src/nvidia/cuda_extra.cu
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
/* XMRig
* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <algorithm>
#include <stdio.h>
Expand Down Expand Up @@ -529,6 +530,28 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2)
return 1;
}

// a device must be selected to get the right memory usage later on
if (cudaSetDevice(ctx->device_id) != cudaSuccess) {
printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
return 2;
}

// allocate a small buffer to force creation of a context on the GPU
int* tmp;
if (cudaMalloc(&tmp, 256) != cudaSuccess) {
printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
return 3;
}

size_t freeMemory = 0;
size_t totalMemory = 0;

CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
CUDA_CHECK(ctx->device_id, cudaFree(tmp));
CUDA_CHECK(ctx->device_id, cudaDeviceReset());
ctx->device_memoryFree = freeMemory;
ctx->device_memoryTotal = totalMemory;

cudaDeviceProp props;
err = cudaGetDeviceProperties(&props, ctx->device_id);
if (err != cudaSuccess) {
Expand Down Expand Up @@ -593,26 +616,6 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2)
maxMemUsage = size_t(1024u) * byteToMiB;
}

// a device must be selected to get the right memory usage later on
if (cudaSetDevice(ctx->device_id) != cudaSuccess) {
printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
return 2;
}

// allocate a small buffer to force creation of a context on the GPU
int* tmp;
if (cudaMalloc(&tmp, 256) != cudaSuccess) {
printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
return 3;
}

size_t freeMemory = 0;
size_t totalMemory = 0;

CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
CUDA_CHECK(ctx->device_id, cudaFree(tmp));
CUDA_CHECK(ctx->device_id, cudaDeviceReset());

const size_t hashMemSize = xmrig::cn_select_memory(algo);
# ifdef _WIN32
/* On Windows we use bfactor (split slow kernel into smaller parts) to avoid
Expand Down
10 changes: 9 additions & 1 deletion src/workers/CudaThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -40,6 +42,8 @@ CudaThread::CudaThread() :
m_threads(0),
m_affinity(-1),
m_index(0),
m_memoryFree(0),
m_memoryTotal(0),
m_threadId(0),
m_pciBusID(0),
m_pciDeviceID(0),
Expand All @@ -63,6 +67,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori
m_threads(ctx.device_threads),
m_affinity(affinity),
m_index(static_cast<size_t>(ctx.device_id)),
m_memoryFree(ctx.device_memoryFree),
m_memoryTotal(ctx.device_memoryTotal),
m_threadId(0),
m_pciBusID(ctx.device_pciBusID),
m_pciDeviceID(ctx.device_pciDeviceID),
Expand Down Expand Up @@ -145,6 +151,8 @@ bool CudaThread::init(xmrig::Algo algorithm)

m_clockRate = ctx.device_clockRate;
m_memoryClockRate = ctx.device_memoryClockRate;
m_memoryTotal = ctx.device_memoryTotal;
m_memoryFree = ctx.device_memoryFree;
m_pciBusID = ctx.device_pciBusID;
m_pciDeviceID = ctx.device_pciDeviceID;
m_pciDomainID = ctx.device_pciDomainID;
Expand Down
12 changes: 9 additions & 3 deletions src/workers/CudaThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <[email protected]>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2019 Spudz76 <https://github.com/Spudz76>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -48,6 +50,8 @@ class CudaThread : public xmrig::IThread
inline int bsleep() const { return m_bsleep; }
inline int clockRate() const { return m_clockRate; }
inline int memoryClockRate() const { return m_memoryClockRate; }
inline size_t memoryTotal() const { return m_memoryTotal; }
inline size_t memoryFree() const { return m_memoryFree; }
inline int nvmlId() const { return m_nvmlId; }
inline int smx() const { return m_smx; }
inline int threads() const { return m_threads; }
Expand Down Expand Up @@ -75,8 +79,8 @@ class CudaThread : public xmrig::IThread
inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; }

protected:
# ifdef APP_DEBUG
void print() const override;
# ifdef APP_DEBUG
void print() const override;
# endif

# ifndef XMRIG_NO_API
Expand All @@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread
int m_threads;
int64_t m_affinity;
size_t m_index;
size_t m_memoryFree;
size_t m_memoryTotal;
size_t m_threadId;
uint32_t m_pciBusID;
uint32_t m_pciDeviceID;
Expand Down

0 comments on commit f0469b5

Please sign in to comment.