Skip to content

Commit

Permalink
2.4.8
Browse files Browse the repository at this point in the history
  • Loading branch information
IndeedMiners committed Jun 2, 2018
1 parent 4ed658c commit a8286e9
Show file tree
Hide file tree
Showing 29 changed files with 346 additions and 167 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ if(CUDA_ENABLE)
endif()

if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" AND
(CUDA_VERSION VERSION_EQUAL 9.0 OR CUDA_VERSION VERSION_EQUAL 9.1)
(CUDA_VERSION VERSION_EQUAL 9.0 OR CUDA_VERSION VERSION_EQUAL 9.1 OR CUDA_VERSION VERSION_EQUAL 9.2)
)
# workaround find_package(CUDA) is using the wrong path to the CXX host compiler
# overwrite the CUDA host compiler variable with the used CXX MSVC
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this
- [Intense](https://intensecoin.com)
- [IPBC](https://ipbc.io)
- [Karbo](https://karbo.io)
- [Masari](https://getmasari.org)
- [Sumokoin](https://www.sumokoin.org)
- [TurtleCoin](https://turtlecoin.lol)

Expand Down
9 changes: 8 additions & 1 deletion THIRD-PARTY-LICENSES
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This application bundles the following third-party software in accordance with t
Package: Original NVidia mining code
Authors: tsiv and KlausT
License: GNU GPLv3
Notes: Improvements are (c) of Xmr-Stak team
Notes: Improvements are (c) of Xmr-Stak team and are covered by GNU GPLv3

-------------------------------------------------------------------------

Expand All @@ -27,3 +27,10 @@ Authors: okdshin
License: MIT License

-------------------------------------------------------------------------

Package: cpputil
Authors: Will Zhang
Source: https://github.com/willzhang4a58/cpputil
License: MIT License

-------------------------------------------------------------------------
3 changes: 2 additions & 1 deletion doc/compile_Linux.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

### AMD APP SDK 3.0 (only needed to use AMD GPUs)

- download and install the latest version from [http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/](http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/)
- download and install the latest version from https://www.dropbox.com/sh/mpg882ekirnsfa7/AADWz5X-TgVdsmWt0QwMgTWLa/AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2?dl=0
(this is a Dropbox link because AMD has removed the SDK downloads, see https://community.amd.com/thread/228059)

### Cuda 8.0+ (only needed to use NVIDIA GPUs)

Expand Down
7 changes: 4 additions & 3 deletions doc/compile_Windows.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
- During install choose following components:
- `Desktop development with C++` (left side)
- `VC++ 2015.3 v140 toolset for desktop` (right side - **NOT** needed for CUDA 9 or AMD GPU)
- Since release of VS2017 15.5 (12/04/17), require `VC++ 2017 version 15.4 v14.11 toolset` (under tab `Individual Components`, section `Compilers, build tools, and runtimes`), as CUDA 9.1 is not compatible with compiler 14.12.X
- Since release of VS2017 15.5 (12/04/17), require `VC++ 2017 version 15.4 v14.11 toolset` (under tab `Individual Components`, section `Compilers, build tools, and runtimes`), as CUDA 9.x is not compatible with compiler 14.12.X

### CMake for Win64

Expand All @@ -32,7 +32,8 @@

### AMD APP SDK 3.0 (only needed for AMD GPUs)

- Download and install the latest version from http://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/
- Download and install the latest version from https://www.dropbox.com/s/gq8vqhelq0m6gj4/AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe
(this is a Dropbox link because AMD has removed the SDK downloads, see https://community.amd.com/thread/222855)

### Dependencies OpenSSL/Hwloc and Microhttpd
- For CUDA 8*:
Expand Down Expand Up @@ -82,7 +83,7 @@
- Go to extracted source code directory (e.g. `cd C:\Users\USERNAME\xmr-stak-<version>`)
- Execute the following commands (NOTE: path to Visual Studio Community 2017 can be different)
```
# Execute next line only if compiling for Cuda 9.1 and using Visual Studio 2017 >= 15.5 (released 12/04/17)
# Execute next line only if compiling for Cuda 9.x and using Visual Studio 2017 >= 15.5 (released 12/04/17)
"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=14.11
"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsMSBuildCmd.bat"
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
return(ERR_OCL_API);
}

if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || cryptonight_stellite)
if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite)
{
// Input
if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
Expand Down
31 changes: 6 additions & 25 deletions xmrstak/backend/amd/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,27 +158,6 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
return pvThreads;
}

// Publishes a new job for the mining threads.
// Blocks, polling every 100 ms, until iConsumeCnt has reached iThreadCount,
// then copies pWork into the shared oGlobalWork, resets iConsumeCnt to zero
// and increments iGlobalJobNo.
void minethd::switch_work(miner_work& pWork)
{
// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
// faster than threads can consume them. This should never happen in real life.
// Pool can't physically send jobs faster than every 250ms or so due to net latency.

while (globalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < globalStates::inst().iThreadCount)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

// The work is stored and the consume counter reset before the job number is bumped.
globalStates::inst().oGlobalWork = pWork;
globalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst);
globalStates::inst().iGlobalJobNo++;
}

// Picks up the currently published job for this miner thread object:
// copies the shared oGlobalWork into the oWork member, advances this
// object's iJobNo by one and increments the shared iConsumeCnt.
void minethd::consume_work()
{
// Raw byte-wise copy of the whole miner_work object.
memcpy(&oWork, &globalStates::inst().oGlobalWork, sizeof(miner_work));
iJobNo++;
// Incremented only after the copy above has been made.
globalStates::inst().iConsumeCnt++;

}

void minethd::work_main()
{
Expand All @@ -198,8 +177,6 @@ void minethd::work_main()
auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);

globalStates::inst().iConsumeCnt++;

uint8_t version = 0;
size_t lastPoolId = 0;

Expand All @@ -215,7 +192,7 @@ void minethd::work_main()
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
continue;
}

Expand Down Expand Up @@ -254,7 +231,11 @@ void minethd::work_main()
if((round_ctr++ & 0xF) == 0)
{
globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
// check if the job is still valid, there is a small possibility that the job is switched
if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
break;
}


cl_uint results[0x100];
memset(results,0,sizeof(cl_uint)*(0x100));
Expand Down Expand Up @@ -285,7 +266,7 @@ void minethd::work_main()
std::this_thread::yield();
}

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
}
}

Expand Down
5 changes: 1 addition & 4 deletions xmrstak/backend/amd/minethd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class minethd : public iBackend
{
public:

static void switch_work(miner_work& pWork);
static std::vector<iBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork);
static bool init_gpus();

Expand All @@ -30,11 +29,9 @@ class minethd : public iBackend
minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg);

void work_main();
void consume_work();

uint64_t iJobNo;

static miner_work oGlobalWork;

miner_work oWork;

std::promise<void> order_fix;
Expand Down
3 changes: 0 additions & 3 deletions xmrstak/backend/backendConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ bool BackendConnector::self_test()

std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
{
globalStates::inst().iGlobalJobNo = 0;
globalStates::inst().iConsumeCnt = 0;


std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>;

Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
*hi = r >> 64;
return (uint64_t)r;
}
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

#else
#include <intrin.h>
#endif // __GNUC__
Expand Down
22 changes: 10 additions & 12 deletions xmrstak/backend/cpu/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,13 +347,6 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
return pvThreads;
}

// Picks up the currently published job for this miner thread object:
// copies the shared oGlobalWork into the oWork member, advances this
// object's iJobNo by one and increments the shared iConsumeCnt.
void minethd::consume_work()
{
// Raw byte-wise copy of the whole miner_work object.
// NOTE(review): inst() is called twice in a row here; presumably this is
// equivalent to a single inst() call - confirm against globalStates.
memcpy(&oWork, &globalStates::inst().inst().oGlobalWork, sizeof(miner_work));
iJobNo++;
// Incremented only after the copy above has been made.
globalStates::inst().inst().iConsumeCnt++;
}

minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
{
// We have two independent flag bits in the functions
Expand Down Expand Up @@ -450,7 +443,6 @@ void minethd::work_main()

piHashVal = (uint64_t*)(result.bResult + 24);
piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
globalStates::inst().inst().iConsumeCnt++;
result.iThreadId = iThreadNo;

uint8_t version = 0;
Expand All @@ -468,7 +460,7 @@ void minethd::work_main()
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
continue;
}

Expand Down Expand Up @@ -511,6 +503,9 @@ void minethd::work_main()
if((nonce_ctr++ & (nonce_chunk-1)) == 0)
{
globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
// check if the job is still valid, there is a small possibility that the job is switched
if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
break;
}

*piNonce = result.iNonce;
Expand All @@ -524,7 +519,7 @@ void minethd::work_main()
std::this_thread::yield();
}

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
}

cryptonight_free_ctx(ctx);
Expand Down Expand Up @@ -773,7 +768,7 @@ void minethd::multiway_work_main()
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
prep_multiway_work<N>(bWorkBlob, piNonce);
continue;
}
Expand Down Expand Up @@ -818,6 +813,9 @@ void minethd::multiway_work_main()
{
globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk);
nonce_ctr = nonce_chunk;
// check if the job is still valid, there is a small possibility that the job is switched
if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
break;
}

for (size_t i = 0; i < N; i++)
Expand All @@ -836,7 +834,7 @@ void minethd::multiway_work_main()
std::this_thread::yield();
}

consume_work();
globalStates::inst().consume_work(oWork, iJobNo);
prep_multiway_work<N>(bWorkBlob, piNonce);
}

Expand Down
3 changes: 0 additions & 3 deletions xmrstak/backend/cpu/minethd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,8 @@ class minethd : public iBackend
void quad_work_main();
void penta_work_main();

void consume_work();

uint64_t iJobNo;

static miner_work oGlobalWork;
miner_work oWork;

std::promise<void> order_fix;
Expand Down
31 changes: 22 additions & 9 deletions xmrstak/backend/globalStates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,37 @@
namespace xmrstak
{

// Hands the current global job to a worker thread.
// threadWork   - receives a copy of oGlobalWork.
// currentJobId - receives the current value of iGlobalJobNo.
// Both values are read between jobLock.ReadLock() and jobLock.UnLock().
void globalStates::consume_work( miner_work& threadWork, uint64_t& currentJobId)
{
jobLock.ReadLock();

threadWork = oGlobalWork;
currentJobId = iGlobalJobNo.load(std::memory_order_relaxed);

jobLock.UnLock();
}

void globalStates::switch_work(miner_work& pWork, pool_data& dat)
{
// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
// faster than threads can consume them. This should never happen in real life.
// Pool can't physically send jobs faster than every 250ms or so due to net latency.

while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
jobLock.WriteLock();

/* This notifies all threads that the job has changed.
* To avoid duplicated shares this must be done before the nonce is exchanged.
*/
iGlobalJobNo++;

size_t xid = dat.pool_id;
dat.pool_id = pool_id;
pool_id = xid;

dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
/* Maybe a worker thread is updating the nonce while we read it.
* To avoid duplicated share calculations the job ID is checked in the worker thread
* after the nonce is read.
*/
dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_relaxed);
oGlobalWork = pWork;
iConsumeCnt.store(0, std::memory_order_seq_cst);
iGlobalJobNo++;

jobLock.UnLock();
}

} // namespace xmrstak
22 changes: 8 additions & 14 deletions xmrstak/backend/globalStates.hpp
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
#pragma once

#include "miner_work.hpp"
#include "xmrstak/backend/miner_work.hpp"
#include "xmrstak/misc/environment.hpp"
#include "xmrstak/misc/console.hpp"
#include "xmrstak/backend/pool_data.hpp"
#include "xmrstak/cpputil/read_write_lock.h"

#include <atomic>

constexpr static size_t invalid_pool_id = (-1);

namespace xmrstak
{

struct pool_data
{
	// Saved nonce value; a freshly constructed object starts at zero.
	uint32_t iSavedNonce = 0;
	// Pool identifier; a freshly constructed object holds invalid_pool_id.
	size_t pool_id = invalid_pool_id;

	// Both members take their initial values from the initializers above.
	pool_data()
	{
	}
};

struct globalStates
{
static inline globalStates& inst()
Expand All @@ -42,6 +32,8 @@ struct globalStates
nonce = iGlobalNonce.fetch_add(reserve_count);
}

void consume_work( miner_work& threadWork, uint64_t& currentJobId);

miner_work oGlobalWork;
std::atomic<uint64_t> iGlobalJobNo;
std::atomic<uint64_t> iConsumeCnt;
Expand All @@ -50,9 +42,11 @@ struct globalStates
size_t pool_id = invalid_pool_id;

private:
globalStates() : iThreadCount(0)
globalStates() : iThreadCount(0), iGlobalJobNo(0), iConsumeCnt(0)
{
}

::cpputil::RWLock jobLock;
};

} // namespace xmrstak
4 changes: 3 additions & 1 deletion xmrstak/backend/miner_work.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include "xmrstak/backend/pool_data.hpp"

#include <thread>
#include <atomic>
#include <mutex>
Expand All @@ -20,7 +22,7 @@ namespace xmrstak
bool bStall;
size_t iPoolId;

miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { }
miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(invalid_pool_id) { }

miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize,
uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize),
Expand Down
Loading

0 comments on commit a8286e9

Please sign in to comment.