Skip to content

Commit

Permalink
2.10.3
Browse files Browse the repository at this point in the history
  • Loading branch information
IndeedMiners committed Mar 24, 2019
1 parent c787976 commit dc6643f
Show file tree
Hide file tree
Showing 9 changed files with 439 additions and 157 deletions.
237 changes: 134 additions & 103 deletions xmrstak/backend/amd/OclCryptonightR_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <chrono>
#include <thread>
#include <iostream>
#include <regex>


namespace xmrstak
Expand Down Expand Up @@ -63,15 +64,15 @@ static std::string get_code(const V4_Instruction* code, int code_size)

struct CacheEntry
{
CacheEntry(xmrstak_algo algo, uint64_t height, size_t deviceIdx, cl_program program) :
CacheEntry(xmrstak_algo algo, uint64_t height_offset, size_t deviceIdx, cl_program program) :
algo(algo),
height(height),
height_offset(height_offset),
deviceIdx(deviceIdx),
program(program)
{}

xmrstak_algo algo;
uint64_t height;
uint64_t height_offset;
size_t deviceIdx;
cl_program program;
};
Expand Down Expand Up @@ -99,6 +100,34 @@ static std::mutex background_tasks_mutex;
static std::vector<BackgroundTaskBase*> background_tasks;
static std::thread* background_thread = nullptr;

/**
 * Look up an already compiled CryptonightR program in the program cache.
 *
 * A cache entry matches when its algorithm, its height offset and its device
 * index are all equal to the requested ones (the device index is taken from
 * `ctx->deviceIdx`). The first matching entry wins.
 *
 * @param ctx           GPU context; only `ctx->deviceIdx` is read here
 * @param algo          algorithm the cached program must belong to
 * @param height_offset height offset the cached program must be stored under
 * @param lock_cache    if true, the cache read lock is held while the cache is
 *                      scanned and released before returning; if false, no
 *                      locking is done here (presumably for callers that
 *                      already hold the cache lock themselves)
 * @return the cached program, or nullptr if no entry matches
 */
static cl_program search_program(
	const GpuContext* ctx,
	xmrstak_algo algo,
	uint64_t height_offset,
	bool lock_cache = true
)
{
	cl_program found = nullptr;

	if(lock_cache)
		CryptonightR_cache_mutex.ReadLock();

	for(const CacheEntry& cached : CryptonightR_cache)
	{
		const bool is_match =
			(cached.algo == algo) &&
			(cached.height_offset == height_offset) &&
			(cached.deviceIdx == ctx->deviceIdx);

		if(!is_match)
			continue;

		// the hit is reported while the lock (if any) is still held
		printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height_offset %llu found in cache", height_offset);
		found = cached.program;
		break;
	}

	// single release point for both the hit and the miss path
	if(lock_cache)
		CryptonightR_cache_mutex.UnLock();

	return found;
}

static void background_thread_proc()
{
std::vector<BackgroundTaskBase*> tasks;
Expand Down Expand Up @@ -133,60 +162,48 @@ static void background_exec(T&& func)
static cl_program CryptonightR_build_program(
const GpuContext* ctx,
xmrstak_algo algo,
uint64_t height,
uint64_t height_offset,
uint64_t height_chunk_size,
uint32_t precompile_count,
std::string source_code,
std::string options)
{
std::vector<cl_program> old_programs;
old_programs.reserve(32);
{
std::vector<cl_program> old_programs;
old_programs.reserve(32);
{
CryptonightR_cache_mutex.WriteLock();

// Remove old programs from cache
for(size_t i = 0; i < CryptonightR_cache.size();)
{
const CacheEntry& entry = CryptonightR_cache[i];
if ((entry.algo == algo) && (entry.height + 2 + precompile_count < height))
{
printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu released (old program)", entry.height);
old_programs.push_back(entry.program);
CryptonightR_cache[i] = std::move(CryptonightR_cache.back());
CryptonightR_cache.pop_back();
}
else
{
++i;
}
}
// Remove old programs from cache
for(size_t i = 0; i < CryptonightR_cache.size();)
{
const CacheEntry& entry = CryptonightR_cache[i];
if ((entry.algo == algo) && (entry.height_offset + (2 + precompile_count) * height_chunk_size < height_offset))
{
printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height_offset %llu released (old program)", entry.height_offset);
old_programs.push_back(entry.program);
CryptonightR_cache[i] = std::move(CryptonightR_cache.back());
CryptonightR_cache.pop_back();
}
else
{
++i;
}
}
CryptonightR_cache_mutex.UnLock();
}
}

for(cl_program p : old_programs) {
clReleaseProgram(p);
}
for(cl_program p : old_programs)
{
clReleaseProgram(p);
}

std::lock_guard<std::mutex> g1(CryptonightR_build_mutex);
std::lock_guard<std::mutex> g1(CryptonightR_build_mutex);

cl_program program = nullptr;
{
CryptonightR_cache_mutex.ReadLock();
cl_program program = search_program(ctx, algo, height_offset);

// Check if the cache already has this program (some other thread might have added it first)
for (const CacheEntry& entry : CryptonightR_cache)
{
if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
{
program = entry.program;
break;
}
}
CryptonightR_cache_mutex.UnLock();
}

if (program) {
return program;
}
if(program) {
return program;
}

cl_int ret;
const char* source = source_code.c_str();
Expand Down Expand Up @@ -239,54 +256,83 @@ static cl_program CryptonightR_build_program(
}
while(status == CL_BUILD_IN_PROGRESS);

CryptonightR_cache_mutex.WriteLock();
auto cached_program = search_program(ctx, algo, height_offset, false);

printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu compiled", height);
if(cached_program)
{
printer::inst()->print_msg(LDEBUG, "CryptonightR: release already existing program %llu", height_offset);
clReleaseProgram(program);
program = cached_program;
}
else
{
CryptonightR_cache.emplace_back(algo, height_offset, ctx->deviceIdx, program);
printer::inst()->print_msg(LDEBUG, "CryptonightR: cache compiled program for height_offset %llu", height_offset);
}

CryptonightR_cache_mutex.WriteLock();
CryptonightR_cache.emplace_back(algo, height, ctx->deviceIdx, program);
CryptonightR_cache_mutex.UnLock();
return program;
return program;
}

cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height, uint32_t precompile_count, bool background)
cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height_offset, uint64_t height_chunk_size, uint32_t precompile_count, bool background)
{
printer::inst()->print_msg(LDEBUG, "CryptonightR: start %llu released",height);
if (background)
{
background_exec([=](){ CryptonightR_get_program(ctx, algo, height_offset, height_chunk_size, precompile_count, false); });
return nullptr;
}

if (background) {
background_exec([=](){ CryptonightR_get_program(ctx, algo, height, precompile_count, false); });
return nullptr;
}
auto program = search_program(ctx, algo, height_offset);

const char* source_code_template =
#include "amd_gpu/opencl/wolf-aes.cl"
#include "amd_gpu/opencl/cryptonight_r.cl"
;
const char include_name[] = "XMRSTAK_INCLUDE_RANDOM_MATH";
const char* offset = strstr(source_code_template, include_name);
if (!offset)
{
printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: XMRSTAK_INCLUDE_RANDOM_MATH not found in cryptonight_r.cl", algo);
return nullptr;
}
if(program != nullptr)
return program;

V4_Instruction code[256];
int code_size;
switch (algo.Id())
{
case cryptonight_r_wow:
code_size = v4_random_math_init<cryptonight_r_wow>(code, height);
break;
case cryptonight_r:
code_size = v4_random_math_init<cryptonight_r>(code, height);
break;
default:
printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
return nullptr;
}
printer::inst()->print_msg(LDEBUG, "CryptonightR: create code for block %llu to %llu",height_offset, height_offset + height_chunk_size);

const char* source_code_definitions=
#include "amd_gpu/opencl/wolf-aes.cl"
#include "amd_gpu/opencl/cryptonight_r_def.rtcl"
;

const char* source_code_template =
#include "amd_gpu/opencl/cryptonight_r.rtcl"
;
const char include_name[] = "XMRSTAK_INCLUDE_RANDOM_MATH";
const char* offset = strstr(source_code_template, include_name);
if (!offset)
{
printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: XMRSTAK_INCLUDE_RANDOM_MATH not found in cryptonight_r.cl", algo);
return nullptr;
}

std::string source_code(source_code_definitions);

for(uint64_t c = 0; c < height_chunk_size; ++c)
{
V4_Instruction code[256];
int code_size;
switch (algo.Id())
{
case cryptonight_r_wow:
code_size = v4_random_math_init<cryptonight_r_wow>(code, height_offset + c);
break;
case cryptonight_r:
code_size = v4_random_math_init<cryptonight_r>(code, height_offset + c);
break;
default:
printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
return nullptr;
}

std::string source_code(source_code_template, offset);
source_code.append(get_code(code, code_size));
source_code.append(offset + sizeof(include_name) - 1);
std::string kernel_code(source_code_template, offset);
kernel_code.append(get_code(code, code_size));
kernel_code.append(offset + sizeof(include_name) - 1);

std::string kernel_name = "cn1_cryptonight_r_" + std::to_string(height_offset + c);

source_code += std::regex_replace(kernel_code, std::regex("cn1_cryptonight_r"), kernel_name);
}

// scratchpad size for the selected mining algorithm
size_t hashMemSize = algo.Mem();
Expand Down Expand Up @@ -325,27 +371,12 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t
options += " -cl-fp32-correctly-rounded-divide-sqrt";


const char* source = source_code.c_str();
program = search_program(ctx, algo, height_offset);

{
CryptonightR_cache_mutex.ReadLock();

// Check if the cache has this program
for (const CacheEntry& entry : CryptonightR_cache)
{
if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
{
printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu found in cache", height);
auto result = entry.program;
CryptonightR_cache_mutex.UnLock();
return result;
}
}
CryptonightR_cache_mutex.UnLock();

}
if(program != nullptr)
return program;

return CryptonightR_build_program(ctx, algo, height, precompile_count, source, options);
return CryptonightR_build_program(ctx, algo, height_offset, precompile_count, height_chunk_size, source_code, options);
}

} // namespace amd
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/OclCryptonightR_gen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace amd
{

cl_program CryptonightR_get_program(GpuContext* ctx, const xmrstak_algo algo,
uint64_t height, uint32_t precompile_count, bool background = false);
uint64_t height_offset, uint64_t height_chunk_size, uint32_t precompile_count, bool background = false);

} // namespace amd
} // namespace xmrstak
15 changes: 10 additions & 5 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,14 +938,17 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar

if ((miner_algo == cryptonight_r) || (miner_algo == cryptonight_r_wow)) {

uint32_t PRECOMPILATION_DEPTH = 4;
uint32_t PRECOMPILATION_DEPTH = 1;
constexpr uint64_t height_chunk_size = 25;
uint64_t height_offset = (height / height_chunk_size) * height_chunk_size;

// Get new kernel
cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height, PRECOMPILATION_DEPTH);
cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height_offset, height_chunk_size, PRECOMPILATION_DEPTH);

if (program != ctx->ProgramCryptonightR) {
if (program != ctx->ProgramCryptonightR || ctx->last_block_height != height) {
cl_int ret;
cl_kernel kernel = clCreateKernel(program, "cn1_cryptonight_r", &ret);
std::string kernel_name = "cn1_cryptonight_r_" + std::to_string(height);
cl_kernel kernel = clCreateKernel(program, kernel_name.c_str(), &ret);

if (ret != CL_SUCCESS) {
printer::inst()->print_msg(LDEBUG, "CryptonightR: clCreateKernel returned error %s", err_to_str(ret));
Expand All @@ -958,10 +961,12 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
Kernels[1] = kernel;
}
ctx->ProgramCryptonightR = program;
ctx->last_block_height = height;
printer::inst()->print_msg(LDEBUG, "Set height %llu", height);

// Precompile next program in background
for (int i = 1; i <= PRECOMPILATION_DEPTH; ++i)
xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + i, PRECOMPILATION_DEPTH, true);
xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height_offset + i * height_chunk_size, height_chunk_size, PRECOMPILATION_DEPTH, true);

printer::inst()->print_msg(LDEBUG, "Thread #%zu updated CryptonightR", ctx->deviceIdx);
}
Expand Down
1 change: 1 addition & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct GpuContext
std::map<xmrstak_algo_id, cl_program> Program;
std::map<xmrstak_algo_id, std::array<cl_kernel,8>> Kernels;
cl_program ProgramCryptonightR = nullptr;
uint64_t last_block_height = 0u;
size_t freeMem;
size_t maxMemPerAlloc;
int computeUnits;
Expand Down
Loading

0 comments on commit dc6643f

Please sign in to comment.