[WIP] Optimized soft AES for Windows and some other features #2362

Open · wants to merge 20 commits into base: dev
1 change: 1 addition & 0 deletions xmrstak/backend/amd/OclCryptonightR_gen.cpp
@@ -138,6 +138,7 @@ static cl_program CryptonightR_build_program(
std::string source_code,
std::string options)
{

std::vector<cl_program> old_programs;
old_programs.reserve(32);
{
29 changes: 28 additions & 1 deletion xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -488,7 +488,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
{
KernelNames.push_back(std::string("cn00_cn_gpu") + std::to_string(miner_algo));
}


ctx->Kernels[miner_algo] = {};
for(int i = 0; i < KernelNames.size(); ++i)
{
ctx->Kernels[miner_algo][i] = clCreateKernel(ctx->Program[miner_algo], KernelNames[i].c_str(), &ret);
@@ -503,6 +504,32 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
return 0;
}
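
The only functional change in this first InitOpenCLGpu hunk is `ctx->Kernels[miner_algo] = {};`. The diff does not show the type of `Kernels`, but the way FinalizeOpenCL() below uses it (eight slots per algorithm, plus `ctx->Kernels.clear()`) suggests an algorithm-keyed map of fixed kernel arrays; the `= {}` value-initializes the slot so unused handles stay null and can be skipped before clReleaseKernel(). A rough sketch of that assumed shape (`KernelTable` and `perAlgo` are illustrative names, not from the PR):

```cpp
#include <array>
#include <map>

#include <CL/cl.h>

// Assumed container shape, consistent with how the diff indexes and clears ctx->Kernels.
struct KernelTable
{
	std::map<int, std::array<cl_kernel, 8>> perAlgo; // int stands in for xmrstak_algo

	void reset(int algo) { perAlgo[algo] = {}; } // all eight handles become null

	void release(int algo)
	{
		for(cl_kernel k : perAlgo[algo])
			if(k) // only release kernels that were actually created
				clReleaseKernel(k);
	}
};
```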

size_t FinalizeOpenCL(GpuContext* ctx)
{
auto neededAlgorithms = ::jconf::inst()->GetCurrentCoinSelection().GetAllAlgorithms();

for (auto algo : neededAlgorithms)
{
for (int i = 0; i < 8; ++i)
if (ctx->Kernels[algo][i])
clReleaseKernel(ctx->Kernels[algo][i]);
}

ctx->Kernels.clear();

for (size_t i = 0; i < 6; ++i)
clReleaseMemObject(ctx->ExtraBuffers[i]);
clReleaseMemObject(ctx->InputBuffer);
clReleaseMemObject(ctx->OutputBuffer);

for (auto algo : neededAlgorithms)
clReleaseProgram(ctx->Program[algo]);
ctx->Program.clear();

clReleaseCommandQueue(ctx->CommandQueues);
clReleaseDevice(ctx->DeviceID);

return 0;
}
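
FinalizeOpenCL() is the teardown counterpart to InitOpenCLGpu(): kernels first, then the extra/input/output buffers, then the per-algorithm programs, and finally the command queue and device. In this PR it is called directly at the end of work_main() (see minethd.cpp below). A hedged alternative, assuming gpu.hpp is included, would be to tie the call to scope so it also runs on early exits:

```cpp
// Illustrative only -- not part of the PR. Assumes GpuContext and FinalizeOpenCL()
// from xmrstak/backend/amd/amd_gpu/gpu.hpp are visible.
struct GpuContextGuard
{
	explicit GpuContextGuard(GpuContext* c) : ctx(c) {}
	~GpuContextGuard() { FinalizeOpenCL(ctx); } // releases kernels, buffers, programs, queue, device

	GpuContextGuard(const GpuContextGuard&) = delete;
	GpuContextGuard& operator=(const GpuContextGuard&) = delete;

	GpuContext* ctx;
};
```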

const cl_platform_info attributeTypes[5] = {
CL_PLATFORM_NAME,
CL_PLATFORM_VENDOR,
1 change: 1 addition & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -215,6 +215,7 @@ std::vector<GpuContext> getAMDDevices(int index);

size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, const xmrstak_algo& miner_algo, uint64_t height);
size_t FinalizeOpenCL(GpuContext* ctx);
size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner_algo);
uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment = true);
uint64_t updateTimings(GpuContext* ctx, const uint64_t t);
74 changes: 47 additions & 27 deletions xmrstak/backend/amd/jconf.cpp
@@ -228,48 +228,68 @@ size_t jconf::GetAutoTune()

size_t jconf::GetThreadCount()
{
return prv->configValues[aGpuThreadsConf]->Size();
if (!prv->configValues[aGpuThreadsConf]->IsArray())
return 0;
size_t available_gpu_threads = prv->configValues[aGpuThreadsConf]->Size();
size_t max_gpu_threads = (size_t)ceil((float)available_gpu_threads * (params::inst().max_gpu_threads_count / 100.0));
if (available_gpu_threads > max_gpu_threads)
available_gpu_threads = max_gpu_threads;
return available_gpu_threads;
}
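
GetThreadCount() now caps the number of GPU threads at a percentage of what the config lists: `max_gpu_threads_count` is treated as a percentage (it is divided by 100.0) and the configured count is rounded up after scaling. A small self-contained illustration of the arithmetic (the helper name is made up for this example):

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>

// Same arithmetic as the new GetThreadCount() cap, isolated for illustration.
size_t capped_threads(size_t configured, size_t max_gpu_threads_percent)
{
	size_t cap = (size_t)std::ceil((float)configured * (max_gpu_threads_percent / 100.0));
	return configured > cap ? cap : configured;
}

int main()
{
	std::printf("%zu\n", capped_threads(6, 50));  // 50% of 6 threads -> ceil(3.0) = 3
	std::printf("%zu\n", capped_threads(4, 100)); // 100% -> no reduction, 4
	return 0;
}
```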

bool jconf::parse_config(const char* sFilename)
{
FILE * pFile;
char * buffer;
size_t flen;

pFile = fopen(sFilename, "rb");
if (pFile == NULL)
if (params::inst().no_config_files)
{
printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename);
return false;
if (params::inst().configFileAMD.size() <= 16)
{
printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename);
return false;
}
flen = params::inst().configFileAMD.size();
buffer = (char*)malloc(flen + 3);
memcpy(buffer + 1, params::inst().configFileAMD.c_str(), flen);
}
else
{
FILE * pFile = fopen(sFilename, "rb");
if (pFile == NULL)
{
printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename);
return false;
}

fseek(pFile,0,SEEK_END);
flen = ftell(pFile);
rewind(pFile);
fseek(pFile, 0, SEEK_END);
flen = ftell(pFile);
rewind(pFile);

if(flen >= 64*1024)
{
fclose(pFile);
printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename);
return false;
}
if (flen >= 64 * 1024)
{
fclose(pFile);
printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename);
return false;
}

if(flen <= 16)
{
printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename);
return false;
}
if (flen <= 16)
{
printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename);
return false;
}

buffer = (char*)malloc(flen + 3);
if(fread(buffer+1, flen, 1, pFile) != 1)
{
free(buffer);
buffer = (char*)malloc(flen + 3);
if (fread(buffer + 1, flen, 1, pFile) != 1)
{
free(buffer);
fclose(pFile);
printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename);
return false;
}
fclose(pFile);
printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename);
return false;

}
fclose(pFile);

//Replace Unicode BOM with spaces - we always use UTF-8
unsigned char* ubuffer = (unsigned char*)buffer;
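
Both branches of the rewritten parse_config() leave the raw config text at `buffer + 1` inside a `flen + 3` byte allocation, i.e. one spare byte before the content and two after. This diff does not show how those bytes are used, but the layout is presumably there so the unchanged parser code can wrap the bare key/value config in braces and terminate it in place, along the lines of this hedged sketch:

```cpp
#include <cstddef>

// Assumption about the surrounding (unchanged) parser code, not shown in this diff.
void wrap_in_braces(char* buffer, size_t flen)
{
	buffer[0] = '{';         // spare byte in front of the content at buffer + 1
	buffer[flen + 1] = '}';  // first spare byte behind the content
	buffer[flen + 2] = '\0'; // second spare byte: string terminator
}
```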
22 changes: 17 additions & 5 deletions xmrstak/backend/amd/minethd.cpp
@@ -51,8 +51,8 @@ minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::th
{
this->backendType = iBackend::AMD;
oWork = pWork;
bQuit = 0;
iThreadNo = (uint8_t)iNo;
thdNo = iNo;
iJobNo = 0;
iHashCount = 0;
iTimestamp = 0;
@@ -113,7 +113,7 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
{
std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>();

if(!configEditor::file_exist(params::inst().configFileAMD))
if(!configEditor::file_exist(params::inst().configFileAMD) || params::inst().no_config_files)
{
autoAdjust adjust;
if(!adjust.printConfig())
Expand Down Expand Up @@ -161,7 +161,6 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
return pvThreads;
}


void minethd::work_main()
{
if(affinity >= 0) //-1 means no affinity
@@ -213,8 +212,10 @@ void minethd::work_main()
* raison d'etre of this software it is sensible to just wait until we have something
*/

while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo && !bQuit)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (bQuit)
break;

globalStates::inst().consume_work(oWork, iJobNo);
continue;
@@ -251,8 +252,14 @@ void minethd::work_main()
if(oWork.bNiceHash)
pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);

while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo && !bQuit)
{
while ((bSuspend || pause_idle) && !bQuit)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

if (bQuit)
break;

//Allocate a new nonce every 16 rounds
if((round_ctr++ & 0xF) == 0)
{
@@ -349,6 +356,11 @@ void minethd::work_main()

globalStates::inst().consume_work(oWork, iJobNo);
}

FinalizeOpenCL(pGpuCtx);

cryptonight_free_ctx(cpu_ctx);

}

} // namespace amd
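
The shutdown and pause logic added to work_main() polls three flags: `bSuspend`/`pause_idle` park the loop in the 100 ms sleep without releasing anything, while `bQuit` breaks out of both wait loops so FinalizeOpenCL() and cryptonight_free_ctx() run before the thread exits. Since `bQuit` is removed from amd::minethd in the header change below, it presumably now lives on the iBackend base. A hedged sketch of how a controller could drive these flags (types and joining are simplified; nothing here is copied from the PR):

```cpp
#include <thread>
#include <vector>

struct Worker // stand-in for an iBackend-derived miner thread object
{
	volatile bool bQuit = false;
	volatile bool bSuspend = false;
	std::thread thd;
};

// Pausing keeps GPU state alive; the worker just spins in its 100 ms sleep loop.
void pause_all(std::vector<Worker*>& workers)
{
	for(Worker* w : workers)
		w->bSuspend = true;
}

// Quitting lets work_main() fall through to FinalizeOpenCL()/cryptonight_free_ctx().
void stop_all(std::vector<Worker*>& workers)
{
	for(Worker* w : workers)
		w->bQuit = true;
	for(Worker* w : workers)
		if(w->thd.joinable())
			w->thd.join();
}
```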
5 changes: 0 additions & 5 deletions xmrstak/backend/amd/minethd.hpp
@@ -34,14 +34,9 @@ class minethd : public iBackend

miner_work oWork;

std::promise<void> order_fix;
std::mutex thd_aff_set;

std::thread oWorkThd;
int64_t affinity;
uint32_t autoTune;

bool bQuit;
bool bNoPrefetch;

//Mutable ptr to vector below, different for each thread