2.7.1

IndeedMiners · Jan 6, 2019 · c0a67e0 · c0a67e0
1 parent 85c5bc3
commit c0a67e0
Show file tree

Hide file tree

Showing 27 changed files with 737 additions and 301 deletions.
diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md
@@ -30,15 +30,16 @@
     - CUDA/Runtime
     - Driver components
 
-### AMD DRIVER/APP SDK 3.0 (only needed for AMD GPUs)
+### AMD DRIVER/OCL-SDK (only needed for AMD GPUs)
 
 - Download & install the AMD driver: https://www.amd.com/en/support
 
 **ATTENTION** Many windows driver 18.5+ creating invalid shares.
 If you have an issue with `invalid shares` please downgrade your driver.
 
-- Download and install the latest version from http://amd-dev.wpengine.netdna-cdn.com/app-sdk/installers/APPSDKInstaller/3.0.130.135-GA/full/AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe
-  (do not wonder why it is a link to a netdna-cdn.com but AMD has removed the SDK downloads, see https://community.amd.com/thread/222855)
+- Download and install the latest version of the OCL-SDK from https://github.com/GPUOpen-LibrariesAndSDKs/OCL-SDK/releases 
+
+Do not follow old information that you need the AMD APP SDK. AMD has removed the APP SDK and is now shipping the OCL-SDK_light.
 
 ### Dependencies OpenSSL/Hwloc and Microhttpd
 - For CUDA 8*:
@@ -115,4 +116,4 @@ If you have an issue with `invalid shares` please downgrade your driver.
 
   copy C:\xmr-stak-dep\openssl\bin\* .
   ```
-- Miner is by default compiled for NVIDIA GPUs (if CUDA is installed), AMD GPUs (if the AMD APP SDK is installed) and CPUs.
+- Miner is by default compiled for NVIDIA GPUs (if CUDA is installed), AMD GPUs (if the AMD OCL-SDK_light is installed) and CPUs.
diff --git a/doc/img/interleave.png b/doc/img/interleave.png
diff --git a/doc/tuning.md b/doc/tuning.md
@@ -10,6 +10,7 @@
   * [Choose `intensity` and `worksize`](#choose-intensity-and-worksize)
   * [Add more GPUs](#add-more-gpus)
   * [Two Threads per GPU](two-threads-per-gpu)
+  * [Interleave Tuning](#interleave-tuning)
   * [disable comp_mode](#disable-comp_mode)
   * [change the scratchpad memory pattern](change-the-scratchpad-memory-pattern)
   * [Increase Memory Pool](#increase-memory-pool)
@@ -83,13 +84,13 @@ If you are unsure of either GPU or platform index value, you can use `clinfo` to
 ```
 "gpu_threads_conf" :
 [
-    {
-      "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
-      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true
+    { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
+      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
+      "interleave" : 40
     },
-    {
-      "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
-      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true
+    { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
+      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
+      "interleave" : 40
     },
 ],
 
@@ -107,19 +108,49 @@ Therefore adjust your intensity by hand.
 ```
 "gpu_threads_conf" :
 [
-    {
-      "index" : 0, "intensity" : 768, "worksize" : 8, "affine_to_cpu" : false,
-      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true
+    { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
+      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
+      "interleave" : 40
     },
-    {
-      "index" : 0, "intensity" : 768, "worksize" : 8, "affine_to_cpu" : false,
-      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true
+    { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
+      "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
+      "interleave" : 40
     },
 ],
 
 "platform_index" : 0,
 ```
 
+### Interleave Tuning
+
+Interleave controls when a worker thread starts to calculate a bunch of hashes
+when two worker threads are used to utilize one GPU.
+This option has no effect if only one worker thread is used per GPU.
+
+![Interleave](img/interleave.png) 
+
+Interleave defines how long a thread needs to wait to start the next hash calculation relative to the last started worker thread.
+Choosing an interleave value larger than 50% makes no sense because then the GPU will not be utilized well enough.
+In most cases the default of 40 is a good value, but on some systems (e.g. the Linux ROCm 1.9.1 driver with RX5XX) you need to adjust the value.
+If you get many interleave messages in a row (for over 1 minute) you should adjust the value.
+
+```
+OpenCL Interleave 0|1: 642/2400.50 ms - 30.1
+OpenCL Interleave 0|0: 355/2265.05 ms - 30.2
+OpenCL Interleave 0|1: 221/2215.65 ms - 30.2
+```
+
+description:
+```
+<gpu id>|<thread id on the gpu>: <last delay>/<average calculation per hash bunch> ms - <interleave value>
+
+```
+`last delay` should slowly go to 0.
+If it goes down and then jumps to a very large value multiple times within a minute, you should reduce the intensity by 5.
+The `intensity value` will automatically go up and down within the range of +-5% to adjust kernel run-time fluctuations.
+Automatic adjustment is disabled as long as `auto-tuning` is active and will be started after it is finished. 
+If `last delay` goes down to 10ms and the messages stop, reappearing only from time to time with delays of up to 15ms, you already have a good value.
+
 ### disable comp_mode
 
 `comp_mode` means compatibility mode and removes some checks in compute kernel those takes care that the miner can be used on a wide range of AMD/OpenCL GPU devices.