Skip to content

Commit 44e9b3b

Browse files
authored Mar 5, 2019
Merge pull request #227 from SChernykh/dev
CryptoNight v8 ReverseWaltz
2 parents d856f83 + 04e2685 commit 44e9b3b

38 files changed

+3470
-3855
lines changed
 

‎cmake/asm.cmake

+2-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
2323
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
2424
set(XMRIG_ASM_FILES
2525
"src/crypto/asm/win64/cn_main_loop.S"
26-
"src/crypto/asm/win64/CryptonightR_template.S"
26+
"src/crypto/asm/CryptonightR_template.S"
2727
)
2828
else()
2929
set(XMRIG_ASM_FILES
@@ -36,7 +36,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
3636
endif()
3737

3838
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
39-
set(XMRIG_ASM_SOURCES "")
39+
set(XMRIG_ASM_SOURCES src/crypto/CryptonightR_gen.cpp)
4040
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
4141
else()
4242
set(XMRIG_ASM_SOURCES "")

‎src/amd/OclGPU.cpp

+14-1
Original file line number | Diff line number | Diff line change
@@ -106,14 +106,21 @@ inline static int cn1KernelOffset(xmrig::Variant variant)
106106
case xmrig::VARIANT_HALF:
107107
return 12;
108108

109+
// 13, 14 reserved for cn/gpu cn0
110+
109111
# ifndef XMRIG_NO_CN_GPU
110112
case xmrig::VARIANT_GPU:
111113
return 15;
112114
# endif
113115

116+
// 16 reserved for cn/gpu cn2
117+
118+
case xmrig::VARIANT_RWZ:
119+
return 17;
120+
114121
case xmrig::VARIANT_WOW:
115122
case xmrig::VARIANT_4:
116-
return 17;
123+
return 18;
117124

118125
default:
119126
break;
@@ -249,11 +256,17 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch
249256
"cn1_monero", "cn1_msr", "cn1_xao", "cn1_tube", "cn1_v2_monero", "cn1_v2_half",
250257
# ifndef XMRIG_NO_CN_GPU
251258
"cn0_cn_gpu", "cn00_cn_gpu", "cn1_cn_gpu", "cn2_cn_gpu",
259+
# else
260+
"", "", "", "",
252261
# endif
262+
"cn1_v2_rwz",
253263

254264
nullptr
255265
};
256266
for (int i = 0; KernelNames[i]; ++i) {
267+
if (!KernelNames[i][0])
268+
continue;
269+
257270
ctx->Kernels[i] = OclLib::createKernel(ctx->Program, KernelNames[i], &ret);
258271
if (ret != CL_SUCCESS) {
259272
return OCL_ERR_API;

‎src/amd/opencl/cryptonight.cl

+156
Original file line number | Diff line number | Diff line change
@@ -989,6 +989,162 @@ __kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, ui
989989
# endif
990990
}
991991

992+
// cn1_v2_rwz: per-work-item iteration kernel for the "rwz" CryptoNight variant.
// The whole body is compiled only when ALGO == CRYPTONIGHT.
//
// Each work-item loads its a/b registers from its slice of `states`, then runs
// 0x60000 iterations over its own scratchpad region. One iteration does an AES
// round, a shuffle of the three neighbouring 16-byte chunks, a division/sqrt
// step, a 64x64-bit multiply, and a second chunk shuffle.
//
// Parameters:
//   Scratchpad - global scratchpad memory; re-based per work-item below
//   states     - per-work-item state, 25 ulongs each (words 0-13 are read here)
//   variant    - not read by this kernel
//   input      - not read by this kernel
//   Threads    - used for the COMP_MODE bounds check and for the
//                STRIDED_INDEX == 1 addressing
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
993+
__kernel void cn1_v2_rwz(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
994+
{
995+
# if (ALGO == CRYPTONIGHT)
996+
ulong a[2], b[4];
997+
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
998+
999+
const ulong gIdx = getIdx();
1000+
// Fill the four local AES tables: each work-item handles entries
// get_local_id(0), +WORKSIZE, ...; AES1..AES3 are AES0_C rotated by 8/16/24 bits.
1001+
for(int i = get_local_id(0); i < 256; i += WORKSIZE)
1002+
{
1003+
const uint tmp = AES0_C[i];
1004+
AES0[i] = tmp;
1005+
AES1[i] = rotate(tmp, 8U);
1006+
AES2[i] = rotate(tmp, 16U);
1007+
AES3[i] = rotate(tmp, 24U);
1008+
}
1009+
// Wait until every work-item in the group has written its share of the tables.
1010+
barrier(CLK_LOCAL_MEM_FENCE);
1011+
1012+
# if (COMP_MODE == 1)
1013+
// do not use early return here
1014+
if (gIdx < Threads)
1015+
# endif
1016+
{
// Advance to this work-item's 25-ulong state record.
1017+
states += 25 * gIdx;
1018+
// Re-base Scratchpad to this work-item's region; the offset formula depends on
// __NV_CL_C_VERSION and, otherwise, on the STRIDED_INDEX layout mode.
1019+
# if defined(__NV_CL_C_VERSION)
1020+
Scratchpad += gIdx * (0x40000 >> 2);
1021+
# else
1022+
# if (STRIDED_INDEX == 0)
1023+
Scratchpad += gIdx * (MEMORY >> 4);
1024+
# elif (STRIDED_INDEX == 1)
1025+
Scratchpad += gIdx;
1026+
# elif (STRIDED_INDEX == 2)
1027+
Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
1028+
# endif
1029+
# endif
1030+
// a and b[0..1] are state words 0-3 XORed with words 4-7;
// b[2..3] are words 8-9 XORed with words 10-11.
1031+
a[0] = states[0] ^ states[4];
1032+
a[1] = states[1] ^ states[5];
1033+
1034+
b[0] = states[2] ^ states[6];
1035+
b[1] = states[3] ^ states[7];
1036+
b[2] = states[8] ^ states[10];
1037+
b[3] = states[9] ^ states[11];
1038+
}
1039+
// bx0 = b[0..1], bx1 = b[2..3], viewed as 128-bit vectors.
1040+
ulong2 bx0 = ((ulong2 *)b)[0];
1041+
ulong2 bx1 = ((ulong2 *)b)[1];
1042+
1043+
mem_fence(CLK_LOCAL_MEM_FENCE);
1044+
// SCRATCHPAD_CHUNK(N) is the 16-byte chunk at byte offset (idx ^ (N << 4)).
// With __NV_CL_C_VERSION it addresses a per-work-item 64-byte line in local
// memory via idx1; otherwise it addresses global memory per STRIDED_INDEX.
1045+
# ifdef __NV_CL_C_VERSION
1046+
__local uint16 scratchpad_line_buf[WORKSIZE];
1047+
__local uint16* scratchpad_line = scratchpad_line_buf + get_local_id(0);
1048+
# define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idx1 ^ (N << 4))))
1049+
# else
1050+
# if (STRIDED_INDEX == 0)
1051+
# define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + (idx ^ (N << 4))))
1052+
# elif (STRIDED_INDEX == 1)
1053+
# define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + mul24(as_uint(idx ^ (N << 4)), Threads)))
1054+
# elif (STRIDED_INDEX == 2)
1055+
# define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + (((idx ^ (N << 4)) % (MEM_CHUNK << 4)) + ((idx ^ (N << 4)) / (MEM_CHUNK << 4)) * WORKSIZE * (MEM_CHUNK << 4))))
1056+
# endif
1057+
# endif
1058+
1059+
# if (COMP_MODE == 1)
1060+
// do not use early return here
1061+
if (gIdx < Threads)
1062+
# endif
1063+
{
// Seed the division/sqrt state from state word 12 and the low 32 bits of word 13.
1064+
uint2 division_result = as_uint2(states[12]);
1065+
uint sqrt_result = as_uint2(states[13]).s0;
1066+
1067+
#pragma unroll UNROLL_FACTOR
1068+
for(int i = 0; i < 0x60000; ++i)
1069+
{
// First half: address derived from a[0]. In the __NV_CL_C_VERSION path the
// 64-byte-aligned line (idx) is copied to local memory and idx1 selects the
// 16-byte chunk inside it.
1070+
# ifdef __NV_CL_C_VERSION
1071+
uint idx = a[0] & 0x1FFFC0;
1072+
uint idx1 = a[0] & 0x30;
1073+
1074+
*scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx);
1075+
# else
1076+
uint idx = a[0] & MASK;
1077+
# endif
1078+
1079+
uint4 c = SCRATCHPAD_CHUNK(0);
1080+
c = AES_Round(AES0, AES1, AES2, AES3, c, ((uint4 *)a)[0]);
1081+
// First shuffle: read chunks 3, 2, 1, then write chunk 1 += bx1 (from old 1),
// chunk 2 = old 3 + bx0, chunk 3 = old 2 + a. Order of reads/writes matters.
1082+
{
1083+
const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3));
1084+
const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
1085+
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1));
1086+
1087+
SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + bx1);
1088+
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + bx0);
1089+
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
1090+
}
1091+
1092+
SCRATCHPAD_CHUNK(0) = as_uint4(bx0) ^ c;
1093+
// Second half: address derived from the low 64 bits of c. The
// __NV_CL_C_VERSION path first writes the modified line back to global memory,
// then loads the new line.
1094+
# ifdef __NV_CL_C_VERSION
1095+
*(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line;
1096+
1097+
idx = as_ulong2(c).s0 & 0x1FFFC0;
1098+
idx1 = as_ulong2(c).s0 & 0x30;
1099+
1100+
*scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx);
1101+
# else
1102+
idx = as_ulong2(c).s0 & MASK;
1103+
# endif
1104+
1105+
uint4 tmp = SCRATCHPAD_CHUNK(0);
1106+
// Mix the previous division/sqrt results into the low 64 bits of tmp, then
// compute the next ones from c via fast_div_v2 / fast_sqrt_v2.
1107+
{
1108+
tmp.s0 ^= division_result.s0;
1109+
tmp.s1 ^= division_result.s1 ^ sqrt_result;
1110+
1111+
division_result = fast_div_v2(as_ulong2(c).s1, (c.s0 + (sqrt_result << 1)) | 0x80000001UL);
1112+
sqrt_result = fast_sqrt_v2(as_ulong2(c).s0 + as_ulong(division_result));
1113+
}
1114+
// t = 128-bit product of the low 64-bit halves of c and tmp:
// s0 holds the high 64 bits (mul_hi), s1 the low 64 bits.
1115+
ulong2 t;
1116+
t.s0 = mul_hi(as_ulong2(c).s0, as_ulong2(tmp).s0);
1117+
t.s1 = as_ulong2(c).s0 * as_ulong2(tmp).s0;
// Second shuffle: chunk 1 is XORed with t before use, t is then XORed with
// chunk 2; writes are chunk 1 = (old 1 ^ t) + bx1, chunk 2 = old 3 + bx0,
// chunk 3 = old 2 + a.
1118+
{
1119+
const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(1)) ^ t;
1120+
const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
1121+
t ^= chunk2;
1122+
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
1123+
1124+
SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + bx1);
1125+
SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + bx0);
1126+
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
1127+
}
1128+
1129+
a[1] += t.s1;
1130+
a[0] += t.s0;
1131+
1132+
SCRATCHPAD_CHUNK(0) = ((uint4 *)a)[0];
1133+
1134+
# ifdef __NV_CL_C_VERSION
1135+
*(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line;
1136+
# endif
1137+
// Prepare the next iteration: a ^= tmp, and the b registers shift (bx1 <- bx0 <- c).
1138+
((uint4 *)a)[0] ^= tmp;
1139+
bx1 = bx0;
1140+
bx0 = as_ulong2(c);
1141+
}
1142+
1143+
# undef SCRATCHPAD_CHUNK
1144+
}
1145+
mem_fence(CLK_GLOBAL_MEM_FENCE);
1146+
# endif
1147+
}
9921148

9931149
)==="
9941150
R"===(

‎src/base/net/Pool.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ xmrig::Pool::Pool(const rapidjson::Value &object) :
132132

133133

134134
xmrig::Pool::Pool(const char *host, uint16_t port, const char *user, const char *password, int keepAlive, bool nicehash, bool tls) :
135+
m_enabled(true),
135136
m_nicehash(nicehash),
136137
m_tls(tls),
137138
m_keepAlive(keepAlive),
@@ -483,6 +484,7 @@ void xmrig::Pool::rebuild()
483484
m_algorithms.push_back(m_algorithm);
484485

485486
# ifndef XMRIG_PROXY_PROJECT
487+
addVariant(VARIANT_RWZ);
486488
addVariant(VARIANT_4);
487489
addVariant(VARIANT_WOW);
488490
addVariant(VARIANT_2);

‎src/common/crypto/Algorithm.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ static AlgoData const algorithms[] = {
6666
{ "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
6767
{ "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW },
6868
{ "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 },
69+
{ "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ },
6970

7071
# ifndef XMRIG_NO_AEON
7172
{ "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },
@@ -133,6 +134,7 @@ static const char *variants[] = {
133134
"gpu",
134135
"wow",
135136
"r",
137+
"rwz"
136138
};
137139

138140

‎src/common/xmrig.h

+1
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ enum Variant {
7676
VARIANT_GPU = 11, // CryptoNight-GPU (Ryo)
7777
VARIANT_WOW = 12, // CryptoNightR (Wownero)
7878
VARIANT_4 = 13, // CryptoNightR (Monero's variant 4)
79+
VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft)
7980
VARIANT_MAX
8081
};
8182

0 commit comments

Comments
 (0)
Please sign in to comment.