Add ARM NEON optimization by k06a · Pull Request #434 · Creepsky/creepMiner · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Add ARM NEON optimization #434

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ file(GLOB SOURCE_FILES
option(USE_SSE4 "If yes, SSE4 will be enabled" ON)
option(USE_AVX "If yes, AVX will be enabled" ON)
option(USE_AVX2 "If yes, AVX2 will be enabled" ON)
option(USE_NEON "If yes, NEON will be enabled" OFF)

if (USE_SSE4 AND NOT MINIMAL_BUILD)
add_definitions(-DUSE_SSE4)
Expand Down Expand Up @@ -176,6 +177,18 @@ if (USE_AVX2 AND NOT MINIMAL_BUILD)
endif ()
endif ()

# Optional ARM NEON backend: adds the NEON source file to the build and
# compiles it with the architecture flags it needs.
if (USE_NEON AND NOT MINIMAL_BUILD)
    add_definitions(-DUSE_NEON)
    set(SOURCE_FILES ${SOURCE_FILES} src/shabal/mshabal/mshabal_neon.cpp)
    if (UNIX OR APPLE)
        # COMPILE_FLAGS is a single string property and every
        # set_source_files_properties() call replaces the previous value,
        # so all flags have to be passed together in one call. Setting them
        # in three separate calls left only the last flag in effect.
        set_source_files_properties(src/shabal/mshabal/mshabal_neon.cpp PROPERTIES COMPILE_FLAGS "-marm -march=native -mfpu=neon")
    elseif (MSVC)
        # NOTE(review): confirm that /arch:ARMv7 is an accepted value for the
        # MSVC ARM toolchain in use; unknown /arch values are typically ignored.
        set_source_files_properties(src/shabal/mshabal/mshabal_neon.cpp PROPERTIES COMPILE_FLAGS /arch:ARMv7)
    endif ()
endif ()

if (USE_CUDA AND NOT MINIMAL_BUILD AND NOT NO_GPU)
set(SOURCE_FILES ${SOURCE_FILES} src/shabal/cuda/Shabal.cu)
endif ()
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The creepMiner is a client application for mining Burst on a pool or solo. For m
creepMiner is written in C++ and is multi-threaded to get the best performance, it can also be compiled on most operating systems.

## Features
- Mine with your **CPU** (__SSE2__/__SSE4__/__AVX__/__AVX2__) or your **GPU** (__OpenCL__, __CUDA__)
- Mine with your **CPU** (__SSE2__/__SSE4__/__AVX__/__AVX2__/__NEON__) or your **GPU** (__OpenCL__, __CUDA__)
- Mine **solo** or in a **pool**
- Multi Mining (Build a network of several miners)
- Filter bad deadlines with the auto target deadline feature
Expand Down
13 changes: 10 additions & 3 deletions resources/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

usage()
{
echo "Usage: install.sh [cpu] [gpu] [min] [cuda] [cl] [sse4] [avx] [avx2] [help]"
echo "cpu: builds the cpu version (sse2 + sse4 + avx + avx2)"
echo "Usage: install.sh [cpu] [gpu] [min] [cuda] [cl] [sse4] [avx] [avx2] [neon] [help]"
echo "cpu: builds the cpu version (sse2 + sse4 + avx + avx2 + neon)"
echo "gpu: builds the gpu version (opencl + cuda + cpu)"
echo "min: builds the minimal version (only sse2)"
echo "cuda: adds CUDA to the build"
Expand All @@ -13,6 +13,7 @@ usage()
echo "sse4: adds sse4 to the build"
echo "avx: adds avx to the build"
echo "avx2: adds avx2 to the build"
echo "neon: adds neon to the build"
echo "help: shows this help"
}

Expand All @@ -21,6 +22,7 @@ set_cpu()
sse4=$1
avx=$1
avx2=$1
neon=$1
}

set_gpu()
Expand Down Expand Up @@ -58,6 +60,9 @@ do
elif [ $i = "avx2" ]
then
avx2=true
elif [ $i = "neon" ]
then
neon=true
elif [ $i = "cl" ]
then
opencl=true
Expand All @@ -79,16 +84,18 @@ use_flag()
use_sse4=$(use_flag "USE_SSE4" $sse4)
use_avx=$(use_flag "USE_AVX" $avx)
use_avx2=$(use_flag "USE_AVX2" $avx2)
use_neon=$(use_flag "USE_NEON" $neon)
use_opencl=$(use_flag "USE_OPENCL" $opencl)
use_cuda=$(use_flag "USE_CUDA" $cuda)

echo $use_sse4
echo $use_avx
echo $use_avx2
echo $use_neon
echo $use_opencl
echo $use_cuda

conan install . --build=missing -s compiler.libcxx=libstdc++11
rm CMakeCache.txt -f
cmake . -DCMAKE_BUILD_TYPE=RELEASE $use_sse4 $use_avx $use_avx2 $use_opencl $use_cuda
cmake . -DCMAKE_BUILD_TYPE=RELEASE $use_sse4 $use_avx $use_avx2 $use_neon $use_opencl $use_cuda
make -j$(nproc)
1 change: 1 addition & 0 deletions resources/settingsui/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ public MainWindow()
this.CPUInstSet.Add(new Base("SSE4"));
this.CPUInstSet.Add(new Base("AVX"));
this.CPUInstSet.Add(new Base("AVX2"));
this.CPUInstSet.Add(new Base("NEON"));

this.ProcessorType.Add(new Base("CPU"));
this.ProcessorType.Add(new Base("CUDA"));
Expand Down
6 changes: 6 additions & 0 deletions src/Declarations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ const bool Burst::Settings::Avx2 = true;
const bool Burst::Settings::Avx2 = false;
#endif

// Build-time switch: true only when the binary was compiled with USE_NEON.
#if defined(USE_NEON)
const bool Burst::Settings::Neon = true;
#else
const bool Burst::Settings::Neon = false;
#endif

#ifdef USE_CUDA
const bool Burst::Settings::Cuda = true;
#else
Expand Down
2 changes: 1 addition & 1 deletion src/Declarations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ namespace Burst
extern std::string Cpu_Instruction_Set;
extern ProjectData Project;

extern const bool Sse4, Avx, Avx2, Cuda, OpenCl;
extern const bool Sse4, Avx, Avx2, Neon, Cuda, OpenCl;

void setCpuInstructionSet(std::string cpuInstructionSet);
};
Expand Down
15 changes: 14 additions & 1 deletion src/MinerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,14 +927,27 @@ bool Burst::cpuHasInstructionSet(CpuInstructionSet cpuInstructionSet)
case sse4: return (instructionSets & sse4) == sse4;
case avx: return (instructionSets & avx) == avx;
case avx2: return (instructionSets & avx2) == avx2;
case neon: return (instructionSets & neon) == neon;
default: return false;
}
}

int Burst::cpuGetInstructionSets()
{
#if defined __arm__
return sse2;
auto instruction_sets = 0;

instruction_sets |= sse2;

#if defined __ARM_NEON__
instruction_sets |= neon;
#endif

#if defined __ARM_FEATURE_SIMD32
instruction_sets |= neon;
#endif

return instruction_sets;
#elif defined __GNUC__
auto instruction_sets = 0;

Expand Down
3 changes: 2 additions & 1 deletion src/MinerUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ namespace Burst
sse2 = 1 << 0,
sse4 = 1 << 1,
avx = 1 << 2,
avx2 = 1 << 3
avx2 = 1 << 3,
neon = 1 << 4
};

bool isNumberStr(const std::string& str);
Expand Down
1 change: 1 addition & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ int main(const int argc, const char* argv[])
checkAndPrint(Sse4, "SSE4");
checkAndPrint(Avx, "AVX");
checkAndPrint(Avx2, "AVX2");
checkAndPrint(Neon, "NEON");

log_information(general, Burst::Settings::Project.nameAndVersionVerbose);
log_information(general, "%s mode%s", mode, sstream.str());
Expand Down
2 changes: 2 additions & 0 deletions src/mining/Miner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,8 @@ void Burst::Miner::createPlotVerifiers()
createWorker(MinerHelper::create_worker_default<PlotVerifier_avx>);
else if (cpuInstructionSet == "AVX2" && Settings::Avx2)
createWorker(MinerHelper::create_worker_default<PlotVerifier_avx2>);
else if (cpuInstructionSet == "NEON" && Settings::Neon)
createWorker(MinerHelper::create_worker_default<PlotVerifier_neon>);
else if (cpuInstructionSet == "SSE2")
createWorker(MinerHelper::create_worker_default<PlotVerifier_sse2>);
else
Expand Down
2 changes: 2 additions & 0 deletions src/mining/MinerConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ Burst::ReadConfigFileResult Burst::MinerConfig::readConfigFile(const std::string
cpuInstructionSet_ = "AVX";
else if (cpuHasInstructionSet(CpuInstructionSet::sse4))
cpuInstructionSet_ = "SSE4";
else if (cpuHasInstructionSet(CpuInstructionSet::neon))
cpuInstructionSet_ = "NEON";
else
cpuInstructionSet_ = "SSE2";
}
Expand Down
12 changes: 12 additions & 0 deletions src/plots/PlotGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ std::array<std::vector<char>, Burst::Shabal256_AVX2::HashSize> Burst::PlotGenera
return generate<Shabal256_AVX2, PlotGeneratorOperations8<Shabal256_AVX2>>(account, startNonce);
}

/// Generates plot data with the NEON Shabal implementation by forwarding to
/// the generic generate() template with the 4-lane operations type.
std::array<std::vector<char>, Burst::Shabal256_NEON::HashSize> Burst::PlotGenerator::generateNeon(const Poco::UInt64 account, const Poco::UInt64 startNonce)
{
	using NeonOperations = PlotGeneratorOperations4<Shabal256_NEON>;

	return generate<Shabal256_NEON, NeonOperations>(account, startNonce);
}

Poco::UInt64 Burst::PlotGenerator::calculateDeadlineSse2(std::vector<char>& gendata,
GensigData& generationSignature, const Poco::UInt64 scoop, const Poco::UInt64 baseTarget)
{
Expand Down Expand Up @@ -254,6 +259,13 @@ std::array<Poco::UInt64, Burst::Shabal256_AVX2::HashSize> Burst::PlotGenerator::
return calculateDeadline<Shabal256_AVX2, PlotGeneratorOperations8<Shabal256_AVX2>>(gendatas, generationSignature, scoop, baseTarget);
}

/// Calculates deadlines with the NEON Shabal implementation by forwarding to
/// the generic calculateDeadline() template with the 4-lane operations type.
std::array<Poco::UInt64, Burst::Shabal256_NEON::HashSize> Burst::PlotGenerator::calculateDeadlineNeon(
	std::array<std::vector<char>, Shabal256_NEON::HashSize>& gendatas,
	GensigData& generationSignature,
	const Poco::UInt64 scoop,
	const Poco::UInt64 baseTarget)
{
	using NeonOperations = PlotGeneratorOperations4<Shabal256_NEON>;

	return calculateDeadline<Shabal256_NEON, NeonOperations>(
		gendatas, generationSignature, scoop, baseTarget);
}

void Burst::PlotGenerator::convertToPoC2(char* gendata)
{
std::array<char, Settings::HashSize> buffer{};
Expand Down
5 changes: 5 additions & 0 deletions src/plots/PlotGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace Burst
static std::array<std::vector<char>, Shabal256_AVX::HashSize> generateAvx(Poco::UInt64 account, Poco::UInt64 startNonce);
static std::array<std::vector<char>, Shabal256_SSE4::HashSize> generateSse4(Poco::UInt64 account, Poco::UInt64 startNonce);
static std::array<std::vector<char>, Shabal256_AVX2::HashSize> generateAvx2(Poco::UInt64 account, Poco::UInt64 startNonce);
static std::array<std::vector<char>, Shabal256_NEON::HashSize> generateNeon(Poco::UInt64 account, Poco::UInt64 startNonce);

static Poco::UInt64 calculateDeadlineSse2(std::vector<char>& gendata,
GensigData& generationSignature, Poco::UInt64 scoop, Poco::UInt64 baseTarget);
Expand All @@ -62,6 +63,10 @@ namespace Burst
std::array<std::vector<char>, Shabal256_AVX2::HashSize>& gendatas,
GensigData& generationSignature, Poco::UInt64 scoop, Poco::UInt64 baseTarget);

static std::array<Poco::UInt64, Shabal256_NEON::HashSize> calculateDeadlineNeon(
std::array<std::vector<char>, Shabal256_NEON::HashSize>& gendatas,
GensigData& generationSignature, Poco::UInt64 scoop, Poco::UInt64 baseTarget);

private:
template <typename TShabal, typename TOperations>
static std::array<std::vector<char>, TShabal::HashSize> generate(const Poco::UInt64 account, const Poco::UInt64 startNonce)
Expand Down
3 changes: 3 additions & 0 deletions src/plots/PlotVerifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,16 +320,19 @@ namespace Burst
using PlotVerifierOperation_sse4 = PlotVerifierOperations_4<Shabal256_SSE4>;
using PlotVerifierOperation_avx = PlotVerifierOperations_4<Shabal256_AVX>;
using PlotVerifierOperation_avx2 = PlotVerifierOperations_8<Shabal256_AVX2>;
using PlotVerifierOperation_neon = PlotVerifierOperations_4<Shabal256_NEON>;

using PlotVerifierAlgorithm_sse2 = PlotVerifierAlgorithm_cpu<Shabal256_SSE2, PlotVerifierOperation_sse2>;
using PlotVerifierAlgorithm_sse4 = PlotVerifierAlgorithm_cpu<Shabal256_SSE4, PlotVerifierOperation_sse4>;
using PlotVerifierAlgorithm_avx = PlotVerifierAlgorithm_cpu<Shabal256_AVX, PlotVerifierOperation_avx>;
using PlotVerifierAlgorithm_avx2 = PlotVerifierAlgorithm_cpu<Shabal256_AVX2, PlotVerifierOperation_avx2>;
using PlotVerifierAlgorithm_neon = PlotVerifierAlgorithm_cpu<Shabal256_NEON, PlotVerifierOperation_neon>;

using PlotVerifier_sse2 = PlotVerifier<PlotVerifierAlgorithm_sse2>;
using PlotVerifier_sse4 = PlotVerifier<PlotVerifierAlgorithm_sse4>;
using PlotVerifier_avx = PlotVerifier<PlotVerifierAlgorithm_avx>;
using PlotVerifier_avx2 = PlotVerifier<PlotVerifierAlgorithm_avx2>;
using PlotVerifier_neon = PlotVerifier<PlotVerifierAlgorithm_neon>;

using PlotVerifierAlgorithm_cuda = PlotVerifierAlgorithm_gpu<GpuCuda, Gpu_Algorithm_Atomic>;
using PlotVerifierAlgorithm_opencl = PlotVerifierAlgorithm_gpu<GpuOpenCL, Gpu_Algorithm_Atomic>;
Expand Down
2 changes: 2 additions & 0 deletions src/shabal/MinerShabal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "shabal/impl/mshabal_avx2_impl.hpp"
#include "shabal/impl/mshabal_avx_impl.hpp"
#include "shabal/impl/mshabal_sse4_impl.hpp"
#include "shabal/impl/mshabal_neon_impl.hpp"
#include "shabal/impl/sphlib_impl.hpp"
#include <Poco/ByteOrder.h>

Expand Down Expand Up @@ -68,4 +69,5 @@ namespace Burst
using Shabal256_AVX = Shabal256_Shell<Mshabal_avx_Impl>;
using Shabal256_SSE4 = Shabal256_Shell<Mshabal_sse4_Impl>;
using Shabal256_SSE2 = Shabal256_Shell<Sphlib_Impl>;
using Shabal256_NEON = Shabal256_Shell<Mshabal_neon_Impl>;
}
71 changes: 71 additions & 0 deletions src/shabal/impl/mshabal_neon_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// ==========================================================================
//
// creepMiner - Burstcoin cryptocurrency CPU and GPU miner
// Copyright (C) 2016-2018 Creepsky (creepsky@gmail.com)
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation,
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//
// ==========================================================================

#pragma once

#include "shabal/mshabal/mshabal.h"

namespace Burst
{
	/// Static adapter that maps the init/update/close interface onto the
	/// neon_mshabal_* functions.
	struct Mshabal_neon_Impl
	{
		/// Presumably the number of inputs hashed per call; matches the four
		/// data/output pointers taken by the neon_mshabal_* functions.
		static constexpr size_t HashSize = 4;

		using context_t = mshabal_context;

		/// Initializes the context with an output size of 256.
		static void init(context_t& context)
		{
			neon_mshabal_init(&context, 256);
		}

		/// Feeds `length` bytes from each of the four input buffers.
		static void update(context_t& context,
		                   const void* data1, const void* data2, const void* data3, const void* data4,
		                   size_t length)
		{
			neon_mshabal(&context, data1, data2, data3, data4, length);
		}

		/// Single-buffer form: the same buffer is supplied for all four inputs.
		static void update(context_t& context, const void* data, size_t length)
		{
			neon_mshabal(&context, data, data, data, data, length);
		}

		/// Finalizes the hash and writes the results to the four output
		/// buffers; the extra-bit arguments are all passed as zero.
		static void close(context_t& context,
		                  void* out1, void* out2, void* out3, void* out4)
		{
			neon_mshabal_close(&context, 0, 0, 0, 0, 0, out1, out2, out3, out4);
		}

		/// Single-output form: only the first destination is supplied, the
		/// remaining three are passed as nullptr.
		static void close(context_t& context, void* output)
		{
			close(context, output, nullptr, nullptr, nullptr);
		}
	};
}

#ifndef USE_NEON
// No-op definitions used when the build does not define USE_NEON, so that
// Mshabal_neon_Impl still has functions to call. They do nothing and leave
// the context and output buffers untouched.

inline void neon_mshabal_init(mshabal_context *sc, unsigned out_size)
{
}

inline void neon_mshabal(mshabal_context *sc,
                         const void *data0, const void *data1, const void *data2, const void *data3,
                         size_t len)
{
}

inline void neon_mshabal_close(mshabal_context* sc,
                               unsigned ub0, unsigned ub1, unsigned ub2, unsigned ub3, unsigned n,
                               void* dst0, void* dst1, void* dst2, void* dst3)
{
}
#endif
Loading
0