8000 Add fastest block-splitter variant by Cyan4973 · Pull Request #4178 · facebook/zstd · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Add fastest block-splitter variant #4178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lib/compress/hist.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
/* HIST_add() :
 * Accumulate the number of occurrences of each byte value of @src into @count.
 * @count is not reset; the array is presumed large enough (256 entries, i.e. 1 KB). */
void HIST_add(unsigned* count, const void* src, size_t srcSize)
{
    const unsigned char* const bytes = (const unsigned char*)src;
    size_t pos;

    for (pos = 0; pos < srcSize; pos++) {
        count[bytes[pos]] += 1;
    }
}

unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
Expand Down
7 changes: 7 additions & 0 deletions lib/compress/hist.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);

/*! HIST_add() :
 * Lowest level: just add nb of occurrences of characters from @src into @count.
 * @count is not reset. @count array is presumed large enough (i.e. 1 KB).
 * This function does not need any additional stack memory.
 */
void HIST_add(unsigned* count, const void* src, size_t srcSize);
22 changes: 6 additions & 16 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -4493,32 +4493,22 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,

/* ZSTD_optimalBlockSize():
 * Select the size of the next block to emit, optionally invoking the
 * block-splitter to cut a full block at a content boundary.
 * @savings : verified compression savings (in bytes) accumulated so far;
 *            splitting is only attempted once data has proven compressible.
 * @return : size of the next block, always <= blockSizeMax.
 * NOTE(review): this hunk was rendered as a garbled diff (pre-image and
 * post-image interleaved); reconstructed here as the post-merge version,
 * which replaces the per-strategy dispatch with a splitLevels[] lookup.
 */
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
{
    /* split level based on compression strategy, from `fast` to `btultra2` */
    static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
    /* note: conservatively only split full blocks (128 KB) currently.
     * While it's possible to go lower, let's keep it simple for a first implementation.
     * Besides, benefits of splitting are reduced when blocks are already small.
     */
    if (srcSize < 128 KB || blockSizeMax < 128 KB)
        return MIN(srcSize, blockSizeMax);
    /* do not split incompressible data though:
     * require verified savings to allow pre-splitting.
     * Note: as a consequence, the first full block is not split.
     */
    if (savings < 3) return 128 KB;
    /* dynamic splitting has a cpu cost for analysis,
     * select a variant among multiple gradual speed/accuracy tradeoffs */
    return ZSTD_splitBlock(src, blockSizeMax, splitLevels[strat], cctx->tmpWorkspace, cctx->tmpWkspSize);
}

/*! ZSTD_compress_frameChunk() :
Expand Down
53 changes: 48 additions & 5 deletions lib/compress/zstd_preSplit.c
8000
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "../common/mem.h" /* S64 */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
#include "hist.h" /* HIST_add */
#include "zstd_preSplit.h"


Expand Down Expand Up @@ -77,10 +78,10 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)

#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate

/* ZSTD_GEN_RECORD_FINGERPRINT():
 * Generate a FP_RECORD(_rate) function that samples @src at the given @_rate
 * into a histogram of size 1<<_hSize, by delegating to recordFingerprint_generic().
 * NOTE(review): the diff view duplicated the macro lines (old and new
 * alignment variants both present, i.e. a `#define` redefinition);
 * deduplicated to the single post-image definition.
 */
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
    static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
    {                                                                              \
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
    }

ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
Expand Down Expand Up @@ -185,10 +186,52 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
(void)flushEvents; (void)removeEvents;
}

/* ZSTD_splitBlock_fromBorders(): very fast strategy :
 * compare fingerprint from beginning and end of the block,
 * derive from their difference if it's preferable to split in the middle,
 * repeat the process a second time, for finer grained decision.
 * 3 times did not bring improvements, so it stops at 2.
 * Benefits are good enough for a cheap heuristic.
 * More accurate splitting saves more, but speed impact is also more perceptible.
 * For better accuracy, use more elaborate variant *_byChunks.
 */
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
void* workspace, size_t wkspSize)
{
/* number of bytes sampled from each probed region
 * NOTE(review): not #undef'd at end of function — leaks into the rest of the TU */
#define SEGMENT_SIZE 512
FPStats* const fpstats = (FPStats*)workspace;
/* third Fingerprint slot, carved out of the workspace after a 512-entry
 * histogram area — assumes this does not overlap fpstats->pastEvents/newEvents
 * in the FPStats layout; TODO confirm against FPStats definition */
Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
/* only full 128 KB blocks are supported for now */
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;

/* first pass: fingerprint the first and last SEGMENT_SIZE bytes of the block */
initStats(fpstats);
HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
/* borders look statistically alike => do not split */
if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
return blockSize;

/* second pass: fingerprint the middle of the block, and compare its distance
 * to both borders, to refine the split position */
HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
middleEvents->nbEvents = SEGMENT_SIZE;
{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
/* heuristic threshold: distances too close to call => split at midpoint */
U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
return 64 KB;
/* middle resembles the end more => transition is early (32 KB), else late (96 KB) */
return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
}
}

/* ZSTD_splitBlock():
 * Dispatch to a block-splitter variant based on @level (0-4):
 * level 0 employs the fastest borders-comparison heuristic,
 * levels 1-4 map to the more accurate chunk-based analysis (levels 0-3).
 * NOTE(review): this hunk was rendered as a garbled diff; the stale
 * pre-image lines (level<=3 assert and unconditional byChunks return,
 * which made the rest unreachable) have been removed, keeping the post-image.
 */
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
                    int level,
                    void* workspace, size_t wkspSize)
{
    assert(0<=level && level<=4);
    if (level == 0)
        return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
    /* level >= 1*/
    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}
16 changes: 9 additions & 7 deletions lib/compress/zstd_preSplit.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ extern "C" {

#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208

/* ZSTD_splitBlock():
 * @level must be a value between 0 and 4.
 * higher levels spend more energy to detect block boundaries.
 * @workspace must be aligned for size_t.
 * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
 * note:
 * For the time being, this function only accepts full 128 KB blocks.
 * Therefore, @blockSize must be == 128 KB.
 * While this could be extended to smaller sizes in the future,
 * it is not yet clear if this would be useful. TBD.
 */
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
Expand Down
Loading
0