From de10f56be2765e8375939b97bb27ad3e378f217f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 29 Jan 2024 23:25:24 -0800 Subject: [PATCH 01/14] improve high compression ratio for file like #3793 this works great for 32-bit arrays, notably the synthetic ones, with extreme regularity, unfortunately, it's not universal, and in some cases, it's a loss. Crucially, on average, it's a loss on silesia. The most negatively impacted file is x-ray. It deserves an investigation before suggesting it as an evolution. --- lib/compress/zstd_opt.c | 54 +++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 68537e60097..e16e5e4fb13 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -566,7 +566,7 @@ void ZSTD_updateTree_internal( const BYTE* const base = ms->window.base; U32 const target = (U32)(ip - base); U32 idx = ms->nextToUpdate; - DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", idx, target, dictMode); while(idx < target) { @@ -1069,6 +1069,10 @@ listStats(const U32* table, int lastEltID) #endif +#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) +#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) +#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) + FORCE_INLINE_TEMPLATE ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t @@ -1122,7 +1126,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const ll0 = !litlen; U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, - (U32)(ip-istart), (U32)(iend - ip)); + (U32)(ip-istart), (U32)(iend-ip)); if (!nbMatches) { ip++; continue; } /* initialize opt[0] */ @@ -1134,7 +1138,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, * in every price. We include the literal length to avoid negative * prices when we subtract the previous literal length. */ - opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); + opt[0].price = LL_PRICE(litlen); /* large match -> immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; @@ -1155,11 +1159,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* set prices for first matches starting position == 0 */ assert(opt[0].price >= 0); - { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + { U32 const literalsPrice = (U32)opt[0].price + (U32)LL_PRICE(0); U32 pos; U32 matchNb; for (pos = 1; pos < minMatch; pos++) { opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + opt[pos].mlen = 0; } for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offBase = matches[matchNb].off; @@ -1173,7 +1178,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].off = offBase; opt[pos].litlen = litlen; opt[pos].price = (int)sequencePrice; - } } + } + opt[pos].price = ZSTD_MAX_PRICE; + } last_pos = pos-1; } } @@ -1187,18 +1194,38 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* Fix current position with one literal if cheaper */ { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; int const price = opt[cur-1].price - + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + + LIT_PRICE(ip+cur-1) + + LL_INCPRICE(litlen); assert(price < 1000000000); /* overflow check */ if (price <= opt[cur].price) { DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - opt[cur].mlen = 0; - opt[cur].off = 0; - opt[cur].litlen = litlen; - opt[cur].price = price; + if ( (optLevel == 2) /* additional check only for high modes */ + && (opt[cur].mlen > 0) /* interrupt a match */ + && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */ + { + /* check next position, in case it would be cheaper */ + int with1literal = opt[cur].price + LL_INCPRICE(1); + int withMoreLiterals = price + LL_INCPRICE(litlen+1); + DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f", + cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); + if (with1literal < withMoreLiterals) { + DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals)); + /* do not take this literal */ + } else { + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } + } else { + /* normal case: take the literal, it's expected to be cheaper */ + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } } else { DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), @@ -1236,7 +1263,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, { U32 const ll0 = (opt[cur].mlen != 0); U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; U32 const previousPrice = (U32)opt[cur].price; - U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 const basePrice = previousPrice + (U32)LL_PRICE(0); U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); U32 matchNb; @@ -1291,6 +1318,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ } } } } + opt[last_pos+1].price = ZSTD_MAX_PRICE; } /* for (cur = 1; cur <= last_pos; cur++) */ lastSequence = opt[last_pos]; From 4683667785c6248a20eba83dd192dc9baea70d84 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 31 Jan 2024 02:51:46 -0800 Subject: [PATCH 02/14] refactor optimal parser store stretches as intermediate solution instead of sequences. makes it possible to link a solution to a predecessor. --- lib/compress/zstd_compress.c | 8 +- lib/compress/zstd_compress_internal.h | 14 +- lib/compress/zstd_opt.c | 196 +++++++++++++++---------- lib/decompress/zstd_decompress_block.c | 17 ++- 4 files changed, 138 insertions(+), 97 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b4beff7a972..23c517d2be0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1661,8 +1661,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) ? ZSTD_cwksp_aligned_alloc_size(hSize) : 0; @@ -2045,8 +2045,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t)); } ms->cParams = *cParams; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 60f22239e1a..ec34f2a7749 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -159,11 +159,11 @@ typedef struct { UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { - int price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; + int price; /* price from beginning of segment to this position */ + U32 off; /* offset of previous match */ + U32 mlen; /* length of previous match */ + U32 litlen; /* nb of literals after previous match */ + U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ } ZSTD_optimal_t; typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; @@ -174,8 +174,8 @@ typedef struct { unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+2 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+2 */ U32 litSum; /* nb of literals */ U32 litLengthSum; /* nb of litLength codes */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index e16e5e4fb13..76f3ef05441 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1047,11 +1047,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, * Optimal parser *********************************/ -static U32 ZSTD_totalLen(ZSTD_optimal_t sol) -{ - return sol.litlen + sol.mlen; -} - #if 0 /* debug */ static void @@ -1101,10 +1096,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; - ZSTD_optimal_t lastSequence; + ZSTD_optimal_t lastStretch; ZSTD_optLdm_t optLdm; - ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t)); + ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; @@ -1127,18 +1122,23 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, (U32)(ip-istart), (U32)(iend-ip)); - if (!nbMatches) { ip++; continue; } + if (!nbMatches) { + DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); + ip++; + continue; + } /* initialize opt[0] */ - { U32 i ; for (i=0; i immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; @@ -1147,37 +1147,36 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); if (maxML > sufficient_len) { - lastSequence.litlen = litlen; - lastSequence.mlen = maxML; - lastSequence.off = maxOffBase; - DEBUGLOG(6, "large match (%u>%u), immediate encoding", + lastStretch.litlen = 0; + lastStretch.mlen = maxML; + lastStretch.off = maxOffBase; + DEBUGLOG(6, "large match (%u>%u) => immediate encoding", maxML, sufficient_len); cur = 0; - last_pos = ZSTD_totalLen(lastSequence); + last_pos = maxML; goto _shortestPath; } } /* set prices for first matches starting position == 0 */ assert(opt[0].price >= 0); - { U32 const literalsPrice = (U32)opt[0].price + (U32)LL_PRICE(0); - U32 pos; + { U32 pos; U32 matchNb; for (pos = 1; pos < minMatch; pos++) { - opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ - opt[pos].mlen = 0; + opt[pos].price = ZSTD_MAX_PRICE; + /* will be updated later on at match check */ } for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offBase = matches[matchNb].off; U32 const end = matches[matchNb].len; for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); - U32 const sequencePrice = literalsPrice + matchPrice; + int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); + int const sequencePrice = opt[0].price + matchPrice; DEBUGLOG(7, "rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost((int)sequencePrice)); + pos, ZSTD_fCost(sequencePrice)); opt[pos].mlen = pos; opt[pos].off = offBase; - opt[pos].litlen = litlen; - opt[pos].price = (int)sequencePrice; + opt[pos].litlen = 0; /* end of match */ + opt[pos].price = sequencePrice + LL_PRICE(0); } opt[pos].price = ZSTD_MAX_PRICE; } @@ -1192,7 +1191,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur); /* Fix current position with one literal if cheaper */ - { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; + { U32 const litlen = opt[cur-1].litlen + 1; int const price = opt[cur-1].price + LIT_PRICE(ip+cur-1) + LL_INCPRICE(litlen); @@ -1201,7 +1200,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - if ( (optLevel == 2) /* additional check only for high modes */ + if ( 0 && (optLevel == 2) /* additional check only for high modes */ && (opt[cur].mlen > 0) /* interrupt a match */ && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */ { @@ -1214,15 +1213,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals)); /* do not take this literal */ } else { - opt[cur].mlen = 0; - opt[cur].off = 0; + opt[cur].mlen = opt[cur-1].mlen; + opt[cur].off = opt[cur-1].off; opt[cur].litlen = litlen; opt[cur].price = price; } } else { - /* normal case: take the literal, it's expected to be cheaper */ - opt[cur].mlen = 0; - opt[cur].off = 0; + /* normal case: take the literal, it's expected to be cheaper at position @cur */ + opt[cur].mlen = opt[cur-1].mlen; + opt[cur].off = opt[cur-1].off; opt[cur].litlen = litlen; opt[cur].price = price; } @@ -1240,9 +1239,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, */ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); assert(cur >= opt[cur].mlen); - if (opt[cur].mlen != 0) { + if (opt[cur].litlen == 0) { + /* just finished a match => alter offset history */ U32 const prev = cur - opt[cur].mlen; - repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); } else { ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); @@ -1255,15 +1255,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, if ( (optLevel==0) /*static_test*/ && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { - DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ } assert(opt[cur].price >= 0); - { U32 const ll0 = (opt[cur].mlen != 0); - U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = (U32)opt[cur].price; - U32 const basePrice = previousPrice + (U32)LL_PRICE(0); + { U32 const ll0 = (opt[cur].litlen == 0); + int const previousPrice = opt[cur].price; + int const basePrice = previousPrice + LL_PRICE(0); U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); U32 matchNb; @@ -1275,18 +1274,16 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, continue; } - { U32 const maxML = matches[nbMatches-1].len; - DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", - inr-istart, cur, nbMatches, maxML); - - if ( (maxML > sufficient_len) - || (cur + maxML >= ZSTD_OPT_NUM) ) { - lastSequence.mlen = maxML; - lastSequence.off = matches[nbMatches-1].off; - lastSequence.litlen = litlen; - cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ - last_pos = cur + ZSTD_totalLen(lastSequence); - if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + { U32 const longestML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u", + inr-istart, cur, nbMatches, longestML); + + if ( (longestML > sufficient_len) + || (cur + longestML >= ZSTD_OPT_NUM) ) { + lastStretch.mlen = longestML; + lastStretch.off = matches[nbMatches-1].off; + lastStretch.litlen = 0; + last_pos = cur + longestML; goto _shortestPath; } } @@ -1298,11 +1295,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 mlen; DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", - matchNb, matches[matchNb].off, lastML, litlen); + matchNb, matches[matchNb].off, lastML, opt[cur].litlen); for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ U32 const pos = cur + mlen; - int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); if ((pos > last_pos) || (price < opt[pos].price)) { DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", @@ -1310,7 +1307,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ opt[pos].mlen = mlen; opt[pos].off = offset; - opt[pos].litlen = litlen; + opt[pos].litlen = 0; opt[pos].price = price; } else { DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", @@ -1321,41 +1318,77 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[last_pos+1].price = ZSTD_MAX_PRICE; } /* for (cur = 1; cur <= last_pos; cur++) */ - lastSequence = opt[last_pos]; - cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ - assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + lastStretch = opt[last_pos]; + assert(cur >= lastStretch.mlen); + cur = last_pos - lastStretch.mlen; _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ assert(opt[0].mlen == 0); + assert(last_pos >= lastStretch.mlen); + assert(cur == last_pos - lastStretch.mlen); + assert(lastStretch.rep[0] != 0); + + if (lastStretch.mlen==0) { + /* no solution : all matches have been converted into literals */ + assert(lastStretch.litlen == (ip - anchor) + last_pos); + ip += last_pos; + continue; + } + assert(lastStretch.off > 0); - /* Set the next chunk's repcodes based on the repcodes of the beginning - * of the last match, and the last sequence. This avoids us having to - * update them while traversing the sequences. - */ - if (lastSequence.mlen != 0) { - repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); - ZSTD_memcpy(rep, &reps, sizeof(reps)); + /* Update offset history */ + if (lastStretch.litlen == 0) { + /* finishing on a match : update offset history */ + repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(repcodes_t)); } else { - ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t)); + assert(cur >= lastStretch.litlen); + cur -= lastStretch.litlen; } - { U32 const storeEnd = cur + 1; + /* let's write the shortest path solution + * solution is stored in @opt, + * in reverse order, + * starting from @storeEnd (==cur+1) + * (effectively partially overwriting @opt). + * Content is changed too: + * - So far, @opt stored stretches, aka a match followed by literals + * - Now, it will store sequences, aka literals followed by a match + */ + { U32 const storeEnd = cur + 2; U32 storeStart = storeEnd; - U32 seqPos = cur; + U32 stretchPos = cur; + ZSTD_optimal_t nextStretch; DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", last_pos, cur); (void)last_pos; assert(storeEnd < ZSTD_OPT_NUM); - DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); - opt[storeEnd] = lastSequence; - while (seqPos > 0) { - U32 const backDist = ZSTD_totalLen(opt[seqPos]); + DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); + if (lastStretch.litlen > 0) { + /* last "sequence" is unfinished: just a bunch of literals */ + opt[storeEnd].litlen = lastStretch.litlen; + opt[storeEnd].mlen = 0; + storeStart = storeEnd-1; + opt[storeStart] = lastStretch; + } { + opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ + storeStart = storeEnd; + } + while (1) { + nextStretch = opt[stretchPos]; + opt[storeStart].litlen = nextStretch.litlen; + DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", + opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); + if (nextStretch.mlen == 0) { + /* reaching beginning of segment */ + break; + } storeStart--; - DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); - opt[storeStart] = opt[seqPos]; - seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + opt[storeStart] = nextStretch; /* note: litlen will be fixed */ + assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); + stretchPos -= nextStretch.litlen + nextStretch.mlen; } /* save sequences */ @@ -1381,6 +1414,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, anchor += advance; ip = anchor; } } + DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); + + /* update all costs */ ZSTD_setBasePrices(optStatePtr, optLevel); } } /* while (ip < ilimit) */ @@ -1476,7 +1512,7 @@ size_t ZSTD_compressBlock_btultra2( * Consequently, this can only work if no data has been previously loaded in tables, * aka, no dictionary, no prefix, no ldm preprocessing. * The compression ratio gain is generally small (~0.5% on first block), - ** the cost is 2x cpu time on first block. */ + * the cost is 2x cpu time on first block. */ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); if ( (ms->opt.litLengthSum==0) /* first block */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 11bf201bc36..4be145732d9 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1585,7 +1585,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, /* last literal segment */ if (dctx->litBufferLocation == ZSTD_split) { /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ - size_t const lastLLSize = litBufferEnd - litPtr; + size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memmove(op, litPtr, lastLLSize); @@ -1596,14 +1597,16 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, dctx->litBufferLocation = ZSTD_not_in_dst; } /* copy last literals from internal buffer */ - { size_t const lastLLSize = litBufferEnd - litPtr; + { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; } } - return op-ostart; + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); } FORCE_INLINE_TEMPLATE size_t @@ -1673,14 +1676,16 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, } /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; + { size_t const lastLLSize = (size_t)(litEnd - litPtr); + DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; } } - return op-ostart; + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); } static size_t @@ -1878,7 +1883,7 @@ ZSTD_decompressSequencesLong_body( } } - return op-ostart; + return (size_t)(op - ostart); } static size_t From 0166b2ba8083481df3ae68e3431a43f541d3c9bd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 31 Jan 2024 11:12:57 -0800 Subject: [PATCH 03/14] modification: differentiate literal update at pos+1 helps when litlen==1 is cheaper than litlen==0 works great on pathological arr[u32] examples but doesn't generalize well on other files. silesia/x-ray is amoung the most negatively affected ones. --- lib/compress/zstd_opt.c | 52 +++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 76f3ef05441..d7fb191cc9d 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1200,8 +1200,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - if ( 0 && (optLevel == 2) /* additional check only for high modes */ - && (opt[cur].mlen > 0) /* interrupt a match */ + if ((optLevel == 2) /* additional check only for high modes */ + && (opt[cur].litlen == 0) /* interrupt a match */ && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */ { /* check next position, in case it would be cheaper */ @@ -1209,33 +1209,35 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, int withMoreLiterals = price + LL_INCPRICE(litlen+1); DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f", cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); - if (with1literal < withMoreLiterals) { - DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) !!!", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals)); - /* do not take this literal */ - } else { - opt[cur].mlen = opt[cur-1].mlen; - opt[cur].off = opt[cur-1].off; - opt[cur].litlen = litlen; - opt[cur].price = price; + if ( (with1literal < withMoreLiterals) + && (with1literal < opt[cur+1].price) ) { + /* update offset history - before it disappears */ + U32 const prev = cur - opt[cur].mlen; + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); + assert(cur >= opt[cur].mlen); + DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", + ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), + newReps.rep[0], newReps.rep[1], newReps.rep[2] ); + opt[cur+1] = opt[cur]; /* mlen & offbase */ + ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t)); + opt[cur+1].litlen = 1; + opt[cur+1].price = with1literal; + if (last_pos < cur+1) last_pos = cur+1; } - } else { - /* normal case: take the literal, it's expected to be cheaper at position @cur */ - opt[cur].mlen = opt[cur-1].mlen; - opt[cur].off = opt[cur-1].off; - opt[cur].litlen = litlen; - opt[cur].price = price; } + opt[cur].mlen = opt[cur-1].mlen; + opt[cur].off = opt[cur-1].off; + opt[cur].litlen = litlen; + opt[cur].price = price; + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); } else { - DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), - opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); } } - /* Set the repcodes of the current position. We must do it here - * because we rely on the repcodes of the 2nd to last sequence being - * correct to set the next chunks repcodes during the backward - * traversal. + /* Offset history is not updated during match comparison. + * Do it here, now that the match is selected and confirmed. */ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); assert(cur >= opt[cur].mlen); @@ -1244,8 +1246,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const prev = cur - opt[cur].mlen; repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); - } else { - ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); } /* last match must start at a minimum distance of 8 from oend */ @@ -1514,6 +1514,7 @@ size_t ZSTD_compressBlock_btultra2( * The compression ratio gain is generally small (~0.5% on first block), * the cost is 2x cpu time on first block. */ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + //g_debuglevel = -g_debuglevel; if ( (ms->opt.litLengthSum==0) /* first block */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ @@ -1523,6 +1524,7 @@ size_t ZSTD_compressBlock_btultra2( ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } + //g_debuglevel = -g_debuglevel; return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } #endif From d31018e223691256aac9c426fcfbeec735a2d6ab Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 14:26:18 -0800 Subject: [PATCH 04/14] finally, a version that generalizes well While it's not always strictly a win, it's a win for files that see a noticeably compression ratio increase, while it's a very small noise for other files. Downside is, this patch is less efficient for 32-bit arrays of integer than the previous patch which was introducing losses for other files, but it's still a net improvement on this scenario. --- lib/compress/zstd_opt.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index d7fb191cc9d..2c4af2f062a 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1178,9 +1178,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].litlen = 0; /* end of match */ opt[pos].price = sequencePrice + LL_PRICE(0); } - opt[pos].price = ZSTD_MAX_PRICE; } last_pos = pos-1; + opt[pos].price = ZSTD_MAX_PRICE; + opt[pos+1].price = ZSTD_MAX_PRICE; } } @@ -1197,39 +1198,47 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + LL_INCPRICE(litlen); assert(price < 1000000000); /* overflow check */ if (price <= opt[cur].price) { + ZSTD_optimal_t const prevMatch = opt[cur]; DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = opt[cur-1].mlen; + opt[cur].off = opt[cur-1].off; + opt[cur].litlen = litlen; + opt[cur].price = price; + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); if ((optLevel == 2) /* additional check only for high modes */ - && (opt[cur].litlen == 0) /* interrupt a match */ + && (prevMatch.litlen == 0) /* interrupt a match */ && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */ { /* check next position, in case it would be cheaper */ - int with1literal = opt[cur].price + LL_INCPRICE(1); - int withMoreLiterals = price + LL_INCPRICE(litlen+1); + int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); + int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f", cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); if ( (with1literal < withMoreLiterals) && (with1literal < opt[cur+1].price) ) { /* update offset history - before it disappears */ - U32 const prev = cur - opt[cur].mlen; - repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); - assert(cur >= opt[cur].mlen); + U32 const prev = cur - prevMatch.mlen; + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); + assert(cur >= prevMatch.mlen); DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), newReps.rep[0], newReps.rep[1], newReps.rep[2] ); - opt[cur+1] = opt[cur]; /* mlen & offbase */ + opt[cur+1] = prevMatch; /* mlen & offbase */ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t)); opt[cur+1].litlen = 1; opt[cur+1].price = with1literal; if (last_pos < cur+1) last_pos = cur+1; +#if 0 + /* but, for following byte, get back to literal run */ + opt[cur+2] = opt[cur]; + opt[cur+2].litlen += 2; + opt[cur+2].price += LIT_PRICE(ip+cur) + LIT_PRICE(ip+cur+1) + LL_INCPRICE(litlen+1) + LL_INCPRICE(litlen+2); + if (last_pos < cur+2) last_pos = cur+2; +#endif } } - opt[cur].mlen = opt[cur-1].mlen; - opt[cur].off = opt[cur-1].off; - opt[cur].litlen = litlen; - opt[cur].price = price; - ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); } else { DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); @@ -1316,6 +1325,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } } } opt[last_pos+1].price = ZSTD_MAX_PRICE; + opt[last_pos+2].price = ZSTD_MAX_PRICE; } /* for (cur = 1; cur <= last_pos; cur++) */ lastStretch = opt[last_pos]; From 8168a451e58261baf9a53b0c1bcfdaff2ba0480d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 17:26:47 -0800 Subject: [PATCH 05/14] minor optimization, mostly for clarity --- lib/compress/zstd_opt.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 2c4af2f062a..bbf367b24de 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1181,7 +1181,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } last_pos = pos-1; opt[pos].price = ZSTD_MAX_PRICE; - opt[pos+1].price = ZSTD_MAX_PRICE; } } @@ -1202,19 +1201,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - opt[cur].mlen = opt[cur-1].mlen; - opt[cur].off = opt[cur-1].off; + opt[cur] = opt[cur-1]; opt[cur].litlen = litlen; opt[cur].price = price; - ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); if ((optLevel == 2) /* additional check only for high modes */ && (prevMatch.litlen == 0) /* interrupt a match */ - && (LL_INCPRICE(1) < 0) ) /* ll1 is cheaper than ll0 */ - { + && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ + ) { /* check next position, in case it would be cheaper */ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); - DEBUGLOG(7, "But at next rPos %u : match+1lit %.2f vs %ulits %.2f", + DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); if ( (with1literal < withMoreLiterals) && (with1literal < opt[cur+1].price) ) { @@ -1230,13 +1227,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[cur+1].litlen = 1; opt[cur+1].price = with1literal; if (last_pos < cur+1) last_pos = cur+1; -#if 0 - /* but, for following byte, get back to literal run */ - opt[cur+2] = opt[cur]; - opt[cur+2].litlen += 2; - opt[cur+2].price += LIT_PRICE(ip+cur) + LIT_PRICE(ip+cur+1) + LL_INCPRICE(litlen+1) + LL_INCPRICE(litlen+2); - if (last_pos < cur+2) last_pos = cur+2; -#endif } } } else { @@ -1325,7 +1315,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } } } opt[last_pos+1].price = ZSTD_MAX_PRICE; - opt[last_pos+2].price = ZSTD_MAX_PRICE; } /* for (cur = 1; cur <= last_pos; cur++) */ lastStretch = opt[last_pos]; From e5af24c5fa82186d61ee1ed4dfe161d65a1c1a7d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 17:48:29 -0800 Subject: [PATCH 06/14] fixed wrong assert --- lib/compress/zstd_opt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index bbf367b24de..bcebfaa3559 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1325,7 +1325,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, assert(opt[0].mlen == 0); assert(last_pos >= lastStretch.mlen); assert(cur == last_pos - lastStretch.mlen); - assert(lastStretch.rep[0] != 0); if (lastStretch.mlen==0) { /* no solution : all matches have been converted into literals */ From 9ae3bf5ee2ea676594b84afc6a3a4edcc22a19bf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 17:52:50 -0800 Subject: [PATCH 07/14] update compression results good news: there are only improvements --- tests/regression/results.csv | 84 ++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index d072c0d850b..6d398c0e7f6 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -11,10 +11,10 @@ silesia.tar, level 6, compress silesia.tar, level 7, compress simple, 4579828 silesia.tar, level 9, compress simple, 4555448 silesia.tar, level 13, compress simple, 4502956 -silesia.tar, level 16, compress simple, 4360546 -silesia.tar, level 19, compress simple, 4265911 +silesia.tar, level 16, compress simple, 4360529 +silesia.tar, level 19, compress simple, 4261475 silesia.tar, uncompressed literals, compress simple, 4854086 -silesia.tar, uncompressed literals optimal, compress simple, 4265911 +silesia.tar, uncompressed literals optimal, compress simple, 4261475 silesia.tar, huffman literals, compress simple, 6179047 github.tar, level -5, compress simple, 52115 github.tar, level -3, compress simple, 45678 @@ -45,8 +45,8 @@ silesia, level 6, compress silesia, level 7, compress cctx, 4570271 silesia, level 9, compress cctx, 4545850 silesia, level 13, compress cctx, 4493990 -silesia, level 16, compress cctx, 4360041 -silesia, level 19, compress cctx, 4296055 +silesia, level 16, compress cctx, 4359969 +silesia, level 19, compress cctx, 4267082 silesia, long distance mode, compress cctx, 4842075 silesia, multithreaded, compress cctx, 4842075 silesia, multithreaded long distance mode, compress cctx, 4842075 @@ -55,7 +55,7 @@ silesia, small hash log, compress silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794318 silesia, uncompressed literals, compress cctx, 4842075 -silesia, uncompressed literals optimal, compress cctx, 4296055 +silesia, uncompressed literals optimal, compress cctx, 4267082 silesia, huffman literals, compress cctx, 6172202 silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204407 @@ -109,8 +109,8 @@ silesia, level 6, zstdcli, silesia, level 7, zstdcli, 4570319 silesia, level 9, zstdcli, 4545898 silesia, level 13, zstdcli, 4494038 -silesia, level 16, zstdcli, 4360089 -silesia, level 19, zstdcli, 4296103 +silesia, level 16, zstdcli, 4360017 +silesia, level 19, zstdcli, 4267130 silesia, long distance mode, zstdcli, 4833785 silesia, multithreaded, zstdcli, 4842123 silesia, multithreaded long distance mode, zstdcli, 4833785 @@ -119,7 +119,7 @@ silesia, small hash log, zstdcli, silesia, small chain log, zstdcli, 4912245 silesia, explicit params, zstdcli, 4795840 silesia, uncompressed literals, zstdcli, 5120614 -silesia, uncompressed literals optimal, zstdcli, 4319566 +silesia, uncompressed literals optimal, zstdcli, 4317385 silesia, huffman literals, zstdcli, 5321417 silesia, multithreaded with advanced params, zstdcli, 5120614 silesia.tar, level -5, zstdcli, 6862049 @@ -134,8 +134,8 @@ silesia.tar, level 6, zstdcli, silesia.tar, level 7, zstdcli, 4581791 silesia.tar, level 9, zstdcli, 4555452 silesia.tar, level 13, zstdcli, 4502960 -silesia.tar, level 16, zstdcli, 4360550 -silesia.tar, level 19, zstdcli, 4265915 +silesia.tar, level 16, zstdcli, 4360533 +silesia.tar, level 19, zstdcli, 4261479 silesia.tar, no source size, zstdcli, 4854160 silesia.tar, long distance mode, zstdcli, 4845745 silesia.tar, multithreaded, zstdcli, 4854164 @@ -145,7 +145,7 @@ silesia.tar, small hash log, zstdcli, silesia.tar, small chain log, zstdcli, 4917022 silesia.tar, explicit params, zstdcli, 4821112 silesia.tar, uncompressed literals, zstdcli, 5122571 -silesia.tar, uncompressed literals optimal, zstdcli, 4310145 +silesia.tar, uncompressed literals optimal, zstdcli, 4308929 silesia.tar, huffman literals, zstdcli, 5342074 silesia.tar, multithreaded with advanced params, zstdcli, 5122571 github, level -5, zstdcli, 206407 @@ -248,8 +248,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass, 4505658 silesia, level 12 row 2, advanced one pass, 4503429 silesia, level 13, advanced one pass, 4493990 -silesia, level 16, advanced one pass, 4360041 -silesia, level 19, advanced one pass, 4296055 +silesia, level 16, advanced one pass, 4359969 +silesia, level 19, advanced one pass, 4267082 silesia, no source size, advanced one pass, 4842075 silesia, long distance mode, advanced one pass, 4833710 silesia, multithreaded, advanced one pass, 4842075 @@ -259,7 +259,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced one pass, 4912197 silesia, explicit params, advanced one pass, 4795840 silesia, uncompressed literals, advanced one pass, 5120566 -silesia, uncompressed literals optimal, advanced one pass, 4319518 +silesia, uncompressed literals optimal, advanced one pass, 4317337 silesia, huffman literals, advanced one pass, 5321369 silesia, multithreaded with advanced params, advanced one pass, 5120566 silesia.tar, level -5, advanced one pass, 6861055 @@ -282,8 +282,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass, 4514517 silesia.tar, level 12 row 2, advanced one pass, 4514007 silesia.tar, level 13, advanced one pass, 4502956 -silesia.tar, level 16, advanced one pass, 4360546 -silesia.tar, level 19, advanced one pass, 4265911 +silesia.tar, level 16, advanced one pass, 4360529 +silesia.tar, level 19, advanced one pass, 4261475 silesia.tar, no source size, advanced one pass, 4854086 silesia.tar, long distance mode, advanced one pass, 4840452 silesia.tar, multithreaded, advanced one pass, 4854160 @@ -293,7 +293,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced one pass, 4917041 silesia.tar, explicit params, advanced one pass, 4807274 silesia.tar, uncompressed literals, advanced one pass, 5122473 -silesia.tar, uncompressed literals optimal, advanced one pass, 4310141 +silesia.tar, uncompressed literals optimal, advanced one pass, 4308925 silesia.tar, huffman literals, advanced one pass, 5341705 silesia.tar, multithreaded with advanced params, advanced one pass, 5122567 github, level -5, advanced one pass, 204407 @@ -566,8 +566,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass small out, 4505658 silesia, level 12 row 2, advanced one pass small out, 4503429 silesia, level 13, advanced one pass small out, 4493990 -silesia, level 16, advanced one pass small out, 4360041 -silesia, level 19, advanced one pass small out, 4296055 +silesia, level 16, advanced one pass small out, 4359969 +silesia, level 19, advanced one pass small out, 4267082 silesia, no source size, advanced one pass small out, 4842075 silesia, long distance mode, advanced one pass small out, 4833710 silesia, multithreaded, advanced one pass small out, 4842075 @@ -577,7 +577,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced one pass small out, 4912197 silesia, explicit params, advanced one pass small out, 4795840 silesia, uncompressed literals, advanced one pass small out, 5120566 -silesia, uncompressed literals optimal, advanced one pass small out, 4319518 +silesia, uncompressed literals optimal, advanced one pass small out, 4317337 silesia, huffman literals, advanced one pass small out, 5321369 silesia, multithreaded with advanced params, advanced one pass small out, 5120566 silesia.tar, level -5, advanced one pass small out, 6861055 @@ -600,8 +600,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass small out, 4514517 silesia.tar, level 12 row 2, advanced one pass small out, 4514007 silesia.tar, level 13, advanced one pass small out, 4502956 -silesia.tar, level 16, advanced one pass small out, 4360546 -silesia.tar, level 19, advanced one pass small out, 4265911 +silesia.tar, level 16, advanced one pass small out, 4360529 +silesia.tar, level 19, advanced one pass small out, 4261475 silesia.tar, no source size, advanced one pass small out, 4854086 silesia.tar, long distance mode, advanced one pass small out, 4840452 silesia.tar, multithreaded, advanced one pass small out, 4854160 @@ -611,7 +611,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced one pass small out, 4917041 silesia.tar, explicit params, advanced one pass small out, 4807274 silesia.tar, uncompressed literals, advanced one pass small out, 5122473 -silesia.tar, uncompressed literals optimal, advanced one pass small out, 4310141 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4308925 silesia.tar, huffman literals, advanced one pass small out, 5341705 silesia.tar, multithreaded with advanced params, advanced one pass small out, 5122567 github, level -5, advanced one pass small out, 204407 @@ -884,8 +884,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced streaming, 4505658 silesia, level 12 row 2, advanced streaming, 4503429 silesia, level 13, advanced streaming, 4493990 -silesia, level 16, advanced streaming, 4360041 -silesia, level 19, advanced streaming, 4296055 +silesia, level 16, advanced streaming, 4359969 +silesia, level 19, advanced streaming, 4267082 silesia, no source size, advanced streaming, 4842039 silesia, long distance mode, advanced streaming, 4833710 silesia, multithreaded, advanced streaming, 4842075 @@ -895,7 +895,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced streaming, 4912197 silesia, explicit params, advanced streaming, 4795857 silesia, uncompressed literals, advanced streaming, 5120566 -silesia, uncompressed literals optimal, advanced streaming, 4319518 +silesia, uncompressed literals optimal, advanced streaming, 4317337 silesia, huffman literals, advanced streaming, 5321370 silesia, multithreaded with advanced params, advanced streaming, 5120566 silesia.tar, level -5, advanced streaming, 6856523 @@ -918,8 +918,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced streaming, 4514514 silesia.tar, level 12 row 2, advanced streaming, 4514003 silesia.tar, level 13, advanced streaming, 4502956 -silesia.tar, level 16, advanced streaming, 4360546 -silesia.tar, level 19, advanced streaming, 4265911 +silesia.tar, level 16, advanced streaming, 4360529 +silesia.tar, level 19, advanced streaming, 4261475 silesia.tar, no source size, advanced streaming, 4859267 silesia.tar, long distance mode, advanced streaming, 4840452 silesia.tar, multithreaded, advanced streaming, 4854160 @@ -929,7 +929,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced streaming, 4917021 silesia.tar, explicit params, advanced streaming, 4807288 silesia.tar, uncompressed literals, advanced streaming, 5127423 -silesia.tar, uncompressed literals optimal, advanced streaming, 4310141 +silesia.tar, uncompressed literals optimal, advanced streaming, 4308925 silesia.tar, huffman literals, advanced streaming, 5341712 silesia.tar, multithreaded with advanced params, advanced streaming, 5122567 github, level -5, advanced streaming, 204407 @@ -1194,11 +1194,11 @@ silesia, level 6, old stre silesia, level 7, old streaming, 4570271 silesia, level 9, old streaming, 4545850 silesia, level 13, old streaming, 4493990 -silesia, level 16, old streaming, 4360041 -silesia, level 19, old streaming, 4296055 +silesia, level 16, old streaming, 4359969 +silesia, level 19, old streaming, 4267082 silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 -silesia, uncompressed literals optimal, old streaming, 4296055 +silesia, uncompressed literals optimal, old streaming, 4267082 silesia, huffman literals, old streaming, 6172207 silesia.tar, level -5, old streaming, 6856523 silesia.tar, level -3, old streaming, 6505954 @@ -1212,11 +1212,11 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming, 4579823 silesia.tar, level 9, old streaming, 4555445 silesia.tar, level 13, old streaming, 4502956 -silesia.tar, level 16, old streaming, 4360546 -silesia.tar, level 19, old streaming, 4265911 +silesia.tar, level 16, old streaming, 4360529 +silesia.tar, level 19, old streaming, 4261475 silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 -silesia.tar, uncompressed literals optimal, old streaming, 4265911 +silesia.tar, uncompressed literals optimal, old streaming, 4261475 silesia.tar, huffman literals, old streaming, 6179056 github, level -5, old streaming, 204407 github, level -5 with dict, old streaming, 45832 @@ -1296,8 +1296,8 @@ silesia, level 6, old stre silesia, level 7, old streaming advanced, 4570271 silesia, level 9, old streaming advanced, 4545850 silesia, level 13, old streaming advanced, 4493990 -silesia, level 16, old streaming advanced, 4360041 -silesia, level 19, old streaming advanced, 4296055 +silesia, level 16, old streaming advanced, 4359969 +silesia, level 19, old streaming advanced, 4267082 silesia, no source size, old streaming advanced, 4842039 silesia, long distance mode, old streaming advanced, 4842075 silesia, multithreaded, old streaming advanced, 4842075 @@ -1307,7 +1307,7 @@ silesia, small hash log, old stre silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795857 silesia, uncompressed literals, old streaming advanced, 4842075 -silesia, uncompressed literals optimal, old streaming advanced, 4296055 +silesia, uncompressed literals optimal, old streaming advanced, 4267082 silesia, huffman literals, old streaming advanced, 6172207 silesia, multithreaded with advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 6856523 @@ -1322,8 +1322,8 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming advanced, 4579823 silesia.tar, level 9, old streaming advanced, 4555445 silesia.tar, level 13, old streaming advanced, 4502956 -silesia.tar, level 16, old streaming advanced, 4360546 -silesia.tar, level 19, old streaming advanced, 4265911 +silesia.tar, level 16, old streaming advanced, 4360529 +silesia.tar, level 19, old streaming advanced, 4261475 silesia.tar, no source size, old streaming advanced, 4859267 silesia.tar, long distance mode, old streaming advanced, 4859271 silesia.tar, multithreaded, old streaming advanced, 4859271 @@ -1333,7 +1333,7 @@ silesia.tar, small hash log, old stre silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4807288 silesia.tar, uncompressed literals, old streaming advanced, 4859271 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4265911 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4261475 silesia.tar, huffman literals, old streaming advanced, 6179056 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 From 5474edbe6016175453d09eca139566baefe0b97b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 19:31:53 -0800 Subject: [PATCH 08/14] fixed wrong assert by introducing ZSTD_OPT_SIZE --- lib/compress/zstd_compress.c | 8 ++++---- lib/compress/zstd_compress_internal.h | 5 +++-- lib/compress/zstd_opt.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 23c517d2be0..f8abbbbd91b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1661,8 +1661,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) ? ZSTD_cwksp_aligned_alloc_size(hSize) : 0; @@ -2045,8 +2045,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_match_t)); - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+2) * sizeof(ZSTD_optimal_t)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t)); } ms->cParams = *cParams; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index ec34f2a7749..dae8526d461 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -168,14 +168,15 @@ typedef struct { typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; +#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+2) typedef struct { /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ unsigned* litFreq; /* table of literals statistics, of size 256 */ unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+2 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+2 */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ U32 litSum; /* nb of literals */ U32 litLengthSum; /* nb of litLength codes */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index bcebfaa3559..eb86470629d 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1361,7 +1361,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", last_pos, cur); (void)last_pos; - assert(storeEnd < ZSTD_OPT_NUM); + assert(storeEnd < ZSTD_OPT_SIZE); DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); if (lastStretch.litlen > 0) { From 0ae21d8c3170741e4005c877d3c300a6034601ec Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 19:32:59 -0800 Subject: [PATCH 09/14] removed trace control --- lib/compress/zstd_opt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index eb86470629d..04587e855aa 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1512,7 +1512,6 @@ size_t ZSTD_compressBlock_btultra2( * The compression ratio gain is generally small (~0.5% on first block), * the cost is 2x cpu time on first block. */ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - //g_debuglevel = -g_debuglevel; if ( (ms->opt.litLengthSum==0) /* first block */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ @@ -1522,7 +1521,6 @@ size_t ZSTD_compressBlock_btultra2( ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } - //g_debuglevel = -g_debuglevel; return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } #endif From fe2e2ad36d434d0989ca669d3a4f4d60f1cb907b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 3 Feb 2024 19:57:38 -0800 Subject: [PATCH 10/14] use ZSTD_memcpy() which can be redirected in Linux kernel mode --- lib/compress/zstd_opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 04587e855aa..20a30406a31 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1138,7 +1138,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, */ opt[0].price = LL_PRICE(litlen); ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); - memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); + ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); /* large match -> immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; From 641749fc0935b6905b2fcfaa362cedfc631f5960 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Feb 2024 00:36:10 -0800 Subject: [PATCH 11/14] fix uasan dictionary_stream_round_trip fuzz test --- lib/compress/zstd_opt.c | 6 ++++-- tests/Makefile | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 20a30406a31..eed3319299e 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -267,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, const optState_t* const optPtr, int optLevel) { + DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); if (litLength == 0) return 0; if (!ZSTD_compressedLiterals(optPtr)) @@ -1204,7 +1205,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[cur] = opt[cur-1]; opt[cur].litlen = litlen; opt[cur].price = price; - if ((optLevel == 2) /* additional check only for high modes */ + if ( (optLevel == 2) /* additional check only for high modes */ && (prevMatch.litlen == 0) /* interrupt a match */ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ ) { @@ -1278,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, inr-istart, cur, nbMatches, longestML); if ( (longestML > sufficient_len) - || (cur + longestML >= ZSTD_OPT_NUM) ) { + || (cur + longestML >= ZSTD_OPT_NUM) + || (ip + cur + longestML >= iend) ) { lastStretch.mlen = longestML; lastStretch.off = matches[nbMatches-1].off; lastStretch.litlen = 0; diff --git a/tests/Makefile b/tests/Makefile index c31e7500558..2f33e1d0f0b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -251,8 +251,9 @@ checkTag.o : $(ZSTDDIR)/zstd.h clean: $(MAKE) -C $(ZSTDDIR) clean $(MAKE) -C $(PRGDIR) clean - $(RM) -fR $(TESTARTEFACT) - $(RM) -rf tmp* # some test directories are named tmp* + $(MAKE) -C fuzz clean + $(RM) -R $(TESTARTEFACT) + $(RM) -r tmp* # some test directories are named tmp* $(RM) $(CLEAN) core *.o *.tmp result* *.gcda dictionary *.zst \ $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \ fullbench-dll$(EXT) fuzzer-dll$(EXT) zstreamtest-dll$(EXT) From 6c35fb2e8cb826b70226856cd7442861037cca8a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Feb 2024 01:21:06 -0800 Subject: [PATCH 12/14] fix msan warnings --- Makefile | 5 +++-- lib/compress/zstd_opt.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 87d80d16dea..11eca19ce20 100644 --- a/Makefile +++ b/Makefile @@ -328,8 +328,9 @@ asan-%: clean msan: clean $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason -msan-%: clean - LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $* +msan-%: + $(MAKE) clean + LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $* asan32: clean $(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)" diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index eed3319299e..25715eabba8 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1164,7 +1164,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 matchNb; for (pos = 1; pos < minMatch; pos++) { opt[pos].price = ZSTD_MAX_PRICE; - /* will be updated later on at match check */ + opt[pos].mlen = 0; + opt[pos].litlen = litlen + pos; } for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offBase = matches[matchNb].off; @@ -1205,8 +1206,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[cur] = opt[cur-1]; opt[cur].litlen = litlen; opt[cur].price = price; - if ( (optLevel == 2) /* additional check only for high modes */ - && (prevMatch.litlen == 0) /* interrupt a match */ + if ( (optLevel >= 1) /* additional check only for higher modes */ + && (prevMatch.litlen == 0) /* replace a match */ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ ) { /* check next position, in case it would be cheaper */ @@ -1305,7 +1306,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, if ((pos > last_pos) || (price < opt[pos].price)) { DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + while (last_pos < pos) { + /* fill empty positions, for future comparisons */ + last_pos++; + opt[last_pos].price = ZSTD_MAX_PRICE; + opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ + } opt[pos].mlen = mlen; opt[pos].off = offset; opt[pos].litlen = 0; From 887f5b62aea77ec26a1a693ff9d8e2b1eba3a2cd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Feb 2024 01:27:22 -0800 Subject: [PATCH 13/14] update compression results for regression tests --- tests/regression/results.csv | 264 +++++++++++++++++------------------ 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 6d398c0e7f6..fc3fbe7c7e1 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -11,10 +11,10 @@ silesia.tar, level 6, compress silesia.tar, level 7, compress simple, 4579828 silesia.tar, level 9, compress simple, 4555448 silesia.tar, level 13, compress simple, 4502956 -silesia.tar, level 16, compress simple, 4360529 -silesia.tar, level 19, compress simple, 4261475 +silesia.tar, level 16, compress simple, 4360385 +silesia.tar, level 19, compress simple, 4260939 silesia.tar, uncompressed literals, compress simple, 4854086 -silesia.tar, uncompressed literals optimal, compress simple, 4261475 +silesia.tar, uncompressed literals optimal, compress simple, 4260939 silesia.tar, huffman literals, compress simple, 6179047 github.tar, level -5, compress simple, 52115 github.tar, level -3, compress simple, 45678 @@ -29,9 +29,9 @@ github.tar, level 7, compress github.tar, level 9, compress simple, 36723 github.tar, level 13, compress simple, 35501 github.tar, level 16, compress simple, 40466 -github.tar, level 19, compress simple, 32276 +github.tar, level 19, compress simple, 32262 github.tar, uncompressed literals, compress simple, 38831 -github.tar, uncompressed literals optimal, compress simple, 32276 +github.tar, uncompressed literals optimal, compress simple, 32262 github.tar, huffman literals, compress simple, 42560 silesia, level -5, compress cctx, 6857372 silesia, level -3, compress cctx, 6503412 @@ -45,8 +45,8 @@ silesia, level 6, compress silesia, level 7, compress cctx, 4570271 silesia, level 9, compress cctx, 4545850 silesia, level 13, compress cctx, 4493990 -silesia, level 16, compress cctx, 4359969 -silesia, level 19, compress cctx, 4267082 +silesia, level 16, compress cctx, 4359652 +silesia, level 19, compress cctx, 4266582 silesia, long distance mode, compress cctx, 4842075 silesia, multithreaded, compress cctx, 4842075 silesia, multithreaded long distance mode, compress cctx, 4842075 @@ -55,7 +55,7 @@ silesia, small hash log, compress silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794318 silesia, uncompressed literals, compress cctx, 4842075 -silesia, uncompressed literals optimal, compress cctx, 4267082 +silesia, uncompressed literals optimal, compress cctx, 4266582 silesia, huffman literals, compress cctx, 6172202 silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204407 @@ -83,9 +83,9 @@ github, level 9 with dict, compress github, level 13, compress cctx, 132878 github, level 13 with dict, compress cctx, 39948 github, level 16, compress cctx, 133209 -github, level 16 with dict, compress cctx, 37568 +github, level 16 with dict, compress cctx, 37892 github, level 19, compress cctx, 132879 -github, level 19 with dict, compress cctx, 37567 +github, level 19 with dict, compress cctx, 37906 github, long distance mode, compress cctx, 141069 github, multithreaded, compress cctx, 141069 github, multithreaded long distance mode, compress cctx, 141069 @@ -109,8 +109,8 @@ silesia, level 6, zstdcli, silesia, level 7, zstdcli, 4570319 silesia, level 9, zstdcli, 4545898 silesia, level 13, zstdcli, 4494038 -silesia, level 16, zstdcli, 4360017 -silesia, level 19, zstdcli, 4267130 +silesia, level 16, zstdcli, 4359700 +silesia, level 19, zstdcli, 4266630 silesia, long distance mode, zstdcli, 4833785 silesia, multithreaded, zstdcli, 4842123 silesia, multithreaded long distance mode, zstdcli, 4833785 @@ -119,7 +119,7 @@ silesia, small hash log, zstdcli, silesia, small chain log, zstdcli, 4912245 silesia, explicit params, zstdcli, 4795840 silesia, uncompressed literals, zstdcli, 5120614 -silesia, uncompressed literals optimal, zstdcli, 4317385 +silesia, uncompressed literals optimal, zstdcli, 4316928 silesia, huffman literals, zstdcli, 5321417 silesia, multithreaded with advanced params, zstdcli, 5120614 silesia.tar, level -5, zstdcli, 6862049 @@ -134,8 +134,8 @@ silesia.tar, level 6, zstdcli, silesia.tar, level 7, zstdcli, 4581791 silesia.tar, level 9, zstdcli, 4555452 silesia.tar, level 13, zstdcli, 4502960 -silesia.tar, level 16, zstdcli, 4360533 -silesia.tar, level 19, zstdcli, 4261479 +silesia.tar, level 16, zstdcli, 4360389 +silesia.tar, level 19, zstdcli, 4260943 silesia.tar, no source size, zstdcli, 4854160 silesia.tar, long distance mode, zstdcli, 4845745 silesia.tar, multithreaded, zstdcli, 4854164 @@ -145,7 +145,7 @@ silesia.tar, small hash log, zstdcli, silesia.tar, small chain log, zstdcli, 4917022 silesia.tar, explicit params, zstdcli, 4821112 silesia.tar, uncompressed literals, zstdcli, 5122571 -silesia.tar, uncompressed literals optimal, zstdcli, 4308929 +silesia.tar, uncompressed literals optimal, zstdcli, 4308455 silesia.tar, huffman literals, zstdcli, 5342074 silesia.tar, multithreaded with advanced params, zstdcli, 5122571 github, level -5, zstdcli, 206407 @@ -173,9 +173,9 @@ github, level 9 with dict, zstdcli, github, level 13, zstdcli, 134878 github, level 13 with dict, zstdcli, 41900 github, level 16, zstdcli, 135209 -github, level 16 with dict, zstdcli, 39577 +github, level 16 with dict, zstdcli, 39902 github, level 19, zstdcli, 134879 -github, level 19 with dict, zstdcli, 39576 +github, level 19 with dict, zstdcli, 39916 github, long distance mode, zstdcli, 138332 github, multithreaded, zstdcli, 138332 github, multithreaded long distance mode, zstdcli, 138332 @@ -212,9 +212,9 @@ github.tar, level 9 with dict, zstdcli, github.tar, level 13, zstdcli, 35505 github.tar, level 13 with dict, zstdcli, 37134 github.tar, level 16, zstdcli, 40470 -github.tar, level 16 with dict, zstdcli, 33378 -github.tar, level 19, zstdcli, 32280 -github.tar, level 19 with dict, zstdcli, 32716 +github.tar, level 16 with dict, zstdcli, 33379 +github.tar, level 19, zstdcli, 32266 +github.tar, level 19 with dict, zstdcli, 32705 github.tar, no source size, zstdcli, 38832 github.tar, no source size with dict, zstdcli, 38004 github.tar, long distance mode, zstdcli, 40236 @@ -225,7 +225,7 @@ github.tar, small hash log, zstdcli, github.tar, small chain log, zstdcli, 41673 github.tar, explicit params, zstdcli, 41385 github.tar, uncompressed literals, zstdcli, 41529 -github.tar, uncompressed literals optimal, zstdcli, 35401 +github.tar, uncompressed literals optimal, zstdcli, 35360 github.tar, huffman literals, zstdcli, 38857 github.tar, multithreaded with advanced params, zstdcli, 41529 silesia, level -5, advanced one pass, 6857372 @@ -248,8 +248,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass, 4505658 silesia, level 12 row 2, advanced one pass, 4503429 silesia, level 13, advanced one pass, 4493990 -silesia, level 16, advanced one pass, 4359969 -silesia, level 19, advanced one pass, 4267082 +silesia, level 16, advanced one pass, 4359652 +silesia, level 19, advanced one pass, 4266582 silesia, no source size, advanced one pass, 4842075 silesia, long distance mode, advanced one pass, 4833710 silesia, multithreaded, advanced one pass, 4842075 @@ -259,7 +259,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced one pass, 4912197 silesia, explicit params, advanced one pass, 4795840 silesia, uncompressed literals, advanced one pass, 5120566 -silesia, uncompressed literals optimal, advanced one pass, 4317337 +silesia, uncompressed literals optimal, advanced one pass, 4316880 silesia, huffman literals, advanced one pass, 5321369 silesia, multithreaded with advanced params, advanced one pass, 5120566 silesia.tar, level -5, advanced one pass, 6861055 @@ -282,8 +282,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass, 4514517 silesia.tar, level 12 row 2, advanced one pass, 4514007 silesia.tar, level 13, advanced one pass, 4502956 -silesia.tar, level 16, advanced one pass, 4360529 -silesia.tar, level 19, advanced one pass, 4261475 +silesia.tar, level 16, advanced one pass, 4360385 +silesia.tar, level 19, advanced one pass, 4260939 silesia.tar, no source size, advanced one pass, 4854086 silesia.tar, long distance mode, advanced one pass, 4840452 silesia.tar, multithreaded, advanced one pass, 4854160 @@ -293,7 +293,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced one pass, 4917041 silesia.tar, explicit params, advanced one pass, 4807274 silesia.tar, uncompressed literals, advanced one pass, 5122473 -silesia.tar, uncompressed literals optimal, advanced one pass, 4308925 +silesia.tar, uncompressed literals optimal, advanced one pass, 4308451 silesia.tar, huffman literals, advanced one pass, 5341705 silesia.tar, multithreaded with advanced params, advanced one pass, 5122567 github, level -5, advanced one pass, 204407 @@ -397,17 +397,17 @@ github, level 13 with dict dds, advanced github, level 13 with dict copy, advanced one pass, 39948 github, level 13 with dict load, advanced one pass, 42624 github, level 16, advanced one pass, 133209 -github, level 16 with dict, advanced one pass, 37577 -github, level 16 with dict dms, advanced one pass, 37577 -github, level 16 with dict dds, advanced one pass, 37577 -github, level 16 with dict copy, advanced one pass, 37568 -github, level 16 with dict load, advanced one pass, 42338 +github, level 16 with dict, advanced one pass, 37902 +github, level 16 with dict dms, advanced one pass, 37902 +github, level 16 with dict dds, advanced one pass, 37902 +github, level 16 with dict copy, advanced one pass, 37892 +github, level 16 with dict load, advanced one pass, 42402 github, level 19, advanced one pass, 132879 -github, level 19 with dict, advanced one pass, 37576 -github, level 19 with dict dms, advanced one pass, 37576 -github, level 19 with dict dds, advanced one pass, 37576 -github, level 19 with dict copy, advanced one pass, 37567 -github, level 19 with dict load, advanced one pass, 39613 +github, level 19 with dict, advanced one pass, 37916 +github, level 19 with dict dms, advanced one pass, 37916 +github, level 19 with dict dds, advanced one pass, 37916 +github, level 19 with dict copy, advanced one pass, 37906 +github, level 19 with dict load, advanced one pass, 39770 github, no source size, advanced one pass, 136332 github, no source size with dict, advanced one pass, 41148 github, long distance mode, advanced one pass, 136332 @@ -522,17 +522,17 @@ github.tar, level 13 with dict dds, advanced github.tar, level 13 with dict copy, advanced one pass, 37130 github.tar, level 13 with dict load, advanced one pass, 36010 github.tar, level 16, advanced one pass, 40466 -github.tar, level 16 with dict, advanced one pass, 33374 -github.tar, level 16 with dict dms, advanced one pass, 33206 -github.tar, level 16 with dict dds, advanced one pass, 33206 -github.tar, level 16 with dict copy, advanced one pass, 33374 +github.tar, level 16 with dict, advanced one pass, 33375 +github.tar, level 16 with dict dms, advanced one pass, 33207 +github.tar, level 16 with dict dds, advanced one pass, 33207 +github.tar, level 16 with dict copy, advanced one pass, 33375 github.tar, level 16 with dict load, advanced one pass, 39081 -github.tar, level 19, advanced one pass, 32276 -github.tar, level 19 with dict, advanced one pass, 32712 -github.tar, level 19 with dict dms, advanced one pass, 32555 -github.tar, level 19 with dict dds, advanced one pass, 32555 -github.tar, level 19 with dict copy, advanced one pass, 32712 -github.tar, level 19 with dict load, advanced one pass, 32479 +github.tar, level 19, advanced one pass, 32262 +github.tar, level 19 with dict, advanced one pass, 32701 +github.tar, level 19 with dict dms, advanced one pass, 32565 +github.tar, level 19 with dict dds, advanced one pass, 32565 +github.tar, level 19 with dict copy, advanced one pass, 32701 +github.tar, level 19 with dict load, advanced one pass, 32428 github.tar, no source size, advanced one pass, 38831 github.tar, no source size with dict, advanced one pass, 37995 github.tar, long distance mode, advanced one pass, 40252 @@ -543,7 +543,7 @@ github.tar, small hash log, advanced github.tar, small chain log, advanced one pass, 41669 github.tar, explicit params, advanced one pass, 41385 github.tar, uncompressed literals, advanced one pass, 41525 -github.tar, uncompressed literals optimal, advanced one pass, 35397 +github.tar, uncompressed literals optimal, advanced one pass, 35356 github.tar, huffman literals, advanced one pass, 38853 github.tar, multithreaded with advanced params, advanced one pass, 41525 silesia, level -5, advanced one pass small out, 6857372 @@ -566,8 +566,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass small out, 4505658 silesia, level 12 row 2, advanced one pass small out, 4503429 silesia, level 13, advanced one pass small out, 4493990 -silesia, level 16, advanced one pass small out, 4359969 -silesia, level 19, advanced one pass small out, 4267082 +silesia, level 16, advanced one pass small out, 4359652 +silesia, level 19, advanced one pass small out, 4266582 silesia, no source size, advanced one pass small out, 4842075 silesia, long distance mode, advanced one pass small out, 4833710 silesia, multithreaded, advanced one pass small out, 4842075 @@ -577,7 +577,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced one pass small out, 4912197 silesia, explicit params, advanced one pass small out, 4795840 silesia, uncompressed literals, advanced one pass small out, 5120566 -silesia, uncompressed literals optimal, advanced one pass small out, 4317337 +silesia, uncompressed literals optimal, advanced one pass small out, 4316880 silesia, huffman literals, advanced one pass small out, 5321369 silesia, multithreaded with advanced params, advanced one pass small out, 5120566 silesia.tar, level -5, advanced one pass small out, 6861055 @@ -600,8 +600,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass small out, 4514517 silesia.tar, level 12 row 2, advanced one pass small out, 4514007 silesia.tar, level 13, advanced one pass small out, 4502956 -silesia.tar, level 16, advanced one pass small out, 4360529 -silesia.tar, level 19, advanced one pass small out, 4261475 +silesia.tar, level 16, advanced one pass small out, 4360385 +silesia.tar, level 19, advanced one pass small out, 4260939 silesia.tar, no source size, advanced one pass small out, 4854086 silesia.tar, long distance mode, advanced one pass small out, 4840452 silesia.tar, multithreaded, advanced one pass small out, 4854160 @@ -611,7 +611,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced one pass small out, 4917041 silesia.tar, explicit params, advanced one pass small out, 4807274 silesia.tar, uncompressed literals, advanced one pass small out, 5122473 -silesia.tar, uncompressed literals optimal, advanced one pass small out, 4308925 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4308451 silesia.tar, huffman literals, advanced one pass small out, 5341705 silesia.tar, multithreaded with advanced params, advanced one pass small out, 5122567 github, level -5, advanced one pass small out, 204407 @@ -715,17 +715,17 @@ github, level 13 with dict dds, advanced github, level 13 with dict copy, advanced one pass small out, 39948 github, level 13 with dict load, advanced one pass small out, 42624 github, level 16, advanced one pass small out, 133209 -github, level 16 with dict, advanced one pass small out, 37577 -github, level 16 with dict dms, advanced one pass small out, 37577 -github, level 16 with dict dds, advanced one pass small out, 37577 -github, level 16 with dict copy, advanced one pass small out, 37568 -github, level 16 with dict load, advanced one pass small out, 42338 +github, level 16 with dict, advanced one pass small out, 37902 +github, level 16 with dict dms, advanced one pass small out, 37902 +github, level 16 with dict dds, advanced one pass small out, 37902 +github, level 16 with dict copy, advanced one pass small out, 37892 +github, level 16 with dict load, advanced one pass small out, 42402 github, level 19, advanced one pass small out, 132879 -github, level 19 with dict, advanced one pass small out, 37576 -github, level 19 with dict dms, advanced one pass small out, 37576 -github, level 19 with dict dds, advanced one pass small out, 37576 -github, level 19 with dict copy, advanced one pass small out, 37567 -github, level 19 with dict load, advanced one pass small out, 39613 +github, level 19 with dict, advanced one pass small out, 37916 +github, level 19 with dict dms, advanced one pass small out, 37916 +github, level 19 with dict dds, advanced one pass small out, 37916 +github, level 19 with dict copy, advanced one pass small out, 37906 +github, level 19 with dict load, advanced one pass small out, 39770 github, no source size, advanced one pass small out, 136332 github, no source size with dict, advanced one pass small out, 41148 github, long distance mode, advanced one pass small out, 136332 @@ -840,17 +840,17 @@ github.tar, level 13 with dict dds, advanced github.tar, level 13 with dict copy, advanced one pass small out, 37130 github.tar, level 13 with dict load, advanced one pass small out, 36010 github.tar, level 16, advanced one pass small out, 40466 -github.tar, level 16 with dict, advanced one pass small out, 33374 -github.tar, level 16 with dict dms, advanced one pass small out, 33206 -github.tar, level 16 with dict dds, advanced one pass small out, 33206 -github.tar, level 16 with dict copy, advanced one pass small out, 33374 +github.tar, level 16 with dict, advanced one pass small out, 33375 +github.tar, level 16 with dict dms, advanced one pass small out, 33207 +github.tar, level 16 with dict dds, advanced one pass small out, 33207 +github.tar, level 16 with dict copy, advanced one pass small out, 33375 github.tar, level 16 with dict load, advanced one pass small out, 39081 -github.tar, level 19, advanced one pass small out, 32276 -github.tar, level 19 with dict, advanced one pass small out, 32712 -github.tar, level 19 with dict dms, advanced one pass small out, 32555 -github.tar, level 19 with dict dds, advanced one pass small out, 32555 -github.tar, level 19 with dict copy, advanced one pass small out, 32712 -github.tar, level 19 with dict load, advanced one pass small out, 32479 +github.tar, level 19, advanced one pass small out, 32262 +github.tar, level 19 with dict, advanced one pass small out, 32701 +github.tar, level 19 with dict dms, advanced one pass small out, 32565 +github.tar, level 19 with dict dds, advanced one pass small out, 32565 +github.tar, level 19 with dict copy, advanced one pass small out, 32701 +github.tar, level 19 with dict load, advanced one pass small out, 32428 github.tar, no source size, advanced one pass small out, 38831 github.tar, no source size with dict, advanced one pass small out, 37995 github.tar, long distance mode, advanced one pass small out, 40252 @@ -861,7 +861,7 @@ github.tar, small hash log, advanced github.tar, small chain log, advanced one pass small out, 41669 github.tar, explicit params, advanced one pass small out, 41385 github.tar, uncompressed literals, advanced one pass small out, 41525 -github.tar, uncompressed literals optimal, advanced one pass small out, 35397 +github.tar, uncompressed literals optimal, advanced one pass small out, 35356 github.tar, huffman literals, advanced one pass small out, 38853 github.tar, multithreaded with advanced params, advanced one pass small out, 41525 silesia, level -5, advanced streaming, 6854744 @@ -884,8 +884,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced streaming, 4505658 silesia, level 12 row 2, advanced streaming, 4503429 silesia, level 13, advanced streaming, 4493990 -silesia, level 16, advanced streaming, 4359969 -silesia, level 19, advanced streaming, 4267082 +silesia, level 16, advanced streaming, 4359652 +silesia, level 19, advanced streaming, 4266582 silesia, no source size, advanced streaming, 4842039 silesia, long distance mode, advanced streaming, 4833710 silesia, multithreaded, advanced streaming, 4842075 @@ -895,7 +895,7 @@ silesia, small hash log, advanced silesia, small chain log, advanced streaming, 4912197 silesia, explicit params, advanced streaming, 4795857 silesia, uncompressed literals, advanced streaming, 5120566 -silesia, uncompressed literals optimal, advanced streaming, 4317337 +silesia, uncompressed literals optimal, advanced streaming, 4316880 silesia, huffman literals, advanced streaming, 5321370 silesia, multithreaded with advanced params, advanced streaming, 5120566 silesia.tar, level -5, advanced streaming, 6856523 @@ -918,8 +918,8 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced streaming, 4514514 silesia.tar, level 12 row 2, advanced streaming, 4514003 silesia.tar, level 13, advanced streaming, 4502956 -silesia.tar, level 16, advanced streaming, 4360529 -silesia.tar, level 19, advanced streaming, 4261475 +silesia.tar, level 16, advanced streaming, 4360385 +silesia.tar, level 19, advanced streaming, 4260939 silesia.tar, no source size, advanced streaming, 4859267 silesia.tar, long distance mode, advanced streaming, 4840452 silesia.tar, multithreaded, advanced streaming, 4854160 @@ -929,7 +929,7 @@ silesia.tar, small hash log, advanced silesia.tar, small chain log, advanced streaming, 4917021 silesia.tar, explicit params, advanced streaming, 4807288 silesia.tar, uncompressed literals, advanced streaming, 5127423 -silesia.tar, uncompressed literals optimal, advanced streaming, 4308925 +silesia.tar, uncompressed literals optimal, advanced streaming, 4308451 silesia.tar, huffman literals, advanced streaming, 5341712 silesia.tar, multithreaded with advanced params, advanced streaming, 5122567 github, level -5, advanced streaming, 204407 @@ -1033,17 +1033,17 @@ github, level 13 with dict dds, advanced github, level 13 with dict copy, advanced streaming, 39948 github, level 13 with dict load, advanced streaming, 42624 github, level 16, advanced streaming, 133209 -github, level 16 with dict, advanced streaming, 37577 -github, level 16 with dict dms, advanced streaming, 37577 -github, level 16 with dict dds, advanced streaming, 37577 -github, level 16 with dict copy, advanced streaming, 37568 -github, level 16 with dict load, advanced streaming, 42338 +github, level 16 with dict, advanced streaming, 37902 +github, level 16 with dict dms, advanced streaming, 37902 +github, level 16 with dict dds, advanced streaming, 37902 +github, level 16 with dict copy, advanced streaming, 37892 +github, level 16 with dict load, advanced streaming, 42402 github, level 19, advanced streaming, 132879 -github, level 19 with dict, advanced streaming, 37576 -github, level 19 with dict dms, advanced streaming, 37576 -github, level 19 with dict dds, advanced streaming, 37576 -github, level 19 with dict copy, advanced streaming, 37567 -github, level 19 with dict load, advanced streaming, 39613 +github, level 19 with dict, advanced streaming, 37916 +github, level 19 with dict dms, advanced streaming, 37916 +github, level 19 with dict dds, advanced streaming, 37916 +github, level 19 with dict copy, advanced streaming, 37906 +github, level 19 with dict load, advanced streaming, 39770 github, no source size, advanced streaming, 136332 github, no source size with dict, advanced streaming, 41148 github, long distance mode, advanced streaming, 136332 @@ -1158,17 +1158,17 @@ github.tar, level 13 with dict dds, advanced github.tar, level 13 with dict copy, advanced streaming, 37130 github.tar, level 13 with dict load, advanced streaming, 36010 github.tar, level 16, advanced streaming, 40466 -github.tar, level 16 with dict, advanced streaming, 33374 -github.tar, level 16 with dict dms, advanced streaming, 33206 -github.tar, level 16 with dict dds, advanced streaming, 33206 -github.tar, level 16 with dict copy, advanced streaming, 33374 +github.tar, level 16 with dict, advanced streaming, 33375 +github.tar, level 16 with dict dms, advanced streaming, 33207 +github.tar, level 16 with dict dds, advanced streaming, 33207 +github.tar, level 16 with dict copy, advanced streaming, 33375 github.tar, level 16 with dict load, advanced streaming, 39081 -github.tar, level 19, advanced streaming, 32276 -github.tar, level 19 with dict, advanced streaming, 32712 -github.tar, level 19 with dict dms, advanced streaming, 32555 -github.tar, level 19 with dict dds, advanced streaming, 32555 -github.tar, level 19 with dict copy, advanced streaming, 32712 -github.tar, level 19 with dict load, advanced streaming, 32479 +github.tar, level 19, advanced streaming, 32262 +github.tar, level 19 with dict, advanced streaming, 32701 +github.tar, level 19 with dict dms, advanced streaming, 32565 +github.tar, level 19 with dict dds, advanced streaming, 32565 +github.tar, level 19 with dict copy, advanced streaming, 32701 +github.tar, level 19 with dict load, advanced streaming, 32428 github.tar, no source size, advanced streaming, 38828 github.tar, no source size with dict, advanced streaming, 38000 github.tar, long distance mode, advanced streaming, 40252 @@ -1179,7 +1179,7 @@ github.tar, small hash log, advanced github.tar, small chain log, advanced streaming, 41669 github.tar, explicit params, advanced streaming, 41385 github.tar, uncompressed literals, advanced streaming, 41525 -github.tar, uncompressed literals optimal, advanced streaming, 35397 +github.tar, uncompressed literals optimal, advanced streaming, 35356 github.tar, huffman literals, advanced streaming, 38853 github.tar, multithreaded with advanced params, advanced streaming, 41525 silesia, level -5, old streaming, 6854744 @@ -1194,11 +1194,11 @@ silesia, level 6, old stre silesia, level 7, old streaming, 4570271 silesia, level 9, old streaming, 4545850 silesia, level 13, old streaming, 4493990 -silesia, level 16, old streaming, 4359969 -silesia, level 19, old streaming, 4267082 +silesia, level 16, old streaming, 4359652 +silesia, level 19, old streaming, 4266582 silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 -silesia, uncompressed literals optimal, old streaming, 4267082 +silesia, uncompressed literals optimal, old streaming, 4266582 silesia, huffman literals, old streaming, 6172207 silesia.tar, level -5, old streaming, 6856523 silesia.tar, level -3, old streaming, 6505954 @@ -1212,11 +1212,11 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming, 4579823 silesia.tar, level 9, old streaming, 4555445 silesia.tar, level 13, old streaming, 4502956 -silesia.tar, level 16, old streaming, 4360529 -silesia.tar, level 19, old streaming, 4261475 +silesia.tar, level 16, old streaming, 4360385 +silesia.tar, level 19, old streaming, 4260939 silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 -silesia.tar, uncompressed literals optimal, old streaming, 4261475 +silesia.tar, uncompressed literals optimal, old streaming, 4260939 silesia.tar, huffman literals, old streaming, 6179056 github, level -5, old streaming, 204407 github, level -5 with dict, old streaming, 45832 @@ -1243,9 +1243,9 @@ github, level 9 with dict, old stre github, level 13, old streaming, 132878 github, level 13 with dict, old streaming, 39900 github, level 16, old streaming, 133209 -github, level 16 with dict, old streaming, 37577 +github, level 16 with dict, old streaming, 37902 github, level 19, old streaming, 132879 -github, level 19 with dict, old streaming, 37576 +github, level 19 with dict, old streaming, 37916 github, no source size, old streaming, 140599 github, no source size with dict, old streaming, 40654 github, uncompressed literals, old streaming, 136332 @@ -1276,13 +1276,13 @@ github.tar, level 9 with dict, old stre github.tar, level 13, old streaming, 35501 github.tar, level 13 with dict, old streaming, 37130 github.tar, level 16, old streaming, 40466 -github.tar, level 16 with dict, old streaming, 33374 -github.tar, level 19, old streaming, 32276 -github.tar, level 19 with dict, old streaming, 32712 +github.tar, level 16 with dict, old streaming, 33375 +github.tar, level 19, old streaming, 32262 +github.tar, level 19 with dict, old streaming, 32701 github.tar, no source size, old streaming, 38828 github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38831 -github.tar, uncompressed literals optimal, old streaming, 32276 +github.tar, uncompressed literals optimal, old streaming, 32262 github.tar, huffman literals, old streaming, 42560 silesia, level -5, old streaming advanced, 6854744 silesia, level -3, old streaming advanced, 6503319 @@ -1296,8 +1296,8 @@ silesia, level 6, old stre silesia, level 7, old streaming advanced, 4570271 silesia, level 9, old streaming advanced, 4545850 silesia, level 13, old streaming advanced, 4493990 -silesia, level 16, old streaming advanced, 4359969 -silesia, level 19, old streaming advanced, 4267082 +silesia, level 16, old streaming advanced, 4359652 +silesia, level 19, old streaming advanced, 4266582 silesia, no source size, old streaming advanced, 4842039 silesia, long distance mode, old streaming advanced, 4842075 silesia, multithreaded, old streaming advanced, 4842075 @@ -1307,7 +1307,7 @@ silesia, small hash log, old stre silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795857 silesia, uncompressed literals, old streaming advanced, 4842075 -silesia, uncompressed literals optimal, old streaming advanced, 4267082 +silesia, uncompressed literals optimal, old streaming advanced, 4266582 silesia, huffman literals, old streaming advanced, 6172207 silesia, multithreaded with advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 6856523 @@ -1322,8 +1322,8 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming advanced, 4579823 silesia.tar, level 9, old streaming advanced, 4555445 silesia.tar, level 13, old streaming advanced, 4502956 -silesia.tar, level 16, old streaming advanced, 4360529 -silesia.tar, level 19, old streaming advanced, 4261475 +silesia.tar, level 16, old streaming advanced, 4360385 +silesia.tar, level 19, old streaming advanced, 4260939 silesia.tar, no source size, old streaming advanced, 4859267 silesia.tar, long distance mode, old streaming advanced, 4859271 silesia.tar, multithreaded, old streaming advanced, 4859271 @@ -1333,7 +1333,7 @@ silesia.tar, small hash log, old stre silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4807288 silesia.tar, uncompressed literals, old streaming advanced, 4859271 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4261475 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4260939 silesia.tar, huffman literals, old streaming advanced, 6179056 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 @@ -1361,9 +1361,9 @@ github, level 9 with dict, old stre github, level 13, old streaming advanced, 138676 github, level 13 with dict, old streaming advanced, 39725 github, level 16, old streaming advanced, 138575 -github, level 16 with dict, old streaming advanced, 40789 +github, level 16 with dict, old streaming advanced, 40804 github, level 19, old streaming advanced, 132879 -github, level 19 with dict, old streaming advanced, 37576 +github, level 19 with dict, old streaming advanced, 37916 github, no source size, old streaming advanced, 140599 github, no source size with dict, old streaming advanced, 40608 github, long distance mode, old streaming advanced, 141104 @@ -1403,8 +1403,8 @@ github.tar, level 13, old stre github.tar, level 13 with dict, old streaming advanced, 35807 github.tar, level 16, old streaming advanced, 40466 github.tar, level 16 with dict, old streaming advanced, 38578 -github.tar, level 19, old streaming advanced, 32276 -github.tar, level 19 with dict, old streaming advanced, 32704 +github.tar, level 19, old streaming advanced, 32262 +github.tar, level 19 with dict, old streaming advanced, 32678 github.tar, no source size, old streaming advanced, 38828 github.tar, no source size with dict, old streaming advanced, 38015 github.tar, long distance mode, old streaming advanced, 38831 @@ -1415,7 +1415,7 @@ github.tar, small hash log, old stre github.tar, small chain log, old streaming advanced, 41669 github.tar, explicit params, old streaming advanced, 41385 github.tar, uncompressed literals, old streaming advanced, 38831 -github.tar, uncompressed literals optimal, old streaming advanced, 32276 +github.tar, uncompressed literals optimal, old streaming advanced, 32262 github.tar, huffman literals, old streaming advanced, 42560 github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming cdict, 45832 @@ -1430,8 +1430,8 @@ github, level 6 with dict, old stre github, level 7 with dict, old streaming cdict, 38765 github, level 9 with dict, old streaming cdict, 39439 github, level 13 with dict, old streaming cdict, 39900 -github, level 16 with dict, old streaming cdict, 37577 -github, level 19 with dict, old streaming cdict, 37576 +github, level 16 with dict, old streaming cdict, 37902 +github, level 19 with dict, old streaming cdict, 37916 github, no source size with dict, old streaming cdict, 40654 github.tar, level -5 with dict, old streaming cdict, 51286 github.tar, level -3 with dict, old streaming cdict, 45147 @@ -1446,7 +1446,7 @@ github.tar, level 7 with dict, old stre github.tar, level 9 with dict, old streaming cdict, 36322 github.tar, level 13 with dict, old streaming cdict, 36010 github.tar, level 16 with dict, old streaming cdict, 39081 -github.tar, level 19 with dict, old streaming cdict, 32479 +github.tar, level 19 with dict, old streaming cdict, 32428 github.tar, no source size with dict, old streaming cdict, 38000 github, level -5 with dict, old streaming advanced cdict, 46708 github, level -3 with dict, old streaming advanced cdict, 45476 @@ -1460,8 +1460,8 @@ github, level 6 with dict, old stre github, level 7 with dict, old streaming advanced cdict, 38875 github, level 9 with dict, old streaming advanced cdict, 38941 github, level 13 with dict, old streaming advanced cdict, 39725 -github, level 16 with dict, old streaming advanced cdict, 40789 -github, level 19 with dict, old streaming advanced cdict, 37576 +github, level 16 with dict, old streaming advanced cdict, 40804 +github, level 19 with dict, old streaming advanced cdict, 37916 github, no source size with dict, old streaming advanced cdict, 40608 github.tar, level -5 with dict, old streaming advanced cdict, 50791 github.tar, level -3 with dict, old streaming advanced cdict, 44926 @@ -1476,5 +1476,5 @@ github.tar, level 7 with dict, old stre github.tar, level 9 with dict, old streaming advanced cdict, 36241 github.tar, level 13 with dict, old streaming advanced cdict, 35807 github.tar, level 16 with dict, old streaming advanced cdict, 38578 -github.tar, level 19 with dict, old streaming advanced cdict, 32704 +github.tar, level 19 with dict, old streaming advanced cdict, 32678 github.tar, no source size with dict, old streaming advanced cdict, 38015 From b88c593d8ff79b96390308380604f232802e0f04 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 5 Feb 2024 18:32:25 -0800 Subject: [PATCH 14/14] added or updated code comments as suggested by @terrelln, to make the code of the optimal parser a bit more understandable. --- lib/compress/zstd_compress_internal.h | 2 +- lib/compress/zstd_opt.c | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index dae8526d461..087ea49dcf8 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -162,7 +162,7 @@ typedef struct { int price; /* price from beginning of segment to this position */ U32 off; /* offset of previous match */ U32 mlen; /* length of previous match */ - U32 litlen; /* nb of literals after previous match */ + U32 litlen; /* nb of literals since previous match */ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ } ZSTD_optimal_t; diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 25715eabba8..8e1be1cec18 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1129,13 +1129,20 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, continue; } + /* Match found: let's store this solution, and eventually find more candidates. + * During this forward pass, @opt is used to store stretches, + * defined as "a match followed by N literals". + * Note how this is different from a Sequence, which is "N literals followed by a match". + * Storing stretches allows us to store different match predecessors + * for each literal position part of a literals run. */ + /* initialize opt[0] */ opt[0].mlen = 0; /* there are only literals so far */ opt[0].litlen = litlen; - /* No need to include the actual price of the literals before the segment + /* No need to include the actual price of the literals before the first match * because it is static for the duration of the forward pass, and is included - * in every subsequent price. We include the literal length as the cost variation - * of litlen depends on the value of litlen. + * in every subsequent price. But, we include the literal length because + * the cost variation of litlen depends on the value of litlen. */ opt[0].price = LL_PRICE(litlen); ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); @@ -1353,11 +1360,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, cur -= lastStretch.litlen; } - /* let's write the shortest path solution - * solution is stored in @opt, - * in reverse order, - * starting from @storeEnd (==cur+1) - * (effectively partially overwriting @opt). + /* Let's write the shortest path solution. + * It is stored in @opt in reverse order, + * starting from @storeEnd (==cur+2), + * effectively partially @opt overwriting. * Content is changed too: * - So far, @opt stored stretches, aka a match followed by literals * - Now, it will store sequences, aka literals followed by a match