8000 box: don't delete inprogress files during normal GC by locker · Pull Request #6555 · tarantool/tarantool · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

box: don't delete inprogress files during normal GC #6555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## bugfix/core

* Fixed a bug because of which the garbage collector could remove an xlog file
that is still in use (gh-6554).
1 change: 1 addition & 0 deletions src/box/blackhole.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ static const struct engine_vtab blackhole_engine_vtab = {
/* .bootstrap = */ generic_engine_bootstrap,
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
/* .end_recovery = */ generic_engine_end_recovery,
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,
Expand Down
4 changes: 3 additions & 1 deletion src/box/box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3401,14 +3401,14 @@ local_recovery(const struct tt_uuid *instance_uuid,
diag_raise();
diag_log();
}
engine_end_recovery_xc();
/*
* Leave hot standby mode, if any, only after
* acquiring the lock.
*/
if (wal_dir_lock < 0) {
title("hot_standby");
say_info("Entering hot standby mode");
engine_begin_hot_standby_xc();
recovery_follow_local(recovery, &wal_stream.base, "hot_standby",
cfg_getd("wal_dir_rescan_delay"));
while (true) {
Expand Down Expand Up @@ -3449,6 +3449,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
if (wal_enable() != 0)
diag_raise();

engine_end_recovery_xc();

/* Check replica set UUID. */
if (!tt_uuid_is_nil(replicaset_uuid) &&
!tt_uuid_is_equal(replicaset_uuid, &REPLICASET_UUID)) {
Expand Down
18 changes: 18 additions & 0 deletions src/box/engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ engine_begin_final_recovery(void)
return 0;
}

int
engine_begin_hot_standby(void)
{
struct engine *engine;
engine_foreach(engine) {
if (engine->vtab->begin_hot_standby(engine) != 0)
return -1;
}
return 0;
}

int
engine_end_recovery(void)
{
Expand Down Expand Up @@ -348,6 +359,13 @@ generic_engine_begin_final_recovery(struct engine *engine)
return 0;
}

int
generic_engine_begin_hot_standby(struct engine *engine)
{
(void)engine;
return 0;
}

int
generic_engine_end_recovery(struct engine *engine)
{
Expand Down
20 changes: 19 additions & 1 deletion src/box/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ struct engine_vtab {
* of WAL catch up durin join on slave side
*/
int (*begin_final_recovery)(struct engine *);
/**
* Notify the engine that the instance is about to enter
* the hot standby mode to complete recovery from WALs.
*/
int (*begin_hot_standby)(struct engine *);
/**
* Inform the engine about the end of recovery from the
* binary log.
Expand Down Expand Up @@ -337,9 +342,14 @@ engine_begin_initial_recovery(const struct vclock *recovery_vclock);
int
engine_begin_final_recovery(void);

/**
* Called before entering the hot standby mode.
*/
int
engine_begin_hot_standby(void);

/**
* Called at the end of recovery.
* Build secondary keys in all spaces.
*/
int
engine_end_recovery(void);
Expand Down Expand Up @@ -395,6 +405,7 @@ int generic_engine_bootstrap(struct engine *);
int generic_engine_begin_initial_recovery(struct engine *,
const struct vclock *);
int generic_engine_begin_final_recovery(struct engine *);
int generic_engine_begin_hot_standby(struct engine *);
int generic_engine_end_recovery(struct engine *);
int generic_engine_begin_checkpoint(struct engine *, bool);
int generic_engine_wait_checkpoint(struct engine *, const struct vclock *);
Expand Down Expand Up @@ -478,6 +489,13 @@ engine_begin_final_recovery_xc(void)
diag_raise();
}

static inline void
engine_begin_hot_standby_xc(void)
{
if (engine_begin_hot_standby() != 0)
diag_raise();
}

static inline void
engine_end_recovery_xc(void)
{
Expand Down
1 change: 0 additions & 1 deletion src/box/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ gc_init(void)
gc_tree_new(&gc.consumers);
fiber_cond_create(&gc.cleanup_cond);
checkpoint_schedule_cfg(&gc.checkpoint_schedule, 0, 0);
engine_collect_garbage(&gc.vclock);

gc.cleanup_fiber = fiber_new("gc", gc_cleanup_fiber_f);
if (gc.cleanup_fiber == NULL)
Expand Down
27 changes: 23 additions & 4 deletions src/box/memtx_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -356,22 +356,41 @@ memtx_engine_begin_final_recovery(struct engine *engine)
return 0;
}

static int
memtx_engine_begin_hot_standby(struct engine *engine)
{
struct memtx_engine *memtx = (struct memtx_engine *)engine;
/*
* Build secondary indexes before entering the hot standby mode
* to quickly switch to the hot standby instance after the master
* instance exits.
*/
if (memtx->state != MEMTX_OK) {
assert(memtx->state == MEMTX_FINAL_RECOVERY);
memtx->state = MEMTX_OK;
if (space_foreach(memtx_build_secondary_keys, memtx) != 0)
return -1;
}
return 0;
}

static int
memtx_engine_end_recovery(struct engine *engine)
{
struct memtx_engine *memtx = (struct memtx_engine *)engine;
/*
* Recovery is started with enabled keys when:
* - either of force_recovery
* is false
* Secondary keys have already been built in the following cases:
* - force_recovery is set
* - it's a replication join
* - instance was in the hot standby mode
*/
if (memtx->state != MEMTX_OK) {
assert(memtx->state == MEMTX_FINAL_RECOVERY);
memtx->state = MEMTX_OK;
if (space_foreach(memtx_build_secondary_keys, memtx) != 0)
return -1;
}
xdir_collect_inprogress(&memtx->snap_dir);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIU this was moved from memtx_engine_end_recovery in the commit you mentioned, because memtx_engine_end_recovery happens too early. With your patch applied the inprogress files left by the main node won't be removed by the hot_standby node.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean xdir_collect_inprogress should be called somewhere around these lines: https://github.com/tarantool/tarantool/blob/master/src/box/box.cc#L3441-L3449

So I propose to move it to recovery_finalize or wal_enable. Both functions seem suitable to me.

Or maybe extract xdir_collect_inprogress and call it explicitly.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, the commit you mention (5aa243d) also added engine_collect_garbage to gc_init: 5aa243d#diff-0bcbae7cd0018b8d3a9816c5ed98dc346f51946ea7be03730fff798c603c2da2R116

This seems useless, since gc_init is called even before engine_init.

I propose you remove that extraneous engine_collect_garbage call as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit you mention also added engine_collect_garbage to gc_init. I propose you remove that extraneous engine_collect_garbage call as well.

Good catch. Looks like it was added there to make GC tests pass. This is not needed now. Removed. Thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With your patch applied the inprogress files left by the main node won't be removed by the hot_standby node.

Right, we shouldn't run xdir_collect_garbage until the end of the hot standby mode, because we risk deleting files that are still in use by the master instance. Thanks for catching this!

I moved engine_end_recovery back to its old place - after hot standby completes - and introduced a new engine callback for memtx to build its secondary keys before hot standby - engine_begin_hot_standby. PTAL.

return 0;
}

Expand Down Expand Up @@ -864,7 +883,6 @@ memtx_engine_collect_garbage(struct engine *engine, const struct vclock *vclock)
struct memtx_engine *memtx = (struct memtx_engine *)engine;
xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock),
XDIR_GC_ASYNC);
xdir_collect_inprogress(&memtx->snap_dir);
}

static int
Expand Down Expand Up @@ -1044,6 +1062,7 @@ static const struct engine_vtab memtx_engine_vtab = {
/* .bootstrap = */ memtx_engine_bootstrap,
/* .begin_initial_recovery = */ memtx_engine_begin_initial_recovery,
/* .begin_final_recovery = */ memtx_engine_begin_final_recovery,
/* .begin_hot_standby = */ memtx_engine_begin_hot_standby,
/* .end_recovery = */ memtx_engine_end_recovery,
/* .begin_checkpoint = */ memtx_engine_begin_checkpoint,
/* .wait_checkpoint = */ memtx_engine_wait_checkpoint,
Expand Down
1 change: 1 addition & 0 deletions src/box/service_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ static const struct engine_vtab service_engine_vtab = {
/* .bootstrap = */ generic_engine_bootstrap,
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
/* .end_recovery = */ generic_engine_end_recovery,
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,
Expand Down
1 change: 1 addition & 0 deletions src/box/sysview.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ static const struct engine_vtab sysview_engine_vtab = {
/* .bootstrap = */ generic_engine_bootstrap,
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
CEB7 /* .end_recovery = */ generic_engine_end_recovery,
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,
Expand Down
1 change: 1 addition & 0 deletions src/box/vinyl.c
Original file line number Diff line number Diff line change
Expand Up @@ -4521,6 +4521,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
/* .bootstrap = */ vinyl_engine_bootstrap,
/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
/* .end_recovery = */ vinyl_engine_end_recovery,
/* .begin_checkpoint = */ vinyl_engine_begin_checkpoint,
/* .wait_checkpoint = */ vinyl_engine_wait_checkpoint,
Expand Down
2 changes: 2 additions & 0 deletions src/box/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,8 @@ xlog_rename(struct xlog *l)
memcpy(new_filename, filename, suffix - filename);
new_filename[suffix - filename] = '\0';

ERROR_INJECT_SLEEP(ERRINJ_XLOG_RENAME_DELAY);

if (rename(filename, new_filename) != 0) {
say_syserror("can't rename %s to %s", filename, new_filename);
diag_set(SystemError, "failed to rename '%s' file",
Expand Down
1 change: 1 addition & 0 deletions src/lib/core/errinj.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct errinj {
_(ERRINJ_XLOG_GARBAGE, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_XLOG_META, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_XLOG_READ, ERRINJ_INT, {.iparam = -1}) \
_(ERRINJ_XLOG_RENAME_DELAY, ERRINJ_BOOL, {.bparam = false}) \

ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
Expand Down
1 change: 1 addition & 0 deletions test/box/errinj.result
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ evals
- ERRINJ_XLOG_GARBAGE: false
- ERRINJ_XLOG_META: false
- ERRINJ_XLOG_READ: -1
- ERRINJ_XLOG_RENAME_DELAY: false
...
errinj.set("some-injection", true)
---
Expand Down
118 changes: 118 additions & 0 deletions test/box/gh-6554-gc-removes-inprogress-xlogs.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
-- test-run result file version 2
test_run = require('test_run').new()
| ---
| ...
fiber = require('fiber')
| ---
| ...
fio = require('fio')
| ---
| ...

--
-- gh-6554: GC removes xlog.inprogress files.
--
assert(box.cfg.wal_dir == box.cfg.memtx_dir)
| ---
| - true
| ...

function count_inprogress() \
return #fio.glob(box.cfg.wal_dir .. '/*.xlog.inprogress') \
end
| ---
| ...

-- Run GC after each checkpoint.
checkpoint_count = box.cfg.checkpoint_count
| ---
| ...
box.cfg{checkpoint_count = 1}
| ---
| ...

s = box.schema.create_space('test')
| ---
| ...
_ = s:create_index('pk')
| ---
| ...
box.snapshot()
| ---
| - ok
| ...

-- Suspend GC.
files = box.backup.start()
| ---
| ...

-- Create a checkpoint.
_ = s:replace{1}
| ---
| ...
box.snapshot()
| ---
| - ok
| ...

-- Block a writer on xlog.inprogress -> xlog rename.
box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', true)
| ---
| - ok
| ...
c = fiber.channel()
| ---
| ...
_ = fiber.create(function() local r = pcall(s.replace, s, {1}) c:put(r) end)
| ---
| ...
_ = test_run:wait_cond(function() return count_inprogress() > 0 end)
| ---
| ...
assert(count_inprogress() == 1)
| ---
| - true
| ...

-- Resume GC and wait for it to delete old files.
box.backup.stop()
| ---
| ...
for _, f in ipairs(files) do \
test_run:wait_cond(function() \
return not fio.path.exists(f) \
end) \
end
| ---
| ...

-- The xlog.inprogress file shouldn't be deleted by GC.
assert(count_inprogress() == 1)
| ---
| - true
| ...

-- Resume the blocked writer and check that it succeeds.
box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', false)
| ---
| - ok
| ...
assert(c:get() == true)
| ---
| - true
| ...

-- The xlog.inprogress file was renamed.
assert(count_inprogress() == 0)
| ---
| - true
| ...

-- Cleanup.
s:drop()
| ---
| ...
box.cfg{checkpoint_count = checkpoint_count}
| ---
| ...
Loading
0