diff --git a/CMakeLists.txt b/CMakeLists.txt index 60bdef262c23..3aa9a5128f5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -852,6 +852,7 @@ set(TARANTOOL_BIN $) add_subdirectory(extra) add_subdirectory(test) +add_subdirectory(proofs/tla) if (CMAKE_BUILD_TYPE STREQUAL "Debug") message(AUTHOR_WARNING "Benchmarks are available only in release build") else () diff --git a/proofs/tla/CMakeLists.txt b/proofs/tla/CMakeLists.txt new file mode 100644 index 000000000000..5bf3c273b4fc --- /dev/null +++ b/proofs/tla/CMakeLists.txt @@ -0,0 +1,47 @@ +find_program(TLC tlc) +if (NOT EXISTS ${TLC}) + message(WARNING "TLC is required for running TLA+ tests") + return() +endif() + +set(TEST_SUITE_NAME "tla") + +message(STATUS "Add test suite ${TEST_SUITE_NAME}") + +list(APPEND TLA_LIBRARIES + ${PROJECT_SOURCE_DIR}/proofs/tla/src/modules + ${PROJECT_SOURCE_DIR}/proofs/tla/src/ +) +list(JOIN TLA_LIBRARIES ":" TLA_LIBRARIES_STR) + +file(GLOB_RECURSE tests + ${CMAKE_CURRENT_SOURCE_DIR}/test/*.tla +) + +# Remove files that match *_TTrace_*.tla. These are remainings of the runs. +foreach(test ${tests}) + if(test MATCHES ".*_TTrace_.*\\.tla$") + list(REMOVE_ITEM tests ${test}) + endif() +endforeach() + +foreach(test_path ${tests}) + get_filename_component(test_name ${test_path} NAME) + # FIXME: By default, GLOB lists directories. + # Directories are omitted in the result if LIST_DIRECTORIES + # is set to false. New in version CMake 3.3. + if(${test_name} STREQUAL ${TEST_SUITE_NAME}) + continue() + endif() + set(test_title "test/${TEST_SUITE_NAME}/${test_name}") + get_filename_component(test_dir "${test_path}" DIRECTORY) + get_filename_component(test_name "${test_path}" NAME_WLE) + add_test(NAME ${test_title} + COMMAND ${TLC} -workers auto ${test_name} + WORKING_DIRECTORY ${test_dir} + ) + set_tests_properties(${test_title} PROPERTIES + ENVIRONMENT "JAVA_OPTS=-DTLA-Library=${TLA_LIBRARIES_STR}" + LABELS "${TEST_SUITE_NAME};regression;" + ) +endforeach() diff --git a/proofs/tla/src/definitions.tla b/proofs/tla/src/definitions.tla new file mode 100644 index 000000000000..e26710af1c5e --- /dev/null +++ b/proofs/tla/src/definitions.tla @@ -0,0 +1,74 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +------------------------------ MODULE definitions ------------------------------ + +\* See raft's state variable. +Follower == "FOLLOWER" +Candidate == "CANDIDATE" +Leader == "LEADER" + +\* XrowEntry types. +DmlType == "INSERT" +PromoteType == "PROMOTE" +ConfirmType == "CONFIRM" +RollbackType == "ROLLBACK" +RaftType == "RAFT" +OkType == "OK" +NopType == "NOP" + +\* Group IDs of the XrowEntry. +DefaultGroup == "DEFAULT" +LocalGroup == "LOCAL" + +\* Flags of the XrowEntry: +\* - wait_sync - true for any transaction, that would enter the limbo. +\* - wait_ack - true for a synchronous transaction. +DefaultFlags == [wait_sync |-> FALSE, wait_ack |-> FALSE, force_async |-> FALSE] +SyncFlags == [wait_sync |-> TRUE, wait_ack |-> TRUE, force_async |-> FALSE] +ForceAsyncFlags == [wait_sync |-> FALSE, wait_ack |-> FALSE, force_async |-> TRUE] + +\* Type of cbus messages to Tx thread. +TxWalType == "WAL" +TxRelayType == "RELAY" +TxApplierType == "APPLIER" + +\* See msgs variable for description. +RelaySource == 1 +ApplierSource == 2 + +\* Error codes. +SplitBrainError == "SPLIT_BRAIN" + +\* Reserved value +Nil == "NIL" + +=============================================================================== diff --git a/proofs/tla/src/modules/applier.tla b/proofs/tla/src/modules/applier.tla new file mode 100644 index 000000000000..f59765dc6c57 --- /dev/null +++ b/proofs/tla/src/modules/applier.tla @@ -0,0 +1,196 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +-------------------------------- MODULE applier -------------------------------- + +EXTENDS Integers, Sequences, FiniteSets + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS + Servers, \* A nonempty set of server identifiers. + SplitBrainCheck \* Whether SplitBrain errors should be raised. + +ASSUME Cardinality(Servers) > 0 +ASSUME SplitBrainCheck \in {TRUE, FALSE} + +VARIABLES + msgs, + \* Applier implementation. + applierAckMsg, \* Whether applier needs to send acks. + applierVclock, \* Implementation of the replicaset.applier.vclock + \* Tx implementation (see box module). + txApplierQueue + +applierVars == <> + +\* Txn substitution. +CONSTANTS MaxClientRequests +VARIABLES tId, clientCtr, walQueue +LOCAL txnVarsSub == <> + +\* Raft substitution. +CONSTANTS ElectionQuorum, MaxRaftTerm +VARIABLES state, term, volatileTerm, vote, volatileVote, leader, votesReceived, + isBroadcastScheduled, candidateVclock, vclock, + relayRaftMsg +LOCAL raftVarsSub == <> + +\* Limbo substitution. +VARIABLES limbo, limboVclock, limboOwner, limboPromoteGreatestTerm, + limboPromoteTermMap, limboConfirmedLsn, limboVolatileConfirmedLsn, + limboConfirmedVclock, limboAckCount, limboSynchroMsg, + limboPromoteLatch, error, relayLastAck +LOCAL limboVarsSub == <> + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +LOCAL INSTANCE txn +LOCAL INSTANCE raft +LOCAL INSTANCE limbo +LOCAL INSTANCE utils + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +ApplierInit == + /\ applierAckMsg = [i \in Servers |-> [j \in Servers |-> EmptyGeneralMsg]] + /\ applierVclock = [i \in Servers |-> [j \in Servers |-> 0]] + +\* Implementation of the applier_thread_writer_f +ApplierWrite(i, j) == + /\ applierAckMsg[i][j].is_ready = TRUE + /\ Send(msgs, i, j, ApplierSource, applierAckMsg[i][j].body) + /\ LET newMsg == [applierAckMsg[i][j] EXCEPT !.is_ready = FALSE] + IN applierAckMsg' = [applierAckMsg EXCEPT ![i][j] = newMsg] + /\ UNCHANGED + \* Without {msgs, applierAckMsg} + <> + +ApplierRead(i, j) == + /\ Len(msgs[j][i][RelaySource]) > 0 + /\ LET entry == Head(msgs[j][i][RelaySource]) + newQueue == Append(txApplierQueue[i], TxMsg(TxApplierType, entry)) + IN /\ txApplierQueue' = [txApplierQueue EXCEPT ![i] = newQueue] + /\ msgs' = [msgs EXCEPT ![j][i][RelaySource] = + Tail(msgs[j][i][RelaySource])] + /\ UNCHANGED + \* Without {msgs, txApplierQueue} + <> + +ApplierProcess(i, j) == + /\ \/ ApplierWrite(i, j) + \/ ApplierRead(i, j) + /\ UNCHANGED <> + +ApplierSynchroIsSplitBrain(i, entry) == + /\ SplitBrainCheck = TRUE + /\ limboPromoteTermMap[entry.replica_id] # limboPromoteGreatestTerm[i] + /\ entry.type = DmlType + +\* Part of applier_synchro_filter_tx, raise Split Brain error. +ApplierSynchroRaiseSplitBrainIfNeeded(i, entry) == + IF ApplierSynchroIsSplitBrain(i, entry) + THEN error' = [error EXCEPT ![i] = SplitBrainError] + ELSE UNCHANGED <> + +\* Part of applier_synchro_filter_tx, NOPify entries. +ApplierSynchroNopifyTx(i, entry) == + LET skipNopify == /\ limboPromoteTermMap[entry.replica_id] = + limboPromoteGreatestTerm[i] + /\ \/ entry.type = PromoteType + \/ /\ entry.type = ConfirmType + /\ entry.body.lsn > + limboConfirmedVclock[i][entry.replica_id] + IN IF skipNopify THEN entry + ELSE [entry EXCEPT !.type = NopType, !.body = <<>>] + +ApplierNotInSynchroWrite(i) == + ~LimboIsInRollback(i, limboSynchroMsg, limboPromoteLatch) + +ApplierApplyTx(i, entry) == + /\ ApplierSynchroRaiseSplitBrainIfNeeded(i, entry) + /\ IF /\ ~ApplierSynchroIsSplitBrain(i, entry) + /\ entry.lsn > applierVclock[i][entry.replica_id] + THEN LET newEntry == ApplierSynchroNopifyTx(i, entry) + IN /\ applierVclock = [applierVclock EXCEPT + ![i][entry.replica_id] = newEntry.lsn] + /\ IF \/ newEntry.type = DmlType + \/ newEntry.type = NopType + THEN /\ TxnDo(i, newEntry) + ELSE /\ LimboScheduleWrite(i, newEntry) + /\ UNCHANGED <<>> + ELSE UNCHANGED <> + /\ UNCHANGED + \* Without {error, applierVclock, TxnDo: {wal, walQueue, limbo, tId}, + \* LimboScheduleWrite: {limboSynchroMsg, {tId, walQueue, error, + \* limbo, limboSynchroMsg, limboVolatileConfirmedLsn, + \* limboPromoteLatch}}} + <> + +\* Implementation of the applier_process_batch. +TxOnApplierReceive(i, entry) == + \/ /\ entry.lsn # -1 \* DmlType, PromoteType, ConfirmType + /\ ApplierApplyTx(i, entry) + \/ /\ entry.type = RaftType + /\ RaftProcessMsg(i, entry) + +ApplierSignalAck(i, j, ackVclock) == + LET entry == XrowEntry(OkType, i, DefaultGroup, DefaultFlags, [ + vclock |-> ackVclock, + term |-> term[i] + ]) + IN applierAckMsg = [applierAckMsg EXCEPT ![i][j] = GeneralMsg(entry)] + +\* Implementation of the applier_on_wal_write. Sends acks. +ApplierSignalAckIfNeeded(i, entry, ackVclock) == + /\ IF entry.replica_id # i + THEN ApplierSignalAck(i, entry.replica_id, ackVclock) + ELSE UNCHANGED <> + /\ UNCHANGED <> + +ApplierNext(servers) == \E i, j \in servers: ApplierProcess(i, j) + +================================================================================ diff --git a/proofs/tla/src/modules/box.tla b/proofs/tla/src/modules/box.tla new file mode 100644 index 000000000000..c08bff643884 --- /dev/null +++ b/proofs/tla/src/modules/box.tla @@ -0,0 +1,169 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +--------------------------------- MODULE box ----------------------------------- + +EXTENDS Integers, Sequences, FiniteSets + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS + Servers + +ASSUME Cardinality(Servers) > 0 + +VARIABLES + error, \* Critical error on the instance (only SplitBrain now). + vclock, \* Vclock of the current instance. + txQueue, \* Queue from any thread (except applier) to TX. + txApplierQueue \* Queue from applier thread to Tx. Applier needs separate + \* one, since it's crucial, that the write of synchro + \* request is synchronous and none of the new entries + \* are processed until this write is completed. + +boxVars == <> + +\* Applier substitution +CONSTANTS SplitBrainCheck +VARIABLES msgs, applierAckMsg, applierVclock +LOCAL applierVarsSub == <> + +\* Txn substitution. +CONSTANTS MaxClientRequests +VARIABLES tId, clientCtr, walQueue +LOCAL txnVarsSub == <> + +\* Raft substitution. +CONSTANTS ElectionQuorum, MaxRaftTerm +VARIABLES state, term, volatileTerm, vote, volatileVote, leader, votesReceived, + isBroadcastScheduled, candidateVclock, + relayRaftMsg +LOCAL raftVarsSub == <> + +\* Limbo substitution. +VARIABLES limbo, limboVclock, limboOwner, limboPromoteGreatestTerm, + limboPromoteTermMap, limboConfirmedLsn, limboVolatileConfirmedLsn, + limboConfirmedVclock, limboAckCount, limboSynchroMsg, + limboPromoteLatch, relayLastAck +LOCAL limboVarsSub == <> + +LOCAL allVars == <> + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +LOCAL INSTANCE raft +LOCAL INSTANCE limbo +LOCAL INSTANCE applier +LOCAL INSTANCE utils + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +BoxInit == + /\ error = [i \in Servers |-> Nil] + /\ vclock = [i \in Servers |-> [j \in Servers |-> 0]] + /\ txQueue = [i \in Servers |-> << >>] + /\ txApplierQueue = [i \in Servers |-> << >>] + +\* Process cbus from a thread to Tx. In TLA it's not possible to yield and wait +\* for end of writing to disk e.g, so it's processed as a separate step. + +\* Implementation of the tx_status_update. +TxOnRelayUpdate(i, ack) == + /\ RaftProcessTerm(i, ack.term) + /\ LimboAck(i, limbo[i], ack.replica_id, ack.vclock[i]) + \* See TxProcess for additional UNCHANGED. + /\ UNCHANGED <> + +TxOnWrite(i, entry) == + LET numGlobalRows == Cardinality({j \in 1..Len(entry.rows) : + entry.rows[j].group_id = DefaultGroup}) + numLocalRows == Cardinality({j \in 1..Len(entry.rows) : + entry.rows[j].group_id = LocalGroup}) + \* Update vclock's 0 and i component acording to number of + \* LocalGroup and LocalGroup rows accordingly. + newVclock == BagAdd(BagAdd(vclock[i], entry.replica_id, + Len(entry.rows)), 0, numLocalRows) + IN \* Implementation of the tx_complete batch. + /\ vclock' = [vclock EXCEPT ![i] = newVclock] + /\ \/ /\ entry.rows[1].type = DmlType + /\ TxnOnJournalWrite(i, entry) + /\ ApplierSignalAckIfNeeded(i, entry, newVclock) + /\ UNCHANGED <> + \/ /\ entry.rows[1].type = ConfirmType + /\ LimboWriteEnd(i, entry.rows[1], LimboReadConfirm) + /\ ApplierSignalAckIfNeeded(i, entry, newVclock) + /\ UNCHANGED <> + \/ /\ entry.rows[1].type = PromoteType + /\ LimboWriteEnd(i, entry.rows[1], LimboReadPromote) + /\ ApplierSignalAckIfNeeded(i, entry, newVclock) + /\ UNCHANGED <> + \/ /\ entry.rows[1].type = RaftType + /\ RaftOnJournalWrite(i, entry) + /\ UNCHANGED <> + +TxProcess(i) == + \/ IF Len(txQueue[i]) > 0 + THEN LET entry == Head(txQueue[i]) + newQueue == Tail(txQueue[i]) + IN /\ txQueue' = [txQueue EXCEPT ![i] = newQueue] + /\ \/ /\ \/ entry.type = TxWalType + \/ entry.type = PromoteType + \/ entry.type = ConfirmType + /\ TxOnWrite(i, entry.body) + \/ /\ entry.type = TxRelayType + /\ TxOnRelayUpdate(i, entry.body) + ELSE UNCHANGED <> + \/ IF /\ Len(txApplierQueue[i]) > 0 + /\ ~LimboIsInRollback(i, limboSynchroMsg, limboPromoteLatch) + THEN LET entry == Head(txApplierQueue[i]) + newQueue == Tail(txApplierQueue[i]) + IN /\ txApplierQueue' = [txApplierQueue EXCEPT ![i] = newQueue] + /\ TxOnApplierReceive(i, entry.body) + ELSE UNCHANGED <> + +BoxNext(servers) == \E i \in servers: TxProcess(i) + +================================================================================ diff --git a/proofs/tla/src/modules/limbo.tla b/proofs/tla/src/modules/limbo.tla new file mode 100644 index 000000000000..c1d959533cff --- /dev/null +++ b/proofs/tla/src/modules/limbo.tla @@ -0,0 +1,342 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +--------------------------------- MODULE limbo --------------------------------- + +EXTENDS Integers, Sequences, FiniteSets, utils + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS + Servers, \* A nonempty set of server identifiers. + ElectionQuorum, \* The number of votes needed to elect a leader. + SplitBrainCheck \* Whether SplitBrain errors should be raised. + +ASSUME Cardinality(Servers) > 0 +ASSUME /\ ElectionQuorum \in Nat + /\ ElectionQuorum < Cardinality(Servers) +ASSUME SplitBrainCheck \in {TRUE, FALSE} + +VARIABLES + limbo, \* Implemented in the current module. + box, \* For access of the error. + relay \* For access of the lastAck. + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +\* Txn substitution. +CONSTANTS MaxClientRequests +VARIABLES txn, wal, raft +LOCAL INSTANCE txn + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +LimboInit == + limbo = [i \in Servers |-> [ + \* Sequence of not yet confirmed entries. + txns |-> << >>, + \* How owner LSN is visible on other nodes. + vclock |-> [j \in Servers |-> 0], + \* Owner of the limbo as seen by server. + owner |-> Nil, + \* The biggest promote term seen. + promoteGreatestTerm |-> 0, + \* Latest terms received with PROMOTE entries. + promoteTermMap |-> [j \in Servers |-> 0], + \* Maximal quorum lsn that has been persisted. + confirmedLsn |-> 0, + \* Not yet persisted confirmedLsn. + volatileConfirmedLsn |-> 0, + \* Biggest known confirmed lsn for each owner. + confimedVclock |-> [j \in Servers |-> 0], + \* Number of ACKs for the first txn in limbo. + ackCount |-> 0, + \* Synchro request to write. + synchroMsg |-> EmptyGeneralMsg, + \* Lsn of the promote currently been written. + promoteQsyncLsn |-> 0, + \* Term of the promote currently been written. + promoteQsyncTerm |-> 0, + \* Order access to the promote data. For appliers. + promoteLatch |-> FALSE, + \* Guard against new transactions during PROMOTE write. + isInRollback |-> FALSE + ]] + +LOCAL LimboState(i) == [ + txns |-> limbo[i].txns, \* Also passed to txn. + limboVclock |-> limbo[i].vclock, + owner |-> limbo[i].owner, + promoteGreatestTerm |-> limbo[i].promoteGreatestTerm, + promoteTermMap |-> limbo[i].promoteTermMap, + confirmedLsn |-> limbo[i].confirmedLsn, + volatileConfirmedLsn |-> limbo[i].volatileConfirmedLsn, + confimedVclock |-> limbo[i].confirmedVclock, + ackCount |-> limbo[i].ackCount, + synchroMsg |-> limbo[i].synchroMsg, + promoteQsyncLsn |-> limbo[i].promoteQsyncLsn, + promoteQsyncTerm |-> limbo[i].promoteQsyncTerm, + promoteLatch |-> limbo[i].promoteLatch, + isInRollback |-> limbo[i].isInRollback, + \* Txn state. + tId |-> txn[i].tId, + walQueue |-> wal[i].queue, + \* Box state, + error |-> box[i].error, + \* RO variables. + i |-> i \* Should not be used to access global variables. +] + +LOCAL LimboStateApply(i, state) == + /\ limbo' = VarSet(i, "txns", state.txns, + VarSet(i, "vclock", state.limboVclock, + VarSet(i, "owner", state.owner, + VarSet(i, "promoteGreatestTerm", state.promoteGreatestTerm, + VarSet(i, "promoteTermMap", state.promoteTermMap, + VarSet(i, "confirmedLsn", state.confirmedLsn, + VarSet(i, "volatileConfirmedLsn", state.volatileConfirmedLsn, + VarSet(i, "confirmedVclock", state.confirmedVclock, + VarSet(i, "ackCount", state.ackCount, + VarSet(i, "synchroMsg", state.synchroMsg, + VarSet(i, "promoteQsyncLsn", state.promoteQsyncLsn, + VarSet(i, "promoteQsyncTerm", state.promoteQsyncTerm, + VarSet(i, "promoteLatch", state.promoteLatch, limbo + ))))))))))))) \* LOL + /\ txn + +LOCAL LimboConfirm(state) == + IF ~LimboIsInRollback(state.synchroMsg, state.promoteLatch) + THEN LET k == Cardinality(Servers) - ElectionQuorum + confirmLsn == BagKthOrderStatistic(state.limboVclock, k) + idx == SetMax({x \in 1..Len(state.txns): + /\ LastLsn(state.txns[x].stmts) # -1 + /\ LastLsn(state.txns[x].stmts) <= confirmLsn}) + maxAssignedLsn == LastLsn(state.txns[idx].stmts) + newAckCount == IF idx + 1 > Len(state.txns) THEN 0 + ELSE IF state.txns[idx + 1].stmts[1].lsn = -1 THEN 0 + ELSE BagCountGreaterOrEqual(state.limboVclock, + state.txns[idx + 1].stmts[1].lsn) + IN [state EXCEPT + !.volatileConfirmedLsn = maxAssignedLsn, + !.ackCount = newAckCount] + ELSE state + +\* txn_limbo_ack. +LimboAck(state, source, lsn) == + IF /\ state.owner = state.i + /\ Len(state.txns) > 0 + /\ lsn > state.limboVclock[source] + THEN LET newVclock == BagSet(state.limboVclock, source, lsn) + vclockState == [state EXCEPT !.limboVclock = newVclock] + row == vclockState.txns[1].stmts[1] + IN IF /\ row.lsn # -1 + /\ row.lsn <= lsn + THEN LET countState == [vclockState EXCEPT + !.ackCount = vclockState.ackCount + 1] + IN IF countState.ackCount >= ElectionQuorum + THEN LimboConfirm(countState) + ELSE countState + ELSE vclockState + ELSE state + +LOCAL LimboBegin(state) == + [state EXCEPT !.promoteLatch = TRUE] + +LOCAL LimboCommit(state) == + [state EXCEPT !.promoteLatch = FALSE] + +LOCAL LimboReqPrepare(state, entry) == + IF entry.type = PromoteType + THEN [state EXCEPT !.isInRollback = TRUE] + ELSE state + +\* Part of txn_limbo_req_commit, doesn't include reading written request. +LOCAL LimboReqCommit(state, entry) == + LET t == entry.body.term + newMap == IF t > state.promoteTermMap[entry.body.origin_id] THEN + [state.promoteTermMap EXCEPT ![entry.body.origin_id] = t] + ELSE state.promoteTermMap + newGreatestTerm == IF t > state.promoteGreatestTerm + THEN t ELSE state.promoteGreatestTerm + IN [state EXCEPT + !.isInRollback = FALSE, + !.promoteTermMap = newMap, + !.promoteGreatestTerm = newGreatestTerm + ] + +LOCAL LimboIsSplitBrain(state, entry) == + /\ SplitBrainCheck = TRUE + /\ entry.replica_id # state.owner + +LOCAL LimboRaiseSplitBrainIfNeeded(state, entry) == + IF LimboIsSplitBrain(state, entry) + THEN [state EXCEPT !.error = SplitBrainError] + ELSE state + +\* Part of txn_limbo_req_prepare. Checks whether synchro request can +\* be applied without yields. +LOCAL LimboReqPrepareCheck(state, entry) == + /\ state.promoteLatch[i] = FALSE + /\ ~LimboIsSplitBrain(state, entry) + /\ \/ Len(state.txns) = 0 + \/ state.txns[Len(state.txns)].stmts[1].lsn # -1 + +LOCAL LimboWriteStart(state, entry) == + /\ TxnDo(LimboReqPrepare(LimboBegin(state), entry), entry) + +LimboWriteEnd(state, entry, Read(_, _)) == + /\ LimboReqCommit(state, entry) + /\ Read(i, entry) + /\ LimboCommit(i) + +\* Part of apply_synchro_req. +LimboScheduleWrite(state, entry) == + LET newState == LimboRaiseSplitBrainIfNeeded(state, entry) + IN IF newState.error = Nil + THEN IF LimboReqPrepareCheck(newState, entry) + THEN LimboWriteStart(newState, entry) + ELSE [newState EXCEPT !.synchroMsg = GeneralMsg(entry)] + ELSE newState + +LOCAL LimboWritePromote(state, lsn, term) == + LET entry == XrowEntry(PromoteType, state.i, DefaultGroup, + DefaultFlags, [ + replica_id |-> state.owner, + origin_id |-> state.i, + lsn |-> lsn, + term |-> term + ]) + IN LimboWriteStart(state, entry) + +\* txn_limbo_write_confirm. +LimboWriteConfirm(state, lsn) == + LET entry == XrowEntry(ConfirmType, state.i, DefaultGroup, ForceAsyncFlags, + [ + replica_id |-> state.owner, + origin_id |-> state.i, + lsn |-> lsn, + term |-> 0 + ]) + IN LimboWriteStart(state, entry) + +\* Imolementation of the txn_limbo_read_confirm. +LOCAL LimboReadConfirm(state, entry) == + LET lsn == entry.body.lsn + startIdx == FirstEntryWithGreaterLsnIdx(state.txns, lsn, + LAMBDA tx: tx.stmts[Len(tx.stmts)].lsn) + newLimbo == SubSeq(state.txns, startIdx, Len(state.txns)) + newVclock == BagSet(state.confirmedVclock, state.owner, lsn) + IN [state EXCEPT + !.confirmedLsn = lsn, + !.confirmedVclock = newVclock, + !.txns = newLimbo + ] + +LimboReadPromote(state, entry) == + LET newState == LimboReadConfirm(state, entry) + IN [newState EXCEPT + !.txns = << >>, + !.owner = entry.body.origin_id, + !.volatileConfirmedLsn = state.confirmedLsn + ] + +LOCAL LimboLastLsn(state) == + IF Len(state.txns) = 0 + THEN state.confirmedLsn + ELSE LET stmts == state.txns[Len(state.txns)].stmts + IN stmts[Len(stmts)].lsn + +LOCAL LimboPromoteQsync(state, lsn, term) == + IF lsn # -1 \* wal_sync() + THEN IF \A j \in Servers: \* box_wait_limbo_acked() + \/ j = state.i + \/ relay[state.i].lastAck[j].body.vclock[state.i] >= lsn + THEN [LimboWritePromote(state, lsn, term) + EXCEPT !.promoteQsyncLsn = 0, !.promoteQsyncTerm = 0] + ELSE [state EXCEPT !.promoteQsyncLsn = lsn, + !.promoteQsyncTerm = term] + ELSE state + +LimboPromoteQsyncTry(state) == + LimboPromoteQsync(state, state.promoteQsyncLsn, state.promoteQsyncTerm) + +\* TODO: it's actually difficult to write promote, since it +\* requires written entry in wal, so in real Tarantool there will be more +\* frequent promotes. We should properly block raft worker +\* until the promote is written. Requires fiber yields. +LimboPromoteQsyncRaft(state, term) == + IF state.promoteQsyncLsn = 0 + THEN LimboPromoteQsync(state, LimboLastLsn(state), term) + ELSE state + +LimboProcess(state) == + \/ /\ state.synchroMsg.is_ready + /\ LimboReqPrepareCheck(state, state.synchroMsg.body) + /\ [LimboWriteStart(state, state.synchroMsg.body) + EXCEPT !.synchroMsg = EmptyGeneralMsg] + \/ \* limbo_bump_confirmed_lsn + /\ state.owner = state.i + /\ ~LimboIsInRollback(state.synchroMsg, state.promoteLatch) + /\ state.volatileConfirmedLsn # state.confirmedLsn + /\ LimboWriteConfirm(state, state.volatileConfirmedLsn) + \/ /\ state.promoteQsyncLsn # 0 + /\ LimboPromoteQsyncTry(state) + +LOCAL FindTxnInLimbo(newLimbo, tx) == + CHOOSE i \in 1..Len(newLimbo) : newLimbo[i].id = tx.id + +\* Implementation of the txn_on_journal_write. +TxnOnJournalWrite(state, txnWritten) == + \* Implementation of the txn_on_journal_write. Assign LSNs to limbo entries. + IF txnWritten.stmts[1].flags.wait_sync = TRUE + THEN LET idx == FindTxnInLimbo(state.txns, txnWritten) + newAckCount == BagCountGreaterOrEqual( + state.limboVclock, state.txns[idx].stmts[1].lsn) + newState == [state EXCEPT + !.txns = [state.txns EXCEPT ![idx] = txnWritten], + !.ackCount = newAckCount + ] + row == txnWritten.stmts[Len(txnWritten.stmts)] + IN IF row.flags.wait_ack = TRUE + THEN LimboAck(newState, row.replica_id, row.lsn) + ELSE newState + ELSE state + +LimboNext(servers) == \E i \in servers: + LimboStateApply(i, LimboProcess(LimboState(i))) + +================================================================================ diff --git a/proofs/tla/src/modules/raft.tla b/proofs/tla/src/modules/raft.tla new file mode 100644 index 000000000000..e33e00555e7f --- /dev/null +++ b/proofs/tla/src/modules/raft.tla @@ -0,0 +1,350 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +--------------------------------- MODULE raft ---------------------------------- + +EXTENDS Integers, Sequences, FiniteSets, Bags + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS + Servers, \* A nonempty set of server identifiers. + ElectionQuorum, \* The number of votes needed to elect a leader. + MaxRaftTerm \* The maximum term, which can be achieved in the rs. + +ASSUME Cardinality(Servers) > 0 +ASSUME /\ ElectionQuorum \in Nat + /\ ElectionQuorum < Cardinality(Servers) +ASSUME MaxRaftTerm \in Int + +\* Raft implementation. +VARIABLES + state, \* {"Follower", "Candidate", "Leader"}. + term, \* The current term number of each node. + volatileTerm, \* Not yet persisted term. + vote, \* Node vote in its term. + volatileVote, \* Not yet persisted vote. + leader, \* The leader as known by node. + votesReceived, \* The set of nodes, which voted for the candidate. + isBroadcastScheduled, \* Whether state should be broadcasted to other nodes. + candidateVclock \* Vclock of the candidate for which node is voting. + +raftVars == + <> + +\* Variables, defined in non imported modules. +VARIABLES + msgs, \* tarantool module. + vclock, \* box module. + relayRaftMsg \* relay module. + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +\* Limbo substitution. +CONSTANTS SplitBrainCheck, MaxClientRequests + +VARIABLES + limbo, + limboVclock, + limboOwner, + limboPromoteGreatestTerm, + limboPromoteTermMap, + limboConfirmedLsn, + limboVolatileConfirmedLsn, + limboConfirmedVclock, + limboAckCount, + limboSynchroMsg, + limboPromoteLatch, + error, + relayLastAck, + tId, + clientCtr, + walQueue + +LOCAL limboVarsSub == + <> + +LOCAL INSTANCE limbo +LOCAL INSTANCE definitions +LOCAL INSTANCE utils + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +RaftInit == + /\ state = [i \in Servers |-> "Follower"] + /\ term = [i \in Servers |-> 1] + /\ volatileTerm = [i \in Servers |-> 1] + /\ vote = [i \in Servers |-> Nil] + /\ volatileVote = [i \in Servers |-> Nil] + /\ leader = [i \in Servers |-> Nil] + /\ votesReceived = [i \in Servers |-> { }] + /\ isBroadcastScheduled = [i \in Servers |-> FALSE] + /\ candidateVclock = [i \in Servers |-> [j \in Servers |-> 0]] + +\* See RaftProcessMsg, why RaftVars is needed. +RaftVars(i) == [ + state |-> state[i], + term |-> term[i], + volatileTerm |-> volatileTerm[i], + vote |-> vote[i], + volatileVote |-> volatileVote[i], + leader |-> leader[i], + votesReceived |-> votesReceived[i], + isBroadcastScheduled |-> isBroadcastScheduled[i], + candidateVclock |-> candidateVclock[i] +] + +RaftScheduleBroadcast(i) == + isBroadcastScheduled' = [isBroadcastScheduled EXCEPT ![i] = TRUE] + +RaftScheduleNewTermVars(vars, newTerm) == + [vars EXCEPT + \* UNCHANGED <> + !.state = Follower, + !.volatileTerm = newTerm, + !.volatileVote = Nil, + !.leader = Nil, + !.votesReceived = {}, + !.isBroadcastScheduled = TRUE, + !.candidateVclock = [i \in Servers |-> 0] + ] + +RaftScheduleNewTerm(i, newTerm) == + LET vars == RaftScheduleNewTermVars(RaftVars(i), newTerm) + IN /\ state' = [state EXCEPT ![i] = vars.state] + /\ volatileTerm' = [volatileTerm EXCEPT ![i] = vars.volatileTerm] + /\ volatileVote' = [volatileVote EXCEPT ![i] = vars.volatileVote] + /\ leader' = [leader EXCEPT ![i] = vars.leader] + /\ votesReceived' = [votesReceived EXCEPT ![i] = vars.votesReceived] + /\ isBroadcastScheduled' = [isBroadcastScheduled EXCEPT ![i] = + vars.isBroadcastScheduled] + /\ candidateVclock' = [candidateVclock EXCEPT ![i] = + vars.candidateVclock] + +RaftProcessTerm(i, newTerm) == + IF newTerm > volatileTerm[i] + THEN RaftScheduleNewTerm(i, newTerm) + ELSE UNCHANGED <> + +RaftScheduleNewVoteVars(vars, j, newCandidateVclock) == + [vars EXCEPT + !.volatileVote = j, + !.candidateVclock = newCandidateVclock + ] + +RaftScheduleNewVote(i, j, newCandidateVclock) == + LET vars == RaftScheduleNewVoteVars(RaftVars(i), j, newCandidateVclock) + IN /\ volatileVote' = [volatileVote EXCEPT ![i] = vars.volatileVote] + /\ candidateVclock' = [candidateVclock EXCEPT ![i] = + vars.candidateVclock] + +\* Implementation of the raft_can_vote_for. +RaftCanVoteFor(i, newCandidateVclock) == + BagCompare(newCandidateVclock, vclock[i]) \in {0, 1} + +\* Implementation of the raft_sm_election_update. leader_witness_map is not +\* needed in TLA, since it's impossible to figure out when applier dies. +RaftElectionUpdate(i) == + /\ RaftScheduleNewTerm(i, term[i] + 1) + /\ RaftScheduleNewVote(i, i, vclock[i]) + +\* Server times out and tries to start new election. +\* Implementation of the raft_sm_election_update_cb. +RaftTimeout(i) == + /\ \/ MaxRaftTerm = -1 + \/ SetMax({term[j] : j \in DOMAIN term}) < MaxRaftTerm + \* In Tarantool timer is stopped on leader. + /\ IF /\ state[i] \in {Follower, Candidate} + THEN RaftElectionUpdate(i) + ELSE UNCHANGED <> + /\ UNCHANGED <> + +\* Send to WAL if node can vote for the candidate or if only term is changed. +\* Implementation of the raft_worker_io. +RaftWorkerHandleIo(i) == + LET xrow == XrowEntry(RaftType, i, LocalGroup, DefaultFlags, [ + term |-> volatileTerm[i], + vote |-> volatileVote[i] + ]) + entry == JournalEntry(<>, <<>>) + newWalQueue == Append(walQueue[i], entry) + voteChanged == volatileVote[i] # vote[i] + doNotWrite == voteChanged /\ ~RaftCanVoteFor(i, volatileVote[i]) + IN /\ volatileVote' = IF doNotWrite THEN + [volatileVote EXCEPT ![i] = Nil] ELSE volatileVote + /\ candidateVclock' = IF doNotWrite THEN + [candidateVclock EXCEPT ![i] = EmptyBag] ELSE candidateVclock + /\ walQueue' = IF ~doNotWrite THEN + [walQueue EXCEPT ![i] = newWalQueue] ELSE walQueue + +RaftBecomeLeaderVars(vars, i) == + [vars EXCEPT + !.state = Leader, + !.leader = i, + !.isBroadcastScheduled = TRUE + ] + +RaftBecomeLeader(i) == + LET vars == RaftBecomeLeaderVars(RaftVars(i), i) + IN /\ state' = [state EXCEPT ![i] = vars.state] + /\ leader' = [leader EXCEPT ![i] = vars.leader] + /\ isBroadcastScheduled' = [isBroadcastScheduled EXCEPT ![i] = + vars.isBroadcastScheduled] + +RaftBecomeCandidate(i) == + /\ state' = [state EXCEPT ![i] = Candidate] + /\ leader' = leader + /\ RaftScheduleBroadcast(i) + +\* Continue implementation of the raft_worker_handle_io. +RaftOnJournalWrite(i, entry) == + /\ vote' = [vote EXCEPT ![i] = entry.rows[1].body.vote] + /\ term' = [term EXCEPT ![i] = entry.rows[1].body.term] + /\ IF volatileVote[i] = i + THEN IF ElectionQuorum = 1 + THEN RaftBecomeLeader(i) + ELSE RaftBecomeCandidate(i) + ELSE UNCHANGED <> + +\* Implementation of the raft_worker_handle_broadcast. +RaftWorkerHandleBroadcast(i) == + LET xrow == XrowEntry(RaftType, i, DefaultGroup, DefaultFlags, [ + term |-> term[i], + vote |-> vote[i], + state |-> state[i], + leader_id |-> leader[i], + vclock |-> IF state[i] = Candidate THEN vclock[i] ELSE <<>> + ]) + newMsgs == [j \in Servers |-> GeneralMsg(xrow)] + IN relayRaftMsg' = [relayRaftMsg EXCEPT ![i] = newMsgs] + +\* Implementation of the box_raft_worker_f. +\* TODO: raft worker block, worker writes synchronously +RaftWorker(i) == + /\ \/ /\ \/ volatileTerm[i] # term[i] + \/ volatileVote[i] # vote[i] + /\ RaftWorkerHandleIo(i) + /\ UNCHANGED <> + \/ /\ isBroadcastScheduled[i] = TRUE + /\ RaftWorkerHandleBroadcast(i) + /\ UNCHANGED <> + \/ /\ /\ state[i] = Leader + /\ limboPromoteTermMap[i][i] # term[i] + /\ LimboPromoteQsync(i, term[i]) + /\ UNCHANGED <> + /\ UNCHANGED <> + +RaftApplyVars(i, vars) == + /\ state' = [state EXCEPT ![i] = vars.state] + /\ term' = [term EXCEPT ![i] = vars.term] + /\ volatileTerm' = [volatileTerm EXCEPT ![i] = vars.volatileTerm] + /\ vote' = [vote EXCEPT ![i] = vars.vote] + /\ volatileVote' = [volatileVote EXCEPT ![i] = vars.volatileVote] + /\ leader' = [leader EXCEPT ![i] = vars.leader] + /\ votesReceived' = [votesReceived EXCEPT ![i] = vars.votesReceived] + /\ isBroadcastScheduled' = [isBroadcastScheduled EXCEPT ![i] = + vars.isBroadcastScheduled] + /\ candidateVclock' = [candidateVclock EXCEPT ![i] = vars.candidateVclock] + +\* +\* It's impossible to implement RaftProcessMsg in imperative style, since +\* it's prohibited to update one variable several times and the updated +\* variable is not seen until the end of the step. The function implements +\* the following operator: +\* +RaftProcessMsg(i, entry) == + IF entry.body.term >= volatileTerm[i] + THEN LET raftVarsInit == RaftVars(i) + raftVarsTerm == + IF entry.body.term > raftVarsInit.volatileTerm[i] + THEN RaftScheduleNewTermVars(raftVars, entry.body.term) + ELSE raftVarsInit + raftVarsVote == + IF entry.body.vote # 0 + THEN IF raftVarsTerm.state \in {Follower, Leader} + THEN IF /\ raftVarsTerm.leader = Nil + /\ entry.body.vote # i + /\ entry.body.state = Candidate + /\ raftVarsTerm.volatileVote = Nil + /\ RaftCanVoteFor(i, entry.body.vclock) + THEN RaftScheduleNewVoteVars(raftVarsTerm, + entry.replica_id, entry.body.vclock) + ELSE raftVarsTerm + ELSE \* state = Candidate + IF entry.body.vote = i + THEN LET raftVarsTmp == [raftVarsTerm EXCEPT + !.votesReceived = @ \cup {entry.replica_id}] + IN IF Cardinality(raftVarsTmp.votesReceived) >= + ElectionQuorum + THEN RaftBecomeLeaderVars(raftVarsTmp, i) + ELSE raftVarsTmp + ELSE raftVarsTerm + ELSE raftVarsTerm + raftVarsFinal == IF entry.body.state # Leader + THEN IF raftVarsVote.leader = entry.replica_id + THEN [raftVarsVote EXCEPT + !.leader = Nil, + !.isBroadcastScheduled = TRUE] + ELSE raftVarsVote + ELSE \* entry.body.state = Leader + IF raftVarsVote.leader # entry.replica_id + THEN \* raft_sm_follow_leader + [raftVarsVote EXCEPT + !.state = Follower, + !.leader = entry.replica_id, + !.isBroadcastScheduled = TRUE] + ELSE raftVarsVote + IN RaftApplyVars(i, raftVarsFinal) + ELSE UNCHANGED <> + +RaftNext(servers) == + \/ \E i \in servers: RaftTimeout(i) + \/ \E i \in servers: RaftWorker(i) + +================================================================================ diff --git a/proofs/tla/src/modules/relay.tla b/proofs/tla/src/modules/relay.tla new file mode 100644 index 000000000000..a90d4fdb7c8d --- /dev/null +++ b/proofs/tla/src/modules/relay.tla @@ -0,0 +1,140 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +--------------------------------- MODULE relay --------------------------------- + +EXTENDS Integers, Sequences, FiniteSets, utils, definitions + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS Servers +ASSUME Cardinality(Servers) > 0 + +VARIABLES + relay, \* Implemented in the current module. + box, \* For access of the queue, vclock. + msgs, \* See tarantool module. + wal \* For access of the rows. + + +fdsjlfdsjfsdljfsdfdsfsdfjsdlfjds +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +RelayInit == + relay = [i \in Servers |-> [ + relayQueue |-> << >>, + lastAck |-> [j \in Servers |-> EmptyAck(Servers)], + sentLsn |-> [j \in Servers |-> 0], + raftMsg |-> [j \in Servers |-> EmptyGeneralMsg] + ]] + +LOCAL RelayState(i, j) == [ + msgsToSend |-> msgs[i][j][RelaySource], + msgsToReceive |-> msgs[j][i][ApplierSource], + relayQueue |-> relay[i].relayQueue, + lastAck |-> relay[i].lastAck[j], + sentLsn |-> relay[i].sentLsn[j], + raftMsg |-> relay[i].raftMsg[j], + txQueue |-> box[i].queue, + \* RO variables. + toInstance |-> j, + walRows |-> wal[i].rows +] + +LOCAL RelayStateApply(i, state) == + /\ msgs' = [msgs EXCEPT + ![i][state.toInstance][RelaySource] = state.msgsToSend, + ![state.toInstance][i][ApplierSource] = state.msgsToReceive] + /\ relay' = VarSet(i, "lastAck", [relay[i].lastAck EXCEPT + ![state.toInstance] = state.lastAck], + VarSet(i, "sentLsn", [relay[i].sentLsn EXCEPT + ![state.toInstance] = state.sentLsn], + VarSet(i, "raftMsg", [relay[i].raftMsg EXCEPT + ![state.toInstance] = state.raftMsg], + VarSet(i, "relayQueue", state.relayQueue, + relay)))) + /\ box' = VarSet(i, "queue", state.txQueue, box) + /\ UNCHANGED <> + +RelayProcessWalEvent(state) == + LET startIdx == FirstEntryWithGreaterLsnIdx(state.walRows, state.sentLsn, + LAMBDA x: x.lsn) + entries == + IF startIdx > 0 THEN + LET tmp == SubSeq(state.walRows, startIdx, Len(state.walRows)) + IN [j \in 1..Len(tmp) |-> + IF tmp[j].group_id = LocalGroup THEN [tmp[j] EXCEPT + !.type = NopType, + !.group_id = DefaultGroup, + !.body = << >> + ] ELSE tmp[j]] + ELSE << >> + newSentLsn == IF entries = << >> + THEN state.sentLsn + ELSE LastLsn(entries) + IN [state EXCEPT + !.msgsToSend = state.msgsToSend \o entries, + !.sentLsn = newSentLsn + ] + +RelayRaftTrySend(state) == + IF state.raftMsg.is_ready = TRUE THEN [state EXCEPT + !.msgsToSend = Append(state.msgsToSend, state.raftMsg.body), + !.raftMsg = EmptyGeneralMsg + ] ELSE state + +\* Implementation of the relay_reader_f. +RelayRead(state) == + IF Len(state.msgsToReceive) > 0 THEN [state EXCEPT + !.lastAck = Head(state.msgsToReceive), + !.msgsToReceive = Tail(state.msgsToReceive) + ] ELSE state + +\* Implementation of the relay_check_status_needs_update. +RelayStatusUpdate(state) == + IF Len(state.relayQueue) > 0 THEN [state EXCEPT + !.txQueue = Append(state.txQueue, TxMsg(TxRelayType, state.lastAck)), + !.relayQueue = Tail(state.relayQueue) + ] ELSE state + +RelayNext(servers) == \E i, j \in servers: + LET state == RelayState(i, j) + IN /\ i # j \* No replication to self. + /\ \/ RelayStateApply(i, RelayRead(state)) + \/ RelayStateApply(i, RelayStatusUpdate(state)) + \/ RelayStateApply(i, RelayProcessWalEvent(state)) + \/ RelayStateApply(i, RelayRaftTrySend(state)) + +================================================================================ diff --git a/proofs/tla/src/modules/txn.tla b/proofs/tla/src/modules/txn.tla new file mode 100644 index 000000000000..9ea811c297db --- /dev/null +++ b/proofs/tla/src/modules/txn.tla @@ -0,0 +1,138 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +---------------------------------- MODULE txn ---------------------------------- + +EXTENDS Integers, FiniteSets, Sequences, utils, definitions + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS + Servers, \* A nonempty set of server identifiers. + MaxClientRequests \* The max number of ClientRequests, -1 means unlimited + +ASSUME Cardinality(Servers) > 0 +ASSUME MaxClientRequests \in Int + +VARIABLES + txn, \* Implemented in the current module. + wal, \* For access of the queue. + limbo, \* For access of the txns, synchroMsg, promoteLatch. + raft \* For acess of the state. + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +TxnInit == + txn = [i \in Servers |-> [ + tId |-> 0, \* A sequentially growing transaction id (abstraction) + clientCtr |-> 0 \* The number of done ClientRequests. + ]] + +LOCAL TxnState(i) == [ + tId |-> txn[i].tId, + clientCtr |-> txn[i].clientCtr, + walQueue |-> wal[i].queue, + txns |-> limbo[i].txns, + \* RO variables. + raftState |-> raft[i].state, + promoteLatch |-> limbo[i].promoteLatch, + synchroMsg |-> limbo[i].synchroMsg +] + +LOCAL TxnStateApply(i, state) == + /\ txn' = VarSet(i, "tId", state.tId, + VarSet(i, "clientCtr", state.clientCtr, txn)) + /\ wal' = VarSet(i, "queue", state.walQueue, wal) + /\ limbo' = VarSet(i, "txns", state.txns, limbo) + /\ UNCHANGED <> + +\* Implementation of the txn_begin. +LOCAL TxnBegin(state, stmts) == + \* id is fully abstractional and is not represented in real code. it's + \* used in order to identify the transaction in the limbo. Note, + \* that it's not tsn, which must be assigned during WalProcess (but it + \* won't be, since it's not needed in TLA for now). + LET id == state.tId + 1 + IN [id |-> id, + \* stmts is a sequence of XrowEntries, which are written to wal, + \* non empty, cannot have different types. + stmts |-> stmts] + +\* Implementation of the txn_commit_impl for synchronous tx. +\* Adds entry to the limbo and sends it to the WAL thread for writing, +\* where it's processed by WalProcess operator. +LOCAL TxnCommit(state, txnToApply) == + LET \* Set wait_sync flag if limbo is not empty. + newStmts == IF /\ Len(state.txns) > 0 + /\ txnToApply.stmts[1].group_id # LocalGroup + /\ txnToApply.stmts[1].type # NopType + /\ txnToApply.stmts[1].flags.force_async = FALSE + THEN [j \in 1..Len(txnToApply.stmts) |-> + [txnToApply.stmts[j] EXCEPT + !.flags = [txnToApply.stmts[j].flags + EXCEPT !.wait_sync = TRUE] + ] + ] + ELSE txnToApply.stmts + newTxn == [txnToApply EXCEPT !.stmts = newStmts] + newWalQueue == Append(state.walQueue, newTxn) + newLimboTxns == IF newStmts[1].flags.wait_sync = TRUE + THEN Append(state.txns, newTxn) + ELSE state.txns + doWrite == \/ newStmts[1].flags.wait_sync = FALSE + \/ ~LimboIsInRollback(state.synchroMsg, state.promoteLatch) + IN IF doWrite THEN [state EXCEPT + !.tId = state.tId + 1, + !.walQueue = newWalQueue, + !.txns = newLimboTxns + ] ELSE state + +TxnDo(state, entry) == + LET stmts == <> + IN TxnCommit(state, TxnBegin(state, stmts)) + +LOCAL ClientRequest(i, state) == + IF /\ \/ state.clientCtr = -1 + \/ state.clientCtr < MaxClientRequests + /\ state.raftState = Leader + THEN LET entry == XrowEntry(DmlType, i, DefaultGroup, SyncFlags, <<>>) + stateTxn == TxnDo(state, entry) + IN [stateTxn EXCEPT !.clientCtr = @ + 1] + ELSE state + +TxnNext(servers) == \E i \in servers: + TxnStateApply(i, ClientRequest(i, TxnState(i))) + +================================================================================ diff --git a/proofs/tla/src/modules/wal.tla b/proofs/tla/src/modules/wal.tla new file mode 100644 index 000000000000..b2f07f100304 --- /dev/null +++ b/proofs/tla/src/modules/wal.tla @@ -0,0 +1,91 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +---------------------------------- MODULE wal ---------------------------------- + +EXTENDS Integers, FiniteSets, Sequences, utils + +-------------------------------------------------------------------------------- +\* Declaration +-------------------------------------------------------------------------------- + +CONSTANTS Servers +ASSUME Cardinality(Servers) > 0 + +VARIABLES + wal, \* Imlemented in the current module. + box \* For access of the queue. + +-------------------------------------------------------------------------------- +\* Implementation +-------------------------------------------------------------------------------- + +WalInit == + wal = [i \in Servers |-> [ + rows |-> << >>, \* Sequence of log entries, persisted in wal. + queue |-> << >> \* Queue from TX thread to WAL. + ]] + +LOCAL WalState(i) == [ + rows |-> wal[i].rows, + queue |-> wal[i].queue, + txQueue |-> box[i].queue +] + +LOCAL WalStateApply(i, state) == + /\ wal' = VarSet(i, "queue", state.queue, + VarSet(i, "rows", state.rows, wal)) + /\ box' = VarSet(i, "queue", state.txQueue, box) + +\* Implementation of the wal_write_to_disk, non failing. +WalProcess(state) == + IF Len(state.queue) > 0 + THEN LET txn == Head(state.queue) + \* Implementation of the wal_assign_lsn. + newRows == [j \in 1..Len(txn.stmts) |-> + [txn.stmts[j] EXCEPT !.lsn = IF txn.stmts[j].lsn = -1 THEN + LastLsn(state.rows) + j ELSE txn.stmts[j].lsn]] + \* Write to disk. + newWalRows == state.rows \o newRows + newTxn == [txn EXCEPT !.stmts = newRows] + newTxQueue == Append(state.txQueue, TxMsg(TxWalType, newTxn)) + newWalQueue == Tail(state.queue) + IN [state EXCEPT + !.rows = newWalRows, + !.queue = newWalQueue, + !.txQueue = newTxQueue + ] + ELSE state + +WalNext(servers) == \E i \in servers: + WalStateApply(i, WalProcess(WalState(i))) + +================================================================================ diff --git a/proofs/tla/src/tarantool.tla b/proofs/tla/src/tarantool.tla new file mode 100644 index 000000000000..16dc089e84c2 --- /dev/null +++ b/proofs/tla/src/tarantool.tla @@ -0,0 +1,234 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +------------------------------- MODULE tarantool ------------------------------- + +EXTENDS Integers, Bags, FiniteSets, Sequences, TLC + +CONSTANTS + Servers, \* A nonempty set of server identifiers. + ElectionQuorum, \* The number of votes needed to elect a leader. + MaxClientRequests, \* The max number of ClientRequests, -1 means unlimited + SplitBrainCheck, \* Whether SplitBrain errors should be raised. + MaxRaftTerm \* The maximum term, which can be achieved in the rs. + +ASSUME Cardinality(Servers) > 0 +ASSUME /\ ElectionQuorum \in Nat + /\ ElectionQuorum < Cardinality(Servers) +ASSUME MaxClientRequests \in Int +ASSUME SplitBrainCheck \in {TRUE, FALSE} +ASSUME MaxRaftTerm \in Int + +------------------------------------------------------------------------------- +\* Global variables +------------------------------------------------------------------------------- + +\* Note, that all variables of another module must be instantiated in the +\* one, to which it's imported (see part 4.2 of Specifying Systems). + +\* msgs[sender][receiver][source]. Source is needed, since every instance +\* has 2 connections: first - relay, second - applier. So source = 1 means +\* that msg was written by relay, 2 - by applier. Relay writes to 1, reads +\* from 2. Applier writes to 2, reads from 1. +VARIABLE msgs + +------------------------------------------------------------------------------- +\* Per-server variables (operators with server argument) +------------------------------------------------------------------------------- + +\* Box implementation. +VARIABLES + error, \* Critical error on the instance. + vclock, \* Vclock of the current instance. + txQueue, \* Queue from any thread (except applier) to TX. + txApplierQueue \* Queue from applier thread to Tx. Applier needs separate + \* one, since it's crucial, that the write of synchro + \* request is synchronous and none of the new entries + \* are processed until this write is completed. + +\* Txn implementation. +VARIABLES + tId, \* A sequentially growing transaction id (abstraction). + clientCtr \* The number of done ClientRequests + +\* Wal implementation. +VARIABLES + wal, \* Sequence of log entries, persisted in WAL. + walQueue \* Queue from TX thread to WAL. + +\* Limbo implementation. +VARIABLES + limbo, \* Sequence of not yet confirmed entries. + limboVclock, \* How owner LSN is visible on other nodes. + limboOwner, \* Owner of the limbo as seen by server. + limboPromoteGreatestTerm, \* The biggest promote term seen. + limboPromoteTermMap, \* Latest terms received with PROMOTE entries. + limboConfirmedLsn, \* Maximal quorum lsn that has been persisted. + limboVolatileConfirmedLsn, \* Not yet persisted confirmedLsn. + limboConfirmedVclock, \* Biggest known confirmed lsn for each owner. + limboAckCount, \* Number of ACKs for the first txn in limbo. + limboSynchroMsg, \* Synchro request to write. + limboPromoteLatch \* Order access to the promote data. + +\* Raft implementation. +VARIABLES + state, \* {"Follower", "Candidate", "Leader"}. + term, \* The current term number of each node. + volatileTerm, \* Not yet persisted term. + vote, \* Node vote in its term. + volatileVote, \* Not yet persisted vote. + leader, \* The leader as known by node. + votesReceived, \* The set of nodes, which voted for the candidate. + isBroadcastScheduled, \* Whether state should be broadcasted to other nodes. + candidateVclock \* Vclock of the candidate for which node is voting. + +\* Relay implementation. +VARIABLES + relaySentLsn, \* Last sent LSN to the peer. See relay->r->cursor. + relayLastAck, \* Last received ack from replica. + relayRaftMsg \* Raft message for broadcast. + +\* Applier implementation +VARIABLES + applierAckMsg, \* Whether applier needs to send acks. + applierVclock \* Implementation of the replicaset.applier.vclock + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +LOCAL INSTANCE definitions +LOCAL INSTANCE raft +LOCAL INSTANCE applier +LOCAL INSTANCE box +LOCAL INSTANCE limbo +LOCAL INSTANCE relay +LOCAL INSTANCE txn +LOCAL INSTANCE wal + +allVars == <> + +------------------------------------------------------------------------------- +\* Initial values for all variables +------------------------------------------------------------------------------- + +Init == /\ msgs = [i \in Servers |-> [j \in Servers |-> [k \in 1..2 |-> <<>>]]] + /\ ApplierInit + /\ BoxInit + /\ LimboInit + /\ RaftInit + /\ RelayInit + /\ TxnInit + /\ WalInit + +------------------------------------------------------------------------------- +\* Specification +------------------------------------------------------------------------------- + +AliveServers == {i \in Servers : error[i] = Nil} + +RaftNextTnt == + /\ RaftNext(AliveServers) + /\ UNCHANGED <> + +BoxNextTnt == + /\ BoxNext(AliveServers) + /\ UNCHANGED <> + +LimboNextTnt == + /\ LimboNext(AliveServers) + /\ UNCHANGED <> + +TxnNextTnt == + /\ TxnNext(AliveServers) + /\ UNCHANGED <> + +WalNextTnt == + /\ WalNext(AliveServers) + \* doesn't add states, why + /\ UNCHANGED <<>> + +RelayNextTnt == + /\ RelayNext(AliveServers) + /\ UNCHANGED <> + +ApplierNextTnt == + /\ ApplierNext(AliveServers) + \* doesn't add states, why + /\ UNCHANGED <<>> + +\* Defines how the variables may transition. +Next == + \* TX thread. + \/ RaftNextTnt + \/ BoxNextTnt + \/ LimboNextTnt + \/ TxnNextTnt + \* WAL thread. + \/ WalNextTnt + \* Relay threads (from i to j) + \/ RelayNextTnt \* (doesn't work) + \* Applier threads (from j to i). + \/ ApplierNextTnt + +\* Start with Init and transition according to Next. By specifying WF (which +\* stands for Weak Fairness) we're including the requirement that the system +\* must eventually take a non-stuttering step whenever one is possible. +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +================================================================================ + +Follow-ups: + * Restart, requires implementation of the SUBSCRIBE, since + cursor on relay is not up to date. Can update sentLsn explicitly! + * Reconfiguration of a replicaset: add/remove replicas: + - Probably requires implementation of the JOIN, SUBSCRIBE, + snapshots, xlogs. diff --git a/proofs/tla/src/utils.tla b/proofs/tla/src/utils.tla new file mode 100644 index 000000000000..e68d48b632cc --- /dev/null +++ b/proofs/tla/src/utils.tla @@ -0,0 +1,153 @@ +/* + * Copyright 2010-2025, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +--------------------------------- MODULE utils --------------------------------- + +EXTENDS Integers, Sequences, FiniteSets, TLC, definitions + +-------------------------------------------------------------------------------- +\* General helper operators +-------------------------------------------------------------------------------- + +\* Sets root.variable = value for server i. +VarSet(i, variable, value, root) == + [root EXCEPT ![i] = [root[i] EXCEPT ![variable] = value]] + +\* Sends the xrow message from i to j. See details about messaging system in +\* the tarantool module, alongside with the declaration of the msgs variable. +Send(messages, i, j, source, xrow) == + LET newMsgs == Append(messages[i][j][source], xrow) + IN [messages EXCEPT ![i][j][source] = newMsgs] + +\* Returns the maximum value in the set 'S'. +SetMax(S) == + CHOOSE m \in S: \A n \in S: n <= m + +-------------------------------------------------------------------------------- +\* Vclock helpers +-------------------------------------------------------------------------------- +\* Bag module is used for vclocks (see Bag standard module, basically mutiset). + +\* Given a bag and entry e, return a new bag with v more e in it. +BagAdd(bag, e, v) == + IF e \in DOMAIN bag + THEN [bag EXCEPT ![e] = bag[e] + v] + ELSE bag @@ (e :> v) + +\* Assigns bag[e] = v. +BagSet(bag, e, v) == + (e :> v) @@ bag + +\* Compares two bags b1 and b2. Returns: +\* 0 if for all s in Servers b1[s] = b2[s] +\* 1 if for all s in Servers b1[s] >= b2[s] and b1 differs in at least one s +\* -1 otherwise +BagCompare(b1, b2) == + IF /\ \A s \in DOMAIN b1: b1[s] >= b2[s] + /\ Assert(DOMAIN b1 = DOMAIN b2, "Domains are not equal in compare") + THEN IF \A s \in DOMAIN b1: b1[s] = b2[s] THEN 0 ELSE 1 + ELSE -1 + +\* Returns the number of elements < x. +BagCountLess(b, x) == + Cardinality({i \in DOMAIN b: b[i] < x}) + +\* Returns the number of elements <= x. +BagCountLessOrEqual(b, x) == + Cardinality({i \in DOMAIN b: b[i] <= x}) + +BagCountGreaterOrEqual(b, x) == + Cardinality({i \in DOMAIN b: b[i] >= x}) + +\* Returns kth order statistic of the bag. +BagKthOrderStatistic(b, k) == + IF k >= Cardinality(DOMAIN b) THEN -1 + ELSE LET idx == CHOOSE x \in DOMAIN b: + /\ BagCountLess(b, b[x]) <= k + /\ BagCountLessOrEqual(b, b[x]) > k + IN b[idx] + +------------------------------------------------------------------------------- +\* Structure declarations +------------------------------------------------------------------------------- + +\* Return xrow entry. It's written to WAL and replicated. +\* LSN is assigned during WalProcess. +\* - `xrowType` is DmlXrowType/RaftXrowType/SynchroXrowType; +\* - `replica_id` is id of the replica which made xrow. +\* - `groupId` is DefaultGroup/LocalGroup; +\* - `body` is any function, depends on `xrowType`. +XrowEntry(xrowType, replica_id, groupId, flags, body) == [ + type |-> xrowType, + replica_id |-> replica_id, + group_id |-> groupId, + lsn |-> -1, + flags |-> flags, + body |-> body +] + +\* LSN of the last entry in the log or 0 if the log is empty. +LastLsn(xlog) == + IF Len(xlog) = 0 THEN 0 ELSE xlog[Len(xlog)].lsn + +\* Msg from any thread to Tx thread. Put in TxQueue. Abstraction to process +\* different Tx events. +TxMsg(txEntryType, body) == [ + type |-> txEntryType, + body |-> body +] + +\* Used in applierAckMsg and relayRaftMsg, limboSynchroMsg. +GeneralMsg(body) == [ + is_ready |-> TRUE, + body |-> body +] + +GreaterCmp(a, b) == a > b +EqualCmp(a, b) == a = b + +FirstEntryLsnIdx(w, lsn, Op(_), Cmp(_, _)) == + IF \E k \in 1..Len(w) : Cmp(Op(w[k]), lsn) + THEN CHOOSE k \in 1..Len(w) : Cmp(Op(w[k]), lsn) + ELSE -1 + +FirstEntryWithGreaterLsnIdx(w, lsn, Op(_)) == + FirstEntryLsnIdx(w, lsn, Op, GreaterCmp) + +\* TODO: I don't need it? +FirstEntryWithEqualLsnIdx(w, lsn, Op(_)) == + FirstEntryLsnIdx(w, lsn, Op, EqualCmp) + +EmptyGeneralMsg == [is_ready |-> FALSE, body |-> <<>>] +EmptyAck(servers) == XrowEntry(OkType, Nil, DefaultGroup, DefaultFlags, + [vclock |-> [i \in servers |-> 0], term |-> 0]) + +================================================================================ diff --git a/proofs/tla/test/.gitignore b/proofs/tla/test/.gitignore new file mode 100644 index 000000000000..3f76076a3144 --- /dev/null +++ b/proofs/tla/test/.gitignore @@ -0,0 +1,2 @@ +states +*TTrace* diff --git a/proofs/tla/test/integration/tarantool/tarantoolTests.cfg b/proofs/tla/test/integration/tarantool/tarantoolTests.cfg new file mode 100644 index 000000000000..c0eb16a68ab9 --- /dev/null +++ b/proofs/tla/test/integration/tarantool/tarantoolTests.cfg @@ -0,0 +1,30 @@ +CONSTANTS + Servers = {s1, s2, s3} + ElectionQuorum = 2 + MaxClientRequests = 2 + SplitBrainCheck = TRUE + MaxRaftTerm = 2 + MaxHeartbeatsPerTerm = 10 + +\* In TLA+, a behavior is defined as exhibiting a "deadlock" if it reaches some +\* state for which there is no (non-stuttering) transition that is enabled +\* (see Specifying Systems, page 222). More precisely, it is a state for which +\* there is no transition of Next that can be taken. So, checking for deadlock +\* should be disabled, when limits (e.g. MaxClientRequests) are not -1. +CHECK_DEADLOCK FALSE + +SPECIFICATION Spec + +INVARIANT + OneLeaderPerTerm + NoServerError + TermIsAlwaysOne + VolatileTermIsAlwaysOne + ClientReqIsNotDone + +\* PROPERTY EventuallyAllLimboEntriesHaveLsn + +\* INVARIANT +\* DebugCtrUnchanged + +\* INVARIANT TestInv diff --git a/proofs/tla/test/integration/tarantool/tarantoolTests.tla b/proofs/tla/test/integration/tarantool/tarantoolTests.tla new file mode 100644 index 000000000000..5209d5f7e831 --- /dev/null +++ b/proofs/tla/test/integration/tarantool/tarantoolTests.tla @@ -0,0 +1,78 @@ +---------------------------- MODULE tarantoolTests ----------------------------- + +EXTENDS FiniteSets, definitions, utils + +CONSTANTS Servers, + ElectionQuorum, + MaxClientRequests, + SplitBrainCheck, + MaxRaftTerm, + MaxHeartbeatsPerTerm + +VARIABLES + msgs, + error, + vclock, + txQueue, + txApplierQueue, + tId, + clientCtr, + wal, + walQueue, + limbo, + limboVclock, + limboOwner, + limboPromoteGreatestTerm, + limboPromoteTermMap, + limboConfirmedLsn, + limboVolatileConfirmedLsn, + limboConfirmedVclock, + limboAckCount, + limboSynchroMsg, + limboPromoteLatch, + state, + term, + volatileTerm, + vote, + volatileVote, + leader, + votesReceived, + leaderWitnessMap, + isBroadcastScheduled, + candidateVclock, + relaySentLsn, + relayLastAck, + relayRaftMsg, + relayHeartbeatCtr, + applierAckMsg, + applierVclock + +INSTANCE tarantool + +------------------------------------------------------------------------------- +\* Properties +------------------------------------------------------------------------------- + +\* NOTE: These one is WIP. Ideas for properties are welcome. + +\* At most one leader per term. +OneLeaderPerTerm == + \A i, j \in AliveServers: + (i # j /\ state[i] = Leader /\ state[j] = Leader) => term[i] # term[j] + +\* All servers are error-free. +NoServerError == + \A i \in Servers: error[i] = Nil + +TermIsAlwaysOne == + \A i \in Servers: term[i] = 1 + +VolatileTermIsAlwaysOne == + \A i \in Servers: volatileTerm[i] = 1 + +ClientReqIsNotDone == + \A i \in Servers: clientCtr = 0 + +\* DebugCtr == debugCtr < 5 + +================================================================================ diff --git a/proofs/tla/test/unit/applier/applierTests.cfg b/proofs/tla/test/unit/applier/applierTests.cfg new file mode 100644 index 000000000000..02bc741c9a14 --- /dev/null +++ b/proofs/tla/test/unit/applier/applierTests.cfg @@ -0,0 +1,10 @@ +CONSTANTS + Servers = {s1, s2, s3} + SplitBrainCheck = TRUE + MaxClientRequests = 10 + ElectionQuorum = 2 + MaxRaftTerm = 5 + +CHECK_DEADLOCK FALSE + +SPECIFICATION Spec diff --git a/proofs/tla/test/unit/applier/applierTests.tla b/proofs/tla/test/unit/applier/applierTests.tla new file mode 100644 index 000000000000..6b7283296900 --- /dev/null +++ b/proofs/tla/test/unit/applier/applierTests.tla @@ -0,0 +1,56 @@ +----------------------------- MODULE applierTests ------------------------------ + +EXTENDS FiniteSets, definitions, utils + +CONSTANTS Servers, SplitBrainCheck, MaxClientRequests, ElectionQuorum, + MaxRaftTerm + +VARIABLES msgs, applierAckMsg, applierVclock, txApplierQueue, tId, clientCtr, + walQueue, state, term, volatileTerm, vote, volatileVote, leader, + votesReceived, leaderWitnessMap, isBroadcastScheduled, + candidateVclock, vclock, relayRaftMsg, limbo, limboVclock, limboOwner, + limboPromoteGreatestTerm, limboPromoteTermMap, limboConfirmedLsn, + limboVolatileConfirmedLsn, limboConfirmedVclock, limboAckCount, + limboSynchroMsg, limboPromoteLatch, error, relayLastAck +allVars == <> + +INSTANCE applier +INSTANCE txn +INSTANCE raft +INSTANCE limbo + +ASSUME LET T == INSTANCE TLC IN T!PrintT("applierTests") + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +Init == /\ ApplierInit + /\ TxnInit + /\ RaftInit + /\ LimboInit + /\ msgs = [i \in Servers |-> [j \in Servers |-> [k \in 1..2 |-> <<>>]]] + /\ walQueue = [i \in Servers |-> << >>] + /\ error = [i \in Servers |-> Nil] + /\ vclock = [i \in Servers |-> [j \in Servers |-> 0]] + /\ txApplierQueue = [i \in Servers |-> << >>] + /\ relayLastAck = + [i \in Servers |-> [j \in Servers |-> EmptyAck(Servers)]] + /\ relayRaftMsg = + [i \in Servers |-> [j \in Servers |-> EmptyGeneralMsg]] + +Next == ApplierNext(Servers) + +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +=============================================================================== diff --git a/proofs/tla/test/unit/box/boxTests.cfg b/proofs/tla/test/unit/box/boxTests.cfg new file mode 100644 index 000000000000..02bc741c9a14 --- /dev/null +++ b/proofs/tla/test/unit/box/boxTests.cfg @@ -0,0 +1,10 @@ +CONSTANTS + Servers = {s1, s2, s3} + SplitBrainCheck = TRUE + MaxClientRequests = 10 + ElectionQuorum = 2 + MaxRaftTerm = 5 + +CHECK_DEADLOCK FALSE + +SPECIFICATION Spec diff --git a/proofs/tla/test/unit/box/boxTests.tla b/proofs/tla/test/unit/box/boxTests.tla new file mode 100644 index 000000000000..958223457a9f --- /dev/null +++ b/proofs/tla/test/unit/box/boxTests.tla @@ -0,0 +1,57 @@ +----------------------------- MODULE boxTests ------------------------------ + +EXTENDS FiniteSets, definitions, utils + +CONSTANTS Servers, SplitBrainCheck, MaxClientRequests, ElectionQuorum, + MaxRaftTerm + +VARIABLES vclock, txQueue, txApplierQueue, error, + msgs, applierAckMsg, applierVclock, tId, clientCtr, + walQueue, state, term, volatileTerm, vote, volatileVote, leader, + votesReceived, leaderWitnessMap, isBroadcastScheduled, + candidateVclock, relayRaftMsg, limbo, limboVclock, limboOwner, + limboPromoteGreatestTerm, limboPromoteTermMap, limboConfirmedLsn, + limboVolatileConfirmedLsn, limboConfirmedVclock, limboAckCount, + limboSynchroMsg, limboPromoteLatch, relayLastAck +allVars == <> + +INSTANCE box +INSTANCE applier +INSTANCE txn +INSTANCE raft +INSTANCE limbo + +ASSUME LET T == INSTANCE TLC IN T!PrintT("applierTests") + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +Init == /\ BoxInit + /\ ApplierInit + /\ TxnInit + /\ RaftInit + /\ LimboInit + /\ msgs = [i \in Servers |-> [j \in Servers |-> [k \in 1..2 |-> <<>>]]] + /\ walQueue = [i \in Servers |-> << >>] + /\ relayLastAck = + [i \in Servers |-> [j \in Servers |-> EmptyAck(Servers)]] + /\ relayRaftMsg = + [i \in Servers |-> [j \in Servers |-> EmptyGeneralMsg]] + +Next == BoxNext(Servers) + +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +=============================================================================== diff --git a/proofs/tla/test/unit/limbo/limboTests.cfg b/proofs/tla/test/unit/limbo/limboTests.cfg new file mode 100644 index 000000000000..5417cbc23338 --- /dev/null +++ b/proofs/tla/test/unit/limbo/limboTests.cfg @@ -0,0 +1,7 @@ +CONSTANTS + Servers = {s1, s2, s3} + ElectionQuorum = 2 + SplitBrainCheck = TRUE + MaxClientRequests = 10 + +SPECIFICATION Spec diff --git a/proofs/tla/test/unit/limbo/limboTests.tla b/proofs/tla/test/unit/limbo/limboTests.tla new file mode 100644 index 000000000000..baddc441fc36 --- /dev/null +++ b/proofs/tla/test/unit/limbo/limboTests.tla @@ -0,0 +1,159 @@ +----------------------------- MODULE limboTests ------------------------------ + +EXTENDS FiniteSets, TLC, utils +ASSUME LET T == INSTANCE TLC IN T!PrintT("limboTests") + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +CONSTANTS Servers, ElectionQuorum, SplitBrainCheck, MaxClientRequests +ASSUME Cardinality(Servers) = 3 +ASSUME ElectionQuorum = 2 +ASSUME SplitBrainCheck = TRUE +VARIABLES limbo, box, relay, txn, wal, raft +allVars == <> +INSTANCE limbo + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +LOCAL name == CHOOSE x \in Servers: TRUE +LOCAL fromName == CHOOSE x \in Servers: x # name + +LOCAL LimboDefaultState == [ + txns |-> << >>, \* Also passed to txn. + limboVclock |-> [j \in Servers |-> 0], + owner |-> name, + promoteGreatestTerm |-> 0, + promoteTermMap |-> [j \in Servers |-> 0], + confirmedLsn |-> 0, + volatileConfirmedLsn |-> 0, + confimedVclock |-> [j \in Servers |-> 0], + ackCount |-> 0, + synchroMsg |-> EmptyGeneralMsg, + promoteQsyncLsn |-> 0, + promoteQsyncTerm |-> 0, + promoteLatch |-> FALSE, + \* Txn state. + tId |-> 0, + walQueue |-> << >>, + \* Box state, + error |-> Nil, + \* RO variables. + i |-> name +] + +LOCAL defaultVclock == LimboDefaultState.limboVclock +LOCAL TxnBegin(id, entry) == [id |-> id, stmts |-> <>] + +\* Test simple ack. +ASSUME LET lsn == 2 + entry == [XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + EXCEPT !.lsn = lsn] + txnToLimbo == TxnBegin(1, entry) + state == [LimboDefaultState EXCEPT !.txns = <>] + IN LimboAck(state, name, lsn) = [state EXCEPT + !.limboVclock = [defaultVclock EXCEPT ![name] = lsn], + !.ackCount = 1 + ] + +\* Test simple confirm after ack, the lsn is assigned before ack. +ASSUME LET lsn == 2 + entry == [XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + EXCEPT !.lsn = lsn] + txnToLimbo == TxnBegin(1, entry) + initState == [LimboDefaultState EXCEPT !.txns = <>] + state == LimboAck(initState, name, lsn) + IN LimboAck(state, fromName, lsn) = [state EXCEPT + !.limboVclock = [defaultVclock EXCEPT + ![fromName] = lsn, + ![name] = lsn + ], + !.ackCount = 0, + !.volatileConfirmedLsn = 2 + ] + +\* Test ack in the middle of the limbo. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + initState == [LimboDefaultState EXCEPT !.txns = << + TxnBegin(1, [entry EXCEPT !.lsn = 1]), + TxnBegin(2, [entry EXCEPT !.lsn = 2]), + TxnBegin(3, [entry EXCEPT !.lsn = 3]), + TxnBegin(4, entry) \* Not yet written. + >>] + state == LimboAck(initState, name, 3) + IN LimboAck(state, fromName, 2) = [state EXCEPT + !.ackCount = 1, + !.volatileConfirmedLsn = 2, + !.limboVclock = [defaultVclock EXCEPT + ![fromName] = 2, + ![name] = 3 + ] + ] + +\* Test ack before wal write. Nothing is confirmed. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + state == [LimboDefaultState EXCEPT + !.txns = <>] + IN LimboAck(state, fromName, 2) = [state EXCEPT + !.limboVclock = [defaultVclock EXCEPT ![fromName] = 2] + ] + +\* Test ack before wal write and then wal write happens. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + initState == [LimboDefaultState EXCEPT + !.txns = <>] + state == LimboAck(initState, fromName, 2) + writtenTxn == TxnBegin(1, [entry EXCEPT !.lsn = 2]) + IN TxnOnJournalWrite(state, writtenTxn) = [state EXCEPT + !.txns = <>, + !.volatileConfirmedLsn = 2, + !.limboVclock = [defaultVclock EXCEPT + ![fromName] = 2, + ![name] = 2 + ] + ] + +\* Acks gathered, time to write the confirm entry. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + initState == [LimboDefaultState EXCEPT !.txns = << + TxnBegin(1, [entry EXCEPT !.lsn = 1]), + TxnBegin(2, [entry EXCEPT !.lsn = 2]), + TxnBegin(3, [entry EXCEPT !.lsn = 3]), + TxnBegin(4, entry) \* Not yet written. + >>] + state == LimboAck(LimboAck(initState, fromName, 2), name, 3) + confirmEntry == + XrowEntry(ConfirmType, state.i, DefaultGroup, DefaultFlags, [ + replica_id |-> state.owner, + origin_id |-> state.i, + lsn |-> state.volatileConfirmedLsn, + term |-> 0 + ]) + IN /\ state.volatileConfirmedLsn # state.confirmedLsn + /\ PrintT(state) + /\ PrintT(LimboWriteConfirm(state, state.volatileConfirmedLsn)) + /\ LimboWriteConfirm(state, state.volatileConfirmedLsn) = + [state EXCEPT + !.tId = 1, + !.walQueue = <> + ] + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +Init == /\ LimboInit + /\ box = [i \in Servers |-> [error |-> Nil]] + /\ relay = [i \in Servers |-> [lastAck |-> EmptyAck(Servers)]] + /\ txn = [i \in Servers |-> [tId |-> 0, clientCtr |-> 0]] + /\ wal = [i \in Servers |-> [queue |-> << >>]] + /\ raft = [i \in Servers |-> [state |-> Leader]] + +\* Next == LimboNext(Servers) +Next == UNCHANGED <> +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +=============================================================================== diff --git a/proofs/tla/test/unit/notes.md b/proofs/tla/test/unit/notes.md new file mode 100644 index 000000000000..16a6f1e3e83c --- /dev/null +++ b/proofs/tla/test/unit/notes.md @@ -0,0 +1,3 @@ +Non-working modules: + * applier (smth is wrong, deadlock, warnings) + * relay (doesn't work with several instances) diff --git a/proofs/tla/test/unit/raft/raftTests.cfg b/proofs/tla/test/unit/raft/raftTests.cfg new file mode 100644 index 000000000000..a68508e5ce79 --- /dev/null +++ b/proofs/tla/test/unit/raft/raftTests.cfg @@ -0,0 +1,8 @@ +CONSTANTS + Servers = {s1} + ElectionQuorum = 1 + MaxRaftTerm = 5 + SplitBrainCheck = TRUE + MaxClientRequests = 100 + +SPECIFICATION Spec diff --git a/proofs/tla/test/unit/raft/raftTests.tla b/proofs/tla/test/unit/raft/raftTests.tla new file mode 100644 index 000000000000..3d917da62067 --- /dev/null +++ b/proofs/tla/test/unit/raft/raftTests.tla @@ -0,0 +1,49 @@ +------------------------------ MODULE raftTests -------------------------------- + +EXTENDS FiniteSets, utils + +CONSTANTS Servers, ElectionQuorum, MaxRaftTerm, SplitBrainCheck, + MaxClientRequests + +ASSUME Cardinality(Servers) = 1 + +VARIABLES state, term, volatileTerm, vote, volatileVote, leader, votesReceived, + leaderWitnessMap, isBroadcastScheduled, candidateVclock, limbo, + limboVclock, limboOwner, limboPromoteGreatestTerm, + limboPromoteTermMap, limboConfirmedLsn, limboVolatileConfirmedLsn, + limboConfirmedVclock, limboAckCount, limboSynchroMsg, + limboPromoteLatch, error, relayLastAck, tId, clientCtr, walQueue, + msgs, relayRaftMsg, vclock + +INSTANCE raft +INSTANCE limbo + +allVars == <> + +ASSUME LET T == INSTANCE TLC IN T!PrintT("raftTests") + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +Init == /\ RaftInit + /\ LimboInit + /\ tId = [i \in Servers |-> 0] + /\ walQueue = [i \in Servers |-> << >>] + /\ error = [i \in Servers |-> Nil] + /\ state = [i \in Servers |-> Leader] + /\ relayLastAck = [i \in Servers |-> [j \in Servers |-> EmptyAck(Servers)]] + /\ msgs = [i \in Servers |-> [j \in Servers |-> [k \in 1..2 |-> <<>>]]] + /\ vclock = [i \in Servers |-> [j \in Servers |-> 0]] + /\ relayRaftMsg = [i \in Servers |-> [j \in Servers |-> EmptyGeneralMsg]] + +Next == RaftNext(Servers) + +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +=============================================================================== diff --git a/proofs/tla/test/unit/relay/relayTests.cfg b/proofs/tla/test/unit/relay/relayTests.cfg new file mode 100644 index 000000000000..b1352e146cee --- /dev/null +++ b/proofs/tla/test/unit/relay/relayTests.cfg @@ -0,0 +1,12 @@ +CONSTANTS + Servers = {s1, s2} + WalMaxRowsTest = 3 + +CHECK_DEADLOCK FALSE +SPECIFICATION Spec + +INVARIANT + ReplNoGapsAndIncreasingLsnInv + ReplNoLocalGroupMsgs + +PROPERTY ReplSizeProp diff --git a/proofs/tla/test/unit/relay/relayTests.tla b/proofs/tla/test/unit/relay/relayTests.tla new file mode 100644 index 000000000000..294a2d3c0b30 --- /dev/null +++ b/proofs/tla/test/unit/relay/relayTests.tla @@ -0,0 +1,164 @@ +------------------------------ MODULE relayTests ------------------------------- + +EXTENDS FiniteSets, TLC, utils +ASSUME LET T == INSTANCE TLC IN T!PrintT("relayTests") + +CONSTANTS WalMaxRowsTest +ASSUME WalMaxRowsTest \in Nat + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +CONSTANTS Servers +ASSUME Cardinality(Servers) > 1 +VARIABLES relay, box, msgs, wal +allVars == <> +INSTANCE relay + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +LOCAL toName == CHOOSE x \in Servers: TRUE +LOCAL fromName == CHOOSE x \in Servers: x # toName + +LOCAL RelayDefaultState == [ + msgsToSend |-> << >>, + msgsToReceive |-> << >>, + relayQueue |-> << >>, + lastAck |-> EmptyAck(Servers), + raftMsg |-> EmptyGeneralMsg, + sentLsn |-> 0, + txQueue |-> << >>, + \* RO variables. + toInstance |-> toName, + walRows |-> << >> +] + +---------------------------- +\* Test RelayRead operator | +---------------------------- +\* Applier sends ack, relay saves it. +ASSUME LET ack == XrowEntry(OkType, toName, DefaultGroup, DefaultFlags, [ + vclock |-> [fromName |-> 1, toName |-> 2], + term |-> 1 + ]) + state == [RelayDefaultState EXCEPT !.msgsToReceive = <>] + IN RelayRead(state) = [RelayDefaultState EXCEPT !.lastAck = ack ] + +------------------------------------ +\* Test RelayStatusUpdate operator | +------------------------------------ +\* Relay sends ack info to Tx and deletes it. +ASSUME LET ack == XrowEntry(OkType, toName, DefaultGroup, DefaultFlags, [ + vclock |-> [fromName |-> 1, toName |-> 2], + term |-> 1 + ]) + initState == [RelayDefaultState EXCEPT + !.msgsToReceive = <>, !.relayQueue = <>] + ackState == RelayRead(initState) + IN RelayStatusUpdate(ackState) = [RelayDefaultState EXCEPT + !.txQueue = <>, + !.lastAck = ack + ] + +--------------------------------------- +\* Test RelayProcessWalEvent operator | +--------------------------------------- +\* Replicate rows from the beginning of the wal. +ASSUME LET row == XrowEntry(DmlType, toName, DefaultGroup, + DefaultFlags, [a |-> 1]) + state == [RelayDefaultState EXCEPT !.walRows = + <<[row EXCEPT !.lsn = 1], [row EXCEPT !.lsn = 2]>>] + IN RelayProcessWalEvent(state) = [state EXCEPT + !.sentLsn = 2, + !.msgsToSend = <<[row EXCEPT !.lsn = 1], [row EXCEPT !.lsn = 2]>> + ] + +\* Replicate rows from the middle of the wal. +ASSUME LET row == XrowEntry(DmlType, toName, DefaultGroup, + DefaultFlags, [a |-> 1]) + state == [RelayDefaultState EXCEPT + !.walRows = <<[row EXCEPT !.lsn = 1], [row EXCEPT !.lsn = 2]>>, + !.sentLsn = 1 + ] + IN RelayProcessWalEvent(state) = [state EXCEPT + !.sentLsn = 2, + !.msgsToSend = <<[row EXCEPT !.lsn = 2]>> + ] + +\* Local rows are replaced with NOPs. +ASSUME LET row == XrowEntry(DmlType, toName, DefaultGroup, + DefaultFlags, [a |-> 1]) + localRow == XrowEntry(DmlType, toName, LocalGroup, + DefaultFlags, [b |-> 1]) + nopRow == XrowEntry(NopType, toName, DefaultGroup, DefaultFlags, <<>>) + state == [RelayDefaultState EXCEPT + !.walRows = <<[row EXCEPT !.lsn = 1], + [localRow EXCEPT !.lsn = 2], + [row EXCEPT !.lsn = 3]>>, + !.sentLsn = 1 + ] + IN RelayProcessWalEvent(state) = [state EXCEPT + !.sentLsn = 3, + !.msgsToSend = <<[nopRow EXCEPT !.lsn = 2], [row EXCEPT !.lsn = 3]>> + ] + +----------------------------------- +\* Test RelayRaftTrySend operator | +----------------------------------- + +\* Send the latest raft msg. +ASSUME LET xrow == XrowEntry(RaftType, toName, DefaultGroup, DefaultFlags, <<>>) + state == [RelayDefaultState EXCEPT !.raftMsg = GeneralMsg(xrow)] + IN RelayRaftTrySend(state) = [RelayDefaultState EXCEPT + !.msgsToSend = <> + ] + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +WalSimulate(i) == + /\ Len(wal[i].rows) <= WalMaxRowsTest + /\ LET group == RandomElement({LocalGroup, DefaultGroup}) + xrow == [XrowEntry(DmlType, i, group, DefaultFlags, <<>>) + EXCEPT !.lsn = Len(wal[i].rows)] + newRows == Append(wal[i].rows, xrow) + IN /\ wal' = VarSet(i, "rows", newRows, wal) + /\ UNCHANGED <> + +Init == /\ RelayInit + /\ box = [i \in Servers |-> [queue |-> << >>]] + /\ msgs = [i \in Servers |-> [j \in Servers |-> [k \in 1..2 |-> << >>]]] + /\ wal = [i \in Servers |-> [rows |-> << >>]] + +Next == + \/ RelayNext(Servers) + \/ \E i \in Servers: WalSimulate(i) + +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +--------------- +\* Properties | +--------------- + +ReplNoGapsAndIncreasingLsnInv == + \A i, j \in Servers: + LET rows == msgs[i][j][RelaySource] + IN IF Len(rows) > 1 THEN + \A k \in 2..Len(rows): rows[k].lsn = rows[k - 1].lsn + 1 + ELSE TRUE + +ReplNoLocalGroupMsgs == + \A i, j \in Servers: + \A k \in DOMAIN msgs[i][j][RelaySource]: + msgs[i][j][RelaySource][k].group_id # LocalGroup + +ReplSizeProp == + <> \A i, j \in Servers: + \/ i = j + \/ Len(msgs[i][j][RelaySource]) = WalMaxRowsTest + +=============================================================================== diff --git a/proofs/tla/test/unit/txn/txnTests.cfg b/proofs/tla/test/unit/txn/txnTests.cfg new file mode 100644 index 000000000000..f7b2a95fdfdd --- /dev/null +++ b/proofs/tla/test/unit/txn/txnTests.cfg @@ -0,0 +1,8 @@ +CONSTANTS + Servers = {s1} + MaxClientRequests = 100 + +SPECIFICATION Spec + +PROPERTY TIdOnlyIncreases +INVARIANT LimboSizeEqualsToRequestCtr diff --git a/proofs/tla/test/unit/txn/txnTests.tla b/proofs/tla/test/unit/txn/txnTests.tla new file mode 100644 index 000000000000..234aaa9caef4 --- /dev/null +++ b/proofs/tla/test/unit/txn/txnTests.tla @@ -0,0 +1,103 @@ +------------------------------- MODULE txnTests -------------------------------- + +EXTENDS Sequences, FiniteSets, TLC, definitions, utils +ASSUME LET T == INSTANCE TLC IN T!PrintT("txnTests") + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +CONSTANTS Servers, MaxClientRequests +ASSUME Cardinality(Servers) = 1 +VARIABLES txn, wal, limbo, raft +allVars == <> +INSTANCE txn + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +LOCAL name == CHOOSE x \in Servers: TRUE + +LOCAL TxnDefaultState == [ + tId |-> 0, + clientCtr |-> 0, + walQueue |-> <<>>, + txns |-> <<>>, + \* RO variables. + raftState |-> Leader, + promoteLatch |-> FALSE, + synchroMsg |-> EmptyGeneralMsg +] + +\* Basic test. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + txnToWrite == [id |-> 1, stmts |-> <>] + IN TxnDo(TxnDefaultState, entry) = [TxnDefaultState EXCEPT + !.tId = 1, + !.walQueue = <>, + !.txns = <> \* lsn is assigned after write. + ] + +\* Test, that async is not added to limbo. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, DefaultFlags, {}) + txnToWrite == [id |-> 1, stmts |-> <>] + IN TxnDo(TxnDefaultState, entry) = [TxnDefaultState EXCEPT + !.tId = 1, + !.walQueue = <> + ] + +LOCAL AsyncToLimboFlags == [wait_sync |-> TRUE, wait_ack |-> FALSE] +\* Test, that async is added to limbo, when limbo is non empty. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, DefaultFlags, {}) + entryToWrite == [entry EXCEPT !.flags = AsyncToLimboFlags] + txnToWrite == [id |-> 2, stmts |-> <>] + oldTxn == [id |-> 1, stmts |-> <>] + state == [TxnDefaultState EXCEPT !.tId = 1, !.txns = <>] + IN TxnDo(state, entry) = [state EXCEPT + !.tId = 2, + !.walQueue = <>, + !.txns = <> + ] + +\* Test, that sync write is not scheduled, if limbo is writing promote. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, SyncFlags, {}) + state == [TxnDefaultState EXCEPT !.promoteLatch = TRUE] + IN TxnDo(state, entry) = state + +\* Test, that async write is scheduled, even if limbo is writing promote. +ASSUME LET entry == XrowEntry(DmlType, name, DefaultGroup, DefaultFlags, {}) + txnToWrite == [id |-> 1, stmts |-> <>] + state == [TxnDefaultState EXCEPT !.promoteLatch = TRUE] + IN TxnDo(state, entry) = [state EXCEPT + !.tId = 1, + !.walQueue = <> + ] + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +Init == /\ TxnInit + /\ wal = [i \in Servers |-> [queue |-> << >>]] + /\ raft = [i \in Servers |-> [state |-> Leader]] + /\ limbo = [i \in Servers |-> [ + txns |-> << >>, + promoteLatch |-> FALSE, + synchroMsg |-> EmptyGeneralMsg] + ] + +Next == TxnNext(Servers) +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +------------------------------ +\* Invariants and properties | +------------------------------ + +TIdOnlyIncreases == + [][\A i \in Servers: txn[i].tId' >= txn[i].tId]_txn + +LimboSizeEqualsToRequestCtr == + \A i \in Servers: txn[i].clientCtr = Len(limbo[i].txns) + +=============================================================================== diff --git a/proofs/tla/test/unit/utils/utilsTests.cfg b/proofs/tla/test/unit/utils/utilsTests.cfg new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/proofs/tla/test/unit/utils/utilsTests.tla b/proofs/tla/test/unit/utils/utilsTests.tla new file mode 100644 index 000000000000..6f22c028367e --- /dev/null +++ b/proofs/tla/test/unit/utils/utilsTests.tla @@ -0,0 +1,96 @@ +------------------------------ MODULE utilsTests ------------------------------- + +EXTENDS Bags, utils +ASSUME LET T == INSTANCE TLC IN T!PrintT("utilsTests") + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- +------------------------- +\* Test VarSet operator | +------------------------- +ASSUME LET testVar == [i \in {"s1", "s2"} |-> [a |-> "a", b |-> 1]] + IN VarSet("s1", "a", "test", testVar) = [ + s1 |-> [a |-> "test", b |-> 1], + s2 |-> [a |-> "a", b |-> 1] + ] + +------------------------- +\* Test SetMax operator | +------------------------- +ASSUME LET set == {2, 3, 1} + IN SetMax(set) = 3 +ASSUME LET set == {1, 1} + IN SetMax(set) = 1 +ASSUME LET set == {1, 5, 5} + IN SetMax(set) = 5 + +------------------------- +\* Test BagAdd operator | +------------------------- +ASSUME BagAdd(EmptyBag, "s1", 3) = [s1 |-> 3] +ASSUME LET bag == [s1 |-> 1] + IN BagAdd(bag, "s2", 1) = [s1 |-> 1, s2 |-> 1] +ASSUME LET bag == [s1 |-> 1] + IN BagAdd(bag, "s1", 2) = [s1 |-> 3] + +------------------------- +\* Test BagSet operator | +------------------------- +ASSUME BagSet(EmptyBag, "s1", 3) = [s1 |-> 3] +ASSUME BagSet([s1 |-> 1], "s1", 2) = [s1 |-> 2] + +----------------------------- +\* Test BagCompare operator | +----------------------------- +ASSUME LET bag1 == [s1 |-> 1, s2 |-> 2] + bag2 == [s1 |-> 1, s2 |-> 2] + IN BagCompare(bag1, bag2) = 0 +ASSUME LET bag1 == [s1 |-> 1, s2 |-> 2] + bag2 == [s1 |-> 1, s2 |-> 1] + IN BagCompare(bag1, bag2) = 1 +ASSUME LET bag1 == [s1 |-> 1, s2 |-> 2] + bag2 == [s1 |-> 2, s2 |-> 1] + IN BagCompare(bag1, bag2) = -1 + +------------------------------- +\* Test BagCountLess operator | +------------------------------- +ASSUME LET bag == [s1 |-> 1, s2 |-> 2, s3 |-> 3] + IN /\ BagCountLess(bag, 1) = 0 + /\ BagCountLess(bag, 2) = 1 + /\ BagCountLess(bag, 3) = 2 + /\ BagCountLess(bag, 100) = 3 + +-------------------------------------- +\* Test BagCountLessOrEqual operator | +-------------------------------------- +ASSUME LET bag == [s1 |-> 1, s2 |-> 2, s3 |-> 3] + IN /\ BagCountLessOrEqual(bag, 0) = 0 + /\ BagCountLessOrEqual(bag, 1) = 1 + /\ BagCountLessOrEqual(bag, 2) = 2 + /\ BagCountLessOrEqual(bag, 100) = 3 + +--------------------------------------- +\* Test BagKthOrderStatistic operator | +--------------------------------------- +ASSUME LET bag == [s1 |-> 6, s2 |-> 9, s3 |-> 3, s4 |-> 7] + IN /\ BagKthOrderStatistic(bag, 0) = 3 + /\ BagKthOrderStatistic(bag, 1) = 6 + /\ BagKthOrderStatistic(bag, 2) = 7 + /\ BagKthOrderStatistic(bag, 3) = 9 + /\ BagKthOrderStatistic(bag, 4) = -1 + +---------------------------------------------- +\* Test FirstEntryWithGreaterLsnIdx operator | +---------------------------------------------- +ASSUME LET entry == XrowEntry(DmlType, "s1", DefaultGroup, SyncFlags, {}) + e1 == [entry EXCEPT !.lsn = 1] + e2 == [entry EXCEPT !.lsn = 2] + e3 == [entry EXCEPT !.lsn = 3] + wal == <> + IN /\ FirstEntryWithGreaterLsnIdx(wal, 0, LAMBDA x: x.lsn) = 1 + /\ FirstEntryWithGreaterLsnIdx(wal, 1, LAMBDA x: x.lsn) = 2 + /\ FirstEntryWithGreaterLsnIdx(wal, 3, LAMBDA x: x.lsn) = -1 + +================================================================================ diff --git a/proofs/tla/test/unit/wal/walTests.cfg b/proofs/tla/test/unit/wal/walTests.cfg new file mode 100644 index 000000000000..06474b0792f0 --- /dev/null +++ b/proofs/tla/test/unit/wal/walTests.cfg @@ -0,0 +1,9 @@ +CONSTANTS + Servers = {s1} + WalMaxRowsTest = 10 + +CHECK_DEADLOCK TRUE +SPECIFICATION Spec + +INVARIANT WalNoGapsAndIncreasingLsnInv +PROPERTY WalSizeProp diff --git a/proofs/tla/test/unit/wal/walTests.tla b/proofs/tla/test/unit/wal/walTests.tla new file mode 100644 index 000000000000..9681d911c22a --- /dev/null +++ b/proofs/tla/test/unit/wal/walTests.tla @@ -0,0 +1,78 @@ +------------------------------- MODULE walTests -------------------------------- + +EXTENDS Integers, Sequences, TLC, utils +ASSUME LET T == INSTANCE TLC IN T!PrintT("walTests") + +CONSTANTS WalMaxRowsTest +ASSUME WalMaxRowsTest \in Nat + +VARIABLES tId + +-------------------------------------------------------------------------------- +\* Imports +-------------------------------------------------------------------------------- + +CONSTANTS Servers +ASSUME Cardinality(Servers) = 1 +VARIABLES wal, box +allVars == <> +INSTANCE wal + +-------------------------------------------------------------------------------- +\* Unit tests +-------------------------------------------------------------------------------- + +ASSUME LET entry == XrowEntry(DmlType, "s1", DefaultGroup, SyncFlags, {}) + txn == [id |-> 1, stmts |-> <>] + state == [ + rows |-> << >>, + txQueue |-> << >>, + queue |-> <> + ] + expectedEntry == [entry EXCEPT !.lsn = 1] + expectedTxn == [txn EXCEPT !.stmts = <>] + IN WalProcess(state) = [ + queue |-> << >>, + rows |-> <>, + txQueue |-> <> + ] + +-------------------------------------------------------------------------------- +\* Specification test +-------------------------------------------------------------------------------- + +TxnSimulate(i) == + /\ tId <= WalMaxRowsTest + /\ LET entry == XrowEntry(DmlType, i, DefaultGroup, SyncFlags, {}) + txn == [tId |-> tId, stmts |-> <>] + walQueue == Append(wal[i].queue, txn) + IN /\ wal' = VarSet(i, "queue", walQueue, wal) + /\ tId' = tId + 1 + /\ UNCHANGED <> + +Init == /\ WalInit + /\ box = [i \in Servers |-> [queue |-> << >>]] + /\ tId = 1 + +Next == \/ /\ WalNext(Servers) + /\ UNCHANGED <> + \/ \E i \in Servers: TxnSimulate(i) + +Spec == Init /\ [][Next]_allVars /\ WF_allVars(Next) + +--------------- +\* Properties | +--------------- + +WalNoGapsAndIncreasingLsnInv == + \A i \in Servers: + LET rows == wal[i].rows + IN IF Len(rows) > 1 THEN + \A j \in 2..Len(rows): rows[j].lsn = rows[j - 1].lsn + 1 + ELSE TRUE + +WalSizeProp == + <> \A i \in Servers: + Len(wal[i].rows) = WalMaxRowsTest + +===============================================================================