From 9bbd6ae1f7ab89f66c5d10c0b118797e6330692b Mon Sep 17 00:00:00 2001 From: Adrien Kunysz Date: Wed, 16 Apr 2025 10:42:17 +0200 Subject: [PATCH] Notify workers of quick shutdown after graceful shutdown times out. As per https://github.com/benoitc/gunicorn/issues/3385 this makes it possible to distinguish between graceful and abnormal termination and gives workers a chance to do something about it (e.g. to log a stack trace). For backward compatibility, we keep the old two-signal model if quick_shutdown_timeout is set to 0. --- docs/source/signals.rst | 6 +++-- gunicorn/arbiter.py | 23 +++++++++++------- gunicorn/config.py | 27 ++++++++++++++++++++- gunicorn/workers/base.py | 10 ++++++-- gunicorn/workers/gthread.py | 3 +-- tests/test_arbiter.py | 48 +++++++++++++++++++++++++++++++++++-- 6 files changed, 99 insertions(+), 18 deletions(-) diff --git a/docs/source/signals.rst b/docs/source/signals.rst index c22ea0362..6bdbda99b 100644 --- a/docs/source/signals.rst +++ b/docs/source/signals.rst @@ -10,9 +10,11 @@ signals used internally by Gunicorn to communicate with the workers. Master process ============== -- ``QUIT``, ``INT``: Quick shutdown +- ``QUIT``, ``INT``: Quick shutdown. Waits for workers to finish their current + requests up to the :ref:`quick-shutdown-timeout`. - ``TERM``: Graceful shutdown. Waits for workers to finish their - current requests up to the :ref:`graceful-timeout`. + current requests up to the + :ref:`graceful-timeout` + :ref:`quick-shutdown-timeout`. - ``HUP``: Reload the configuration, start the new worker processes with a new configuration and gracefully shutdown older workers. 
If the application is not preloaded (using the :ref:`preload-app` option), Gunicorn will also load diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 646d684ef..15fecd07e 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -386,18 +386,23 @@ def stop(self, graceful=True): sock.close_sockets(self.LISTENERS, unlink) self.LISTENERS = [] - sig = signal.SIGTERM - if not graceful: - sig = signal.SIGQUIT - limit = time.time() + self.cfg.graceful_timeout - # instruct the workers to exit - self.kill_workers(sig) - # wait until the graceful timeout - while self.WORKERS and time.time() < limit: - time.sleep(0.1) + + if graceful: + deadline = time.time() + self.cfg.graceful_timeout + self.kill_workers(signal.SIGTERM) + self.sleep_until(deadline) + + if not graceful or self.cfg.quick_shutdown_timeout > 0: + deadline = time.time() + self.cfg.quick_shutdown_timeout + self.kill_workers(signal.SIGINT) + self.sleep_until(deadline) self.kill_workers(signal.SIGKILL) + def sleep_until(self, deadline): + while self.WORKERS and time.time() < deadline: + time.sleep(0.1) + def reexec(self): """\ Relaunch the master and workers. diff --git a/gunicorn/config.py b/gunicorn/config.py index 07c5aab34..b79aa1367 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -365,6 +365,13 @@ def validate_pos_int(val): return val +def validate_pos_float(val): + val = float(val) + if val < 0: + raise ValueError("Value must be positive: %s" % val) + return val + + def validate_ssl_version(val): if val != SSLVersion.default: sys.stderr.write("Warning: option `ssl_version` is deprecated and it is ignored. Use ssl_context instead.\n") @@ -811,7 +818,25 @@ class GracefulTimeout(Setting): After receiving a restart signal, workers have this much time to finish serving requests. Workers still alive after the timeout (starting from - the receipt of the restart signal) are force killed. 
+ the receipt of the restart signal) are sent a quick shutdown signal (if + quick_shutdown_timeout is greater than zero) then are force killed. + """ + + +class QuickShutdownTimeout(Setting): + name = "quick_shutdown_timeout" + section = "Worker Processes" + cli = ["--quick-shutdown-timeout"] + meta = "INT" + validator = validate_pos_float + type = float + default = 0 + desc = """\ + Timeout for quick worker shutdown. + + After receiving a quick shutdown signal, workers have this much time to + finish serving requests. Workers still alive after the timeout (starting + from the receipt of the quick shutdown signal) are force killed. """ diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 93c465c98..5a5ef64c1 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -191,12 +191,18 @@ def handle_usr1(self, sig, frame): def handle_exit(self, sig, frame): self.alive = False + def quick_exit(self): + timeout = self.cfg.quick_shutdown_timeout + if timeout <= 0: + timeout = 0.1 + time.sleep(timeout) + sys.exit(0) + def handle_quit(self, sig, frame): self.alive = False # worker_int callback self.cfg.worker_int(self) - time.sleep(0.1) - sys.exit(0) + self.quick_exit() def handle_abort(self, sig, frame): self.alive = False diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 7a23228cd..445cc1648 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -102,8 +102,7 @@ def handle_quit(self, sig, frame): # worker_int callback self.cfg.worker_int(self) self.tpool.shutdown(False) - time.sleep(0.1) - sys.exit(0) + self.quick_exit() def _wrap_future(self, fs, conn): fs.conn = conn diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index 8c1527e26..f30ad73c4 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -3,11 +3,12 @@ # See the NOTICE for more information. 
import os +import signal from unittest import mock import gunicorn.app.base import gunicorn.arbiter -from gunicorn.config import ReusePort +import gunicorn.config class DummyApplication(gunicorn.app.base.BaseApplication): @@ -63,12 +64,55 @@ def test_arbiter_stop_does_not_unlink_systemd_listeners(close_sockets): @mock.patch('gunicorn.sock.close_sockets') def test_arbiter_stop_does_not_unlink_when_using_reuse_port(close_sockets): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) - arbiter.cfg.settings['reuse_port'] = ReusePort() + arbiter.cfg.settings['reuse_port'] = gunicorn.config.ReusePort() arbiter.cfg.settings['reuse_port'].set(True) arbiter.stop() close_sockets.assert_called_with([], False) +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_graceful_no_sigquit(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.cfg.settings['graceful_timeout'] = gunicorn.config.GracefulTimeout() + arbiter.cfg.settings['graceful_timeout'].set(1) + arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop() + kill.assert_has_calls([ + mock.call(42, signal.SIGTERM), + mock.call(42, signal.SIGKILL), + ]) + + +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_quick(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop(graceful=False) + kill.assert_has_calls([ + mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGKILL), + ]) + + +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_graceful_then_quick(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.cfg.settings['graceful_timeout'] = gunicorn.config.GracefulTimeout() + arbiter.cfg.settings['graceful_timeout'].set(1) + arbiter.cfg.settings['quick_shutdown_timeout'] = gunicorn.config.QuickShutdownTimeout() + arbiter.cfg.settings['quick_shutdown_timeout'].set(0.1) + 
arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop() + kill.assert_has_calls([ + mock.call(42, signal.SIGTERM), + mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGKILL), + ]) + + @mock.patch('os.getpid') @mock.patch('os.fork') @mock.patch('os.execvpe')