I want to perform an action at a regular interval in my multi-threaded Python application. I have seen two different ways of doing it:
exit = False

def thread_func():
    while not exit:
        action()
        time.sleep(DELAY)
or
exit_flag = threading.Event()

def thread_func():
    while not exit_flag.wait(timeout=DELAY):
        action()
Is there an advantage to one way over the other? Does one use fewer resources, or play nicer with other threads and the GIL? Which one makes the remaining threads in my app more responsive?
(Assume some external event sets exit or exit_flag, and I am willing to wait the full delay while shutting down.)
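For concreteness, here is the Event-based variant with the shutdown side included (action and DELAY are just placeholders for my real work and interval):

import threading
import time

DELAY = 1.0

def action():
    print("tick")

exit_flag = threading.Event()

def thread_func():
    while not exit_flag.wait(timeout=DELAY):
        action()

worker = threading.Thread(target=thread_func)
worker.start()

time.sleep(3.5)   # the application does its thing for a while...
exit_flag.set()   # ...then some external event requests shutdown
worker.join()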
Using exit_flag.wait(timeout=DELAY) will be more responsive, because you'll break out of the while loop instantly when exit_flag is set. With time.sleep, even after the event is set, you're going to wait around in the time.sleep call until you've slept for DELAY seconds.
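You can see the difference by timing how long shutdown takes with each style; here's a rough sketch with a deliberately large DELAY so the gap is obvious:

import threading
import time

DELAY = 5.0

def sleep_worker(state):
    while not state["exit"]:
        time.sleep(DELAY)              # stays asleep even after exit is set

def event_worker(stop_event):
    while not stop_event.wait(timeout=DELAY):
        pass                           # returns as soon as the event is set

state = {"exit": False}
t = threading.Thread(target=sleep_worker, args=(state,))
t.start()
time.sleep(0.1)
start = time.time()
state["exit"] = True
t.join()
print("time.sleep loop stopped in %.2fs" % (time.time() - start))   # close to the full DELAY

stop_event = threading.Event()
t = threading.Thread(target=event_worker, args=(stop_event,))
t.start()
time.sleep(0.1)
start = time.time()
stop_event.set()
t.join()
print("Event.wait loop stopped in %.2fs" % (time.time() - start))   # essentially immediate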
In terms of implementation, Python 2.x and Python 3.x have very different behavior. In Python 2.x, Event.wait is implemented in pure Python as a series of small time.sleep calls:
from time import time as _time, sleep as _sleep

....

# This is inside the Condition class (Event.wait calls Condition.wait).
def wait(self, timeout=None):
    if not self._is_owned():
        raise RuntimeError("cannot wait on un-acquired lock")
    waiter = _allocate_lock()
    waiter.acquire()
    self.__waiters.append(waiter)
    saved_state = self._release_save()
    try:    # restore state no matter what (e.g., KeyboardInterrupt)
        if timeout is None:
            waiter.acquire()
            if __debug__:
                self._note("%s.wait(): got it", self)
        else:
            # Balancing act:  We can't afford a pure busy loop, so we
            # have to sleep; but if we sleep the whole timeout time,
            # we'll be unresponsive.  The scheme here sleeps very
            # little at first, longer as time goes on, but never longer
            # than 20 times per second (or the timeout time remaining).
            endtime = _time() + timeout
            delay = 0.0005 # 500 us -> initial delay of 1 ms
            while True:
                gotit = waiter.acquire(0)
                if gotit:
                    break
                remaining = endtime - _time()
                if remaining <= 0:
                    break
                delay = min(delay * 2, remaining, .05)
                _sleep(delay)
            if not gotit:
                if __debug__:
                    self._note("%s.wait(%s): timed out", self, timeout)
                try:
                    self.__waiters.remove(waiter)
                except ValueError:
                    pass
            else:
                if __debug__:
                    self._note("%s.wait(%s): got it", self, timeout)
    finally:
        self._acquire_restore(saved_state)
This actually means using wait is probably a bit more CPU-hungry than just sleeping the full DELAY unconditionally, but has the benefit of being (potentially a lot, depending on how long DELAY is) more responsive. It also means that the GIL needs to be re-acquired frequently, so that the next sleep can be scheduled, while time.sleep can release the GIL for the full DELAY. Now, will acquiring the GIL more frequently have a noticeable effect on other threads in your application? Maybe, maybe not. It depends on how many other threads are running and what kind of workloads they have. My guess is it won't be particularly noticeable unless you have a high number of threads, or perhaps another thread doing lots of CPU-bound work, but it's easy enough to try it both ways and see.
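To get a feel for how often that Python 2 loop wakes up, you can reproduce just its back-off schedule (a sketch of the delay computation above, assuming the lock is never released, i.e. a full timeout):

def backoff_delays(timeout):
    # Mirror the delay schedule of the Python 2 Condition.wait loop above,
    # ignoring the time spent in the non-blocking acquire attempts.
    delays = []
    remaining = timeout
    delay = 0.0005
    while remaining > 0:
        delay = min(delay * 2, remaining, .05)
        delays.append(delay)
        remaining -= delay
    return delays

delays = backoff_delays(10.0)
print(len(delays))    # roughly 200 wakeups (and GIL re-acquisitions) for a 10 s wait
print(delays[:8])     # 0.001, 0.002, 0.004, ... quickly capped at 0.05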
In Python 3.x, much of the implementation is moved to pure C code:
import _thread # C-module
_allocate_lock = _thread.allocate_lock

class Condition:
    ...
    def wait(self, timeout=None):
        if not self._is_owned():
            raise RuntimeError("cannot wait on un-acquired lock")
        waiter = _allocate_lock()
        waiter.acquire()
        self._waiters.append(waiter)
        saved_state = self._release_save()
        gotit = False
        try:    # restore state no matter what (e.g., KeyboardInterrupt)
            if timeout is None:
                waiter.acquire()
                gotit = True
            else:
                if timeout > 0:
                    gotit = waiter.acquire(True, timeout)  # This calls C code
                else:
                    gotit = waiter.acquire(False)
            return gotit
        finally:
            self._acquire_restore(saved_state)
            if not gotit:
                try:
                    self._waiters.remove(waiter)
                except ValueError:
                    pass

class Event:
    def __init__(self):
        self._cond = Condition(Lock())
        self._flag = False

    def wait(self, timeout=None):
        self._cond.acquire()
        try:
            signaled = self._flag
            if not signaled:
                signaled = self._cond.wait(timeout)
            return signaled
        finally:
            self._cond.release()
And the C code that acquires the lock:
/* Helper to acquire an interruptible lock with a timeout.  If the lock acquire
 * is interrupted, signal handlers are run, and if they raise an exception,
 * PY_LOCK_INTR is returned.  Otherwise, PY_LOCK_ACQUIRED or PY_LOCK_FAILURE
 * are returned, depending on whether the lock can be acquired withing the
 * timeout.
 */
static PyLockStatus
acquire_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds)
{
    PyLockStatus r;
    _PyTime_timeval curtime;
    _PyTime_timeval endtime;

    if (microseconds > 0) {
        _PyTime_gettimeofday(&endtime);
        endtime.tv_sec += microseconds / (1000 * 1000);
        endtime.tv_usec += microseconds % (1000 * 1000);
    }

    do {
        /* first a simple non-blocking try without releasing the GIL */
        r = PyThread_acquire_lock_timed(lock, 0, 0);
        if (r == PY_LOCK_FAILURE && microseconds != 0) {
            Py_BEGIN_ALLOW_THREADS  // GIL is released here
            r = PyThread_acquire_lock_timed(lock, microseconds, 1);
            Py_END_ALLOW_THREADS
        }

        if (r == PY_LOCK_INTR) {
            /* Run signal handlers if we were interrupted.  Propagate
             * exceptions from signal handlers, such as KeyboardInterrupt, by
             * passing up PY_LOCK_INTR. */
            if (Py_MakePendingCalls() < 0) {
                return PY_LOCK_INTR;
            }

            /* If we're using a timeout, recompute the timeout after processing
             * signals, since those can take time. */
            if (microseconds > 0) {
                _PyTime_gettimeofday(&curtime);
                microseconds = ((endtime.tv_sec - curtime.tv_sec) * 1000000 +
                                (endtime.tv_usec - curtime.tv_usec));

                /* Check for negative values, since those mean block forever.
                 */
                if (microseconds <= 0) {
                    r = PY_LOCK_FAILURE;
                }
            }
        }
    } while (r == PY_LOCK_INTR);  /* Retry if we were interrupted. */

    return r;
}
This implementation is responsive, and doesn't require frequent wakeups that re-acquire the GIL, so you get the best of both worlds.
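If you want to package the Event-based loop up for reuse, something along these lines works on both Python 2 and 3 (the class name is just illustrative, not a stdlib class):

import threading

class PeriodicWorker(object):
    """Call `action` every `delay` seconds until stop() is called (sketch)."""

    def __init__(self, delay, action):
        self._delay = delay
        self._action = action
        self._stop_event = threading.Event()
        self._thread = threading.Thread(target=self._run)

    def _run(self):
        while not self._stop_event.wait(timeout=self._delay):
            self._action()

    def start(self):
        self._thread.start()

    def stop(self):
        self._stop_event.set()   # wakes the wait() immediately
        self._thread.join()

# usage:
# worker = PeriodicWorker(5.0, some_callable)
# worker.start()
# ...
# worker.stop()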