diff --git a/tests/run-tests.py b/tests/run-tests.py --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -1390,16 +1390,19 @@ class TestSuite(unittest.TestSuite): done.put(('!', test, 'run-test raised an error, see traceback')) raise + stoppedearly = False + try: while tests or running: if not done.empty() or running == self._jobs or not tests: try: done.get(True, 1) + running -= 1 if result and result.shouldStop: + stoppedearly = True break except queue.Empty: continue - running -= 1 if tests and not running == self._jobs: test = tests.pop(0) if self._loop: @@ -1413,6 +1416,18 @@ class TestSuite(unittest.TestSuite): args=(test, result)) t.start() running += 1 + + # If we stop early we still need to wait on started tests to + # finish. Otherwise, there is a race between the test completing + # and the test's cleanup code running. This could result in the + # test reporting incorrect results. + if stoppedearly: + while running: + try: + done.get(True, 1) + running -= 1 + except queue.Empty: + continue except KeyboardInterrupt: for test in runtests: test.abort() diff --git a/tests/test-run-tests.t b/tests/test-run-tests.t --- a/tests/test-run-tests.t +++ b/tests/test-run-tests.t @@ -265,7 +265,8 @@ failures in parallel with --first should this test is still more bytes than success. Failed test-failure*.t: output changed (glob) - # Ran 2 tests, 0 skipped, 0 warned, 1 failed. + Failed test-nothing.t: output changed + # Ran 2 tests, 0 skipped, 0 warned, 2 failed. python hash seed: * (glob) [1]