diff --git a/superlance/httpok.py b/superlance/httpok.py index b204317..eb0ec5b 100644 --- a/superlance/httpok.py +++ b/superlance/httpok.py @@ -55,6 +55,11 @@ attempt to restart processes in the RUNNING state specified by -p or -a. This defaults to 10 seconds. +-r -- The number of retries that httpok should attempt before restarting + processes. Only after this number of consecutive failed retries + will httpok attempt to take action on processes. The default is + 0, which makes httpok act on the first failed response. + -c -- specify an expected HTTP status code from a GET request to the URL. If this status code is not the status code provided by the response, httpok will attempt to restart processes in the @@ -114,12 +119,14 @@ def usage(): class HTTPOk: connclass = None def __init__(self, rpc, programs, any, url, timeout, status, inbody, - email, sendmail, coredir, gcore, eager, retry_time): + email, sendmail, coredir, gcore, eager, retry_time, allowed_retries=0): self.rpc = rpc self.programs = programs self.any = any self.url = url self.timeout = timeout + self.allowed_retries = allowed_retries + self.attempted_retries = 0 self.retry_time = retry_time self.status = status self.inbody = inbody @@ -201,15 +208,29 @@ def runforever(self, test=False): if str(status) != str(self.status): subject = 'httpok for %s: bad status returned' % self.url - self.act(subject, msg) + self.retry_or_act(subject, msg) elif self.inbody and self.inbody not in body: subject = 'httpok for %s: bad body returned' % self.url - self.act(subject, msg) + self.retry_or_act(subject, msg) + else: + # reset this counter as we have a successful response here + self.attempted_retries = 0 childutils.listener.ok(self.stdout) if test: break + def retry_or_act(self, subject, msg): + if self.attempted_retries >= self.allowed_retries: + self.act(subject, msg) + else: + retries_left = self.allowed_retries - self.attempted_retries + self.attempted_retries += 1 + self.stderr.write('Error occurred: %s\n' % msg) + 
self.stderr.write('Allowed number of retries not exceeded, ' + 'will try again %d more times.\n' % retries_left) + self.stderr.flush() + def act(self, subject, msg): messages = [msg] @@ -299,12 +320,13 @@ def restart(self, spec, write): def main(argv=sys.argv): import getopt - short_args="hp:at:c:b:s:m:g:d:eE" + short_args="hp:at:r:c:b:s:m:g:d:eE" long_args=[ "help", "program=", "any", "timeout=", + "retry=", "code=", "body=", "sendmail_program=", @@ -333,6 +355,7 @@ def main(argv=sys.argv): eager = True email = None timeout = 10 + allowed_retries = 0 retry_time = 10 status = '200' inbody = None @@ -357,6 +380,9 @@ def main(argv=sys.argv): if option in ('-t', '--timeout'): timeout = int(value) + if option in ('-r', '--retry'): + allowed_retries = int(value) + if option in ('-c', '--code'): status = value @@ -388,7 +414,7 @@ def main(argv=sys.argv): return prog = HTTPOk(rpc, programs, any, url, timeout, status, inbody, email, - sendmail, coredir, gcore, eager, retry_time) + sendmail, coredir, gcore, eager, retry_time, allowed_retries) prog.runforever() if __name__ == '__main__': diff --git a/superlance/tests/httpok_test.py b/superlance/tests/httpok_test.py index 1ace4f9..5d8845e 100644 --- a/superlance/tests/httpok_test.py +++ b/superlance/tests/httpok_test.py @@ -40,11 +40,12 @@ def __init__(self, hostport): self.hostport = hostport def request(self, method, path, headers): - if exc: - if exc == True: - raise ValueError('foo') - else: - raise exc.pop() + error = exc.pop() if isinstance(exc, list) and exc else exc + if isinstance(error, BaseException): + raise error + elif error: + raise ValueError('foo') + self.method = method self.path = path self.headers = headers @@ -63,7 +64,7 @@ def _makeOne(self, *opts): return self._getTargetClass()(*opts) def _makeOnePopulated(self, programs, any, response=None, exc=None, - gcore=None, coredir=None, eager=True): + gcore=None, coredir=None, eager=True, allowed_retries=0): if response is None: response = DummyResponse() rpc = 
DummyRPCServer() @@ -78,7 +79,7 @@ def _makeOnePopulated(self, programs, any, response=None, exc=None, coredir = coredir prog = self._makeOne(rpc, programs, any, url, timeout, status, inbody, email, sendmail, coredir, gcore, eager, - retry_time) + retry_time, allowed_retries) prog.stdin = StringIO() prog.stdout = StringIO() prog.stderr = StringIO() @@ -130,9 +131,7 @@ def test_runforever_eager_error_on_request_some(self): programs = ['foo', 'bar', 'baz_01', 'notexisting'] any = None prog = self._makeOnePopulated(programs, any, exc=True) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = prog.stderr.getvalue().split('\n') #self.assertEqual(len(lines), 7) self.assertEqual(lines[0], @@ -156,9 +155,7 @@ def test_runforever_eager_error_on_request_any(self): programs = [] any = True prog = self._makeOnePopulated(programs, any, exc=True) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = prog.stderr.getvalue().split('\n') #self.assertEqual(len(lines), 6) self.assertEqual(lines[0], 'Restarting all running processes') @@ -178,9 +175,7 @@ def test_runforever_eager_error_on_process_stop(self): any = False prog = self._makeOnePopulated(programs, any, exc=True) prog.rpc.supervisor.all_process_info = _FAIL - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = prog.stderr.getvalue().split('\n') #self.assertEqual(len(lines), 5) self.assertEqual(lines[0], "Restarting selected processes ['FAILED']") @@ -199,9 +194,7 @@ def test_runforever_eager_error_on_process_start(self): any = False prog = self._makeOnePopulated(programs, any, exc=True) prog.rpc.supervisor.all_process_info = _FAIL - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = prog.stderr.getvalue().split('\n') #self.assertEqual(len(lines), 4) 
self.assertEqual(lines[0], @@ -221,9 +214,7 @@ def test_runforever_eager_gcore(self): any = None prog = self._makeOnePopulated(programs, any, exc=True, gcore="true", coredir="/tmp") - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = prog.stderr.getvalue().split('\n') self.assertEqual(lines[0], ("Restarting selected processes ['foo', 'bar', " @@ -250,9 +241,7 @@ def test_runforever_not_eager_none_running(self): any = None prog = self._makeOnePopulated(programs, any, exc=True, gcore="true", coredir="/tmp", eager=False) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = [x for x in prog.stderr.getvalue().split('\n') if x] self.assertEqual(len(lines), 0, lines) self.assertFalse('mailed' in prog.__dict__) @@ -261,9 +250,7 @@ def test_runforever_not_eager_running(self): programs = ['foo', 'bar'] any = None prog = self._makeOnePopulated(programs, any, exc=True, eager=False) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = [x for x in prog.stderr.getvalue().split('\n') if x] self.assertEqual(lines[0], ("Restarting selected processes ['foo', 'bar']") @@ -283,9 +270,7 @@ def test_runforever_honor_timeout_on_connrefused(self): error = socket.error() error.errno = 111 prog = self._makeOnePopulated(programs, any, exc=[error], eager=False) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) self.assertEqual(prog.stderr.getvalue(), '') self.assertEqual(prog.stdout.getvalue(), 'READY\nRESULT 2\nOK') @@ -296,9 +281,7 @@ def test_runforever_connrefused_error(self): error.errno = 111 prog = self._makeOnePopulated(programs, any, exc=[error for x in range(100)], eager=False) - prog.stdin.write('eventname:TICK len:0\n') - prog.stdin.seek(0) - prog.runforever(test=True) + self.tick(prog) lines = [x for x in 
prog.stderr.getvalue().split('\n') if x] self.assertEqual(lines[0], ("Restarting selected processes ['foo', 'bar']") @@ -312,5 +295,53 @@ def test_runforever_connrefused_error(self): self.assertEqual(mailed[1], 'Subject: httpok for http://foo/bar: bad status returned') + def test_retry_before_restart(self): + programs = ['foo', 'bar'] + any = None + prog = self._makeOnePopulated(programs, any, exc=True, eager=False, allowed_retries=2) + + self.tick(prog) + lines = prog.stderr.getvalue().split('\n') + self.assertEqual(lines[-2], 'Allowed number of retries not exceeded, ' + 'will try again 2 more times.') + + self.tick(prog) + lines = prog.stderr.getvalue().split('\n') + self.assertEqual(lines[-2], 'Allowed number of retries not exceeded, ' + 'will try again 1 more times.') + + self.tick(prog) + new_lines = prog.stderr.getvalue().split('\n')[len(lines) - 1:] + self.assertEqual(new_lines[0], "Restarting selected processes ['foo', 'bar']") + + def test_retry_success_reset_count(self): + programs = ['foo', 'bar'] + any = None + prog = self._makeOnePopulated(programs, any, exc=[True, False, True], + eager=False, allowed_retries=1) + + self.tick(prog) + lines = prog.stderr.getvalue().split('\n') + self.assertEqual(lines[-2], 'Allowed number of retries not exceeded, ' + 'will try again 1 more times.') + + self.tick(prog) + new_lines = prog.stderr.getvalue().split('\n') + # nothing new is printed + self.assertListEqual(lines, new_lines) + + self.tick(prog) + new_lines = prog.stderr.getvalue().split('\n') + # new retry notice is printed + self.assertTrue(len(new_lines) > len(lines)) + self.assertEqual(new_lines[-2], 'Allowed number of retries not exceeded, ' + 'will try again 1 more times.') + + def tick(self, prog): + prog.stdin.write('eventname:TICK len:0\n') + prog.stdin.seek(0) + prog.runforever(test=True) + + if __name__ == '__main__': unittest.main()