From 1bc9bba5a3bb201345fcf6b690b1c884f8ab8a7d Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Sun, 23 Dec 2018 08:41:23 +0000
Subject: [PATCH] A try to make greater robustness when calling client ping

---
Notes (review):
    * This copy of the patch had its line breaks and leading whitespace
      collapsed. The layout below is rebuilt from the "@@" hunk headers (every
      hunk's old/new line counts match); the indentation depth is a best
      guess, so regenerate the patch from commit 1bc9bba before applying it.
    * Hunk -162: the removed '#print("Got CID. ...' comment was broken across
      two lines in this copy; it is rejoined here because the hunk's old line
      count (15) only fits if it is a single line.
    * getTrisurfVersion() calls subprocess.Popen, but the only import this
      patch adds is "re". Confirm tsclient.py already imports subprocess.
    * getTrisurfVersion() indexes lines[0] unconditionally; if
      "trisurf --version" prints nothing this raises IndexError instead of
      returning "unknown version".
    * Both new client_ping() handlers use a bare "except:", which also traps
      KeyboardInterrupt and SystemExit.
    * Hunk -259: the new message formats self.sid while every other message in
      this patch formats self.id. Confirm the attribute exists; otherwise the
      handler itself raises AttributeError before the cleanup runs.

 tsclient.py |   47 ++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/tsclient.py b/tsclient.py
index e658635..610f947 100755
--- a/tsclient.py
+++ b/tsclient.py
@@ -10,6 +10,21 @@
 import sys
 import socket
 from threading import Thread, Event
+import re
+
+
+
+glob_ts_version='00000'
+
+def getTrisurfVersion():
+    p = subprocess.Popen('trisurf --version', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    lines=p.stdout.readlines()
+    version=re.findall(r'[0-9a-f]{7}(?:-dirty)?', lines[0].decode('ascii'))
+    p.wait()
+    if(len(version)):
+        return version[0]
+    else:
+        return "unknown version"
 
 def get_hostname():
     return socket.gethostname()
@@ -18,7 +33,8 @@
     return ((([ip for ip in socket.gethostbyname_ex(socket.gethostname())[2] if not ip.startswith("127.")] or [[(s.connect(("8.8.8.8", 53)), s.getsockname()[0], s.close()) for s in [socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]]) + ["no IP found"])[0])
 
 def get_client_id(addr, my_ip, my_hostname, subrun):
-    client_auth={'ip':my_ip,'hostname':my_hostname, 'subrun':subrun}
+    global glob_ts_version
+    client_auth={'ip':my_ip,'hostname':my_hostname, 'subrun':subrun, 'trisurf_version':glob_ts_version }
     response=requests.post(addr+"/api/register/", data=client_auth)
     if(response.status_code==200):
         client_data=json.loads(response.text)
@@ -38,7 +54,7 @@
         status=client_data['status']
         return (rid,tape,vtu,status)
     else:
-        print(response.text)
+        #print(response.text)
         if(response.status_code==400):
             raise ValueError
         else:
@@ -162,15 +178,19 @@
                 print("[{}] Could not get CID.".format(self.id))
                 self.sleep(10)
                 continue
-            #print("Got CID. getting RID.")
-            client_ping_time_elapsed=0
-            concurrent_runs=client_ping(self.conn_address,cid)
+            print("[{}] Connected and got client ID {}.".format(self.id, cid))
+            try:
+                concurrent_runs=client_ping(self.conn_address,cid)
+                client_ping_time_elapsed=0
+            except:
+                self.sleep(10)
+                continue
             self.subrunsStartStop(concurrent_runs)
             while(not self.isStopped()): #successfully registered, now start pinging and searching for job
                 try:
                     (rid,tape,vtu,status)=get_run(self.conn_address,cid)
                 except NameError:
-                    print("[{}] Could not get RID.".format(self.id))
+                    #print("[{}] Could not get RID.".format(self.id))
                     self.sleep(10)
                     client_ping_time_elapsed+=10
                     if(client_ping_time_elapsed>=self.max_client_ping_time_elapsed):
@@ -236,7 +256,7 @@
                     try:
                         ping_run(self.conn_address, cid, rid)
                     except:
-                        print("[{}] Could not ping.".format(self.id))
+                        print("[{}] Could not prolong a lease on the run.".format(self.id))
                         self.p.terminate()
                         self.p=None
                         removeDir(self.workingdir.fullpath())
@@ -259,7 +279,15 @@
                     self.sleep(self.update_seconds-1)
                     client_ping_time_elapsed+=self.update_seconds
                     if(client_ping_time_elapsed>self.max_client_ping_time_elapsed-self.update_seconds/2):
-                        concurrent_runs=client_ping(self.conn_address,cid)
+                        try:
+                            concurrent_runs=client_ping(self.conn_address,cid)
+                        except:
+                            print("[{}] Could not client ping.".format(self.sid))
+                            self.p.terminate()
+                            self.p=None
+                            removeDir(self.workingdir.fullpath())
+                            self.workingdir=None
+                            break
                         self.subrunsStartStop(concurrent_runs)
                         client_ping_time_elapsed=0
 
@@ -338,7 +366,8 @@
 #--- END SIGINT and SIGTERM----
 
 if __name__ == '__main__':
-
+    #global glob_ts_version
+    glob_ts_version=getTrisurfVersion()
     signal.signal(signal.SIGINT, signal_handler)
     signal.signal(signal.SIGTERM, signal_handler)
 
-- 
Gitblit v1.9.3