From 1bc9bba5a3bb201345fcf6b690b1c884f8ab8a7d Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Sun, 23 Dec 2018 08:41:23 +0000
Subject: [PATCH] Try to make client ping calls more robust

---
 tsclient.py |   47 ++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/tsclient.py b/tsclient.py
index e658635..610f947 100755
--- a/tsclient.py
+++ b/tsclient.py
@@ -10,6 +10,21 @@
 import sys
 import socket
 from threading import Thread, Event
+import re
+
+
+
+glob_ts_version='00000'
+
+def getTrisurfVersion():
+	"""Return the 7-hex-digit hash (optionally suffixed with -dirty) found on the first output line of `trisurf --version`, or "unknown version"."""
+	p = subprocess.Popen('trisurf --version', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+	output=p.communicate()[0] # reads all output, closes the pipe and waits for the child in one call
+	# Only the first line is inspected; empty output yields '' here instead of raising IndexError.
+	first_line=output.split(b'\n',1)[0].decode('ascii','replace') # undecodable bytes are replaced rather than raising
+	version=re.findall(r'[0-9a-f]{7}(?:-dirty)?', first_line)
+	# Use the first match if there is one, otherwise the placeholder string.
+	return version[0] if version else "unknown version"
 
 def get_hostname():
 	return socket.gethostname()
@@ -18,7 +33,8 @@
 	return ((([ip for ip in socket.gethostbyname_ex(socket.gethostname())[2] if not ip.startswith("127.")] or [[(s.connect(("8.8.8.8", 53)), s.getsockname()[0], s.close()) for s in [socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]]) + ["no IP found"])[0])
 
 def get_client_id(addr, my_ip, my_hostname, subrun):
-	client_auth={'ip':my_ip,'hostname':my_hostname, 'subrun':subrun}
+	global glob_ts_version
+	client_auth={'ip':my_ip,'hostname':my_hostname, 'subrun':subrun, 'trisurf_version':glob_ts_version }
 	response=requests.post(addr+"/api/register/", data=client_auth)
 	if(response.status_code==200):
 		client_data=json.loads(response.text)
@@ -38,7 +54,7 @@
 		status=client_data['status']
 		return (rid,tape,vtu,status)
 	else:
-		print(response.text)
+		#print(response.text)
 		if(response.status_code==400):
 			raise ValueError
 		else:
@@ -162,15 +178,19 @@
 				print("[{}] Could not get CID.".format(self.id))
 				self.sleep(10)
 				continue
-			#print("Got CID. getting RID.")
-			client_ping_time_elapsed=0
-			concurrent_runs=client_ping(self.conn_address,cid)
+			print("[{}] Connected and got client ID {}.".format(self.id, cid))
+			try:
+				concurrent_runs=client_ping(self.conn_address,cid)
+				client_ping_time_elapsed=0
+			except:
+				self.sleep(10)
+				continue
 			self.subrunsStartStop(concurrent_runs)
 			while(not self.isStopped()): #successfully registered, now start pinging and searching for job
 				try:
 					(rid,tape,vtu,status)=get_run(self.conn_address,cid)
 				except NameError:
-					print("[{}] Could not get RID.".format(self.id))
+					#print("[{}] Could not get RID.".format(self.id))
 					self.sleep(10)
 					client_ping_time_elapsed+=10
 					if(client_ping_time_elapsed>=self.max_client_ping_time_elapsed):
@@ -236,7 +256,7 @@
 							try:
 								ping_run(self.conn_address, cid, rid)
 							except:
-								print("[{}] Could not ping.".format(self.id))
+								print("[{}] Could not prolong a lease on the run.".format(self.id))
 								self.p.terminate()
 								self.p=None
 								removeDir(self.workingdir.fullpath())
@@ -259,7 +279,15 @@
 						self.sleep(self.update_seconds-1)
 						client_ping_time_elapsed+=self.update_seconds
 						if(client_ping_time_elapsed>self.max_client_ping_time_elapsed-self.update_seconds/2):
-							concurrent_runs=client_ping(self.conn_address,cid)
+							try: # any failure of the periodic client ping is handled below instead of propagating
+								concurrent_runs=client_ping(self.conn_address,cid)
+							except:
+								print("[{}] Could not client ping.".format(self.id)) # self.id, as in the other messages of this loop (was self.sid)
+								self.p.terminate() # stop the running subprocess
+								self.p=None
+								removeDir(self.workingdir.fullpath()) # discard the working directory of the abandoned run
+								self.workingdir=None
+								break
 							self.subrunsStartStop(concurrent_runs)
 							client_ping_time_elapsed=0
 
@@ -338,7 +366,8 @@
 #--- END SIGINT and SIGTERM----
 
 if __name__ == '__main__':
-
+	#global glob_ts_version
+	glob_ts_version=getTrisurfVersion()
 	signal.signal(signal.SIGINT, signal_handler)
 	signal.signal(signal.SIGTERM, signal_handler)
 

--
Gitblit v1.9.3