Samo Penic
2018-05-05 c2dd264efc30851fc4be4eab7bc63e4827725701
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/python3
import requests
import json
from time import sleep
import uuid
import subprocess
from trisurf import trisurf
import os
import shutil
import signal
import sys
import socket
# Base URL of the server; the API paths are appended to it by the helpers below.
CONNECT_ADDR = "http://localhost:8000"

# Handle of the simulation subprocess started by the main loop (None until one starts).
p = None
# Working-directory object of the current run (None until a run is fetched).
workingdir = None
 
 
 
#--- SIGINT and SIGTERM HANDLING ---
def signal_handler(signal,frame):
    """Stop the running simulation, remove its working directory and exit.

    Installed right below as the handler for SIGINT and SIGTERM.

    Args:
        signal: Number of the received signal. Inside this function the
            name shadows the ``signal`` module.
        frame: Stack frame supplied by the interpreter; unused.

    Raises:
        SystemExit: Always, with exit status 0.
    """
    # Module-level run state; it is only read here.
    global p
    global workingdir
    if p is not None:
        p.terminate()
    if workingdir is not None:
        removeDir(workingdir.fullpath())
    print("Process ended with signal " +str(signal))
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
#--- END SIGINT and SIGTERM----
 
 
 
def get_hostname():
    """Return this machine's host name as reported by ``socket.gethostname``."""
    hostname = socket.gethostname()
    return hostname
 
def get_ip():
    """Return a non-loopback IPv4 address of this host as a string.

    The addresses the local host name resolves to are tried first. If none
    of them is usable, a UDP socket is connected to 8.8.8.8:53 and the local
    address bound for that connection is returned instead.

    Returns:
        The first address found, or the literal string "no IP found" when
        both lookups fail.
    """
    try:
        resolved = socket.gethostbyname_ex(socket.gethostname())[2]
    except OSError:
        # The host name does not resolve; fall through to the socket probe.
        resolved = []
    for ip in resolved:
        if not ip.startswith("127."):
            return ip
    try:
        # The context manager closes the socket even when connect() fails.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(("8.8.8.8", 53))
            return probe.getsockname()[0]
    except OSError:
        return "no IP found"
 
def get_client_id(addr, my_ip, my_hostname, timeout=60):
    """Register this client with the server and return the id it assigns.

    Args:
        addr: Base URL of the server; "/api/register/" is appended to it.
        my_ip: IP address sent in the "ip" form field.
        my_hostname: Host name sent in the "hostname" form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the client forever.

    Returns:
        The value of the "id" field of the JSON reply.

    Raises:
        ValueError: If the server replies with a status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    client_auth = {'ip': my_ip, 'hostname': my_hostname}
    response = requests.post(addr + "/api/register/", data=client_auth, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("register failed with HTTP status " + str(response.status_code))
    client_data = json.loads(response.text)
    return client_data['id']
 
 
def get_run(addr, cid, timeout=60):
    """Ask the server for a run assigned to client *cid*.

    Args:
        addr: Base URL of the server; "/api/getrun/<cid>/" is appended to it.
        cid: Client id, converted with ``str`` when building the URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the client forever.

    Returns:
        A tuple ``(rid, tape, vtu, status)`` taken from the "id", "tape",
        "lastVTU" and "status" fields of the JSON reply.

    Raises:
        ValueError: If the server replies with a status other than 200; the
            reply body is printed first.
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    response = requests.get(addr + "/api/getrun/" + str(cid) + "/", timeout=timeout)
    if response.status_code != 200:
        print(response.text)
        raise ValueError("getrun failed with HTTP status " + str(response.status_code))
    client_data = json.loads(response.text)
    return (
        client_data['id'],
        client_data['tape'],
        client_data['lastVTU'],
        client_data['status'],
    )
 
 
def ping_run(addr, cid, rid, timeout=60):
    """Post a ping for run *rid* of client *cid* to the server.

    Args:
        addr: Base URL of the server; "/api/ping/" is appended to it.
        cid: Client id sent in the "client_id" form field.
        rid: Run id sent in the "run_id" form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the monitoring loop forever.

    Raises:
        ValueError: If the server replies with a status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    client_data = {'client_id': cid, 'run_id': rid}
    response = requests.post(addr + "/api/ping/", data=client_data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("ping failed with HTTP status " + str(response.status_code))
 
def send_error_report(addr, cid, rid, errcode, timeout=60):
    """Report an error code for run *rid* of client *cid* to the server.

    Args:
        addr: Base URL of the server; "/api/reporterr/" is appended to it.
        cid: Client id sent in the "client_id" form field.
        rid: Run id sent in the "run_id" form field.
        errcode: Value sent in the "error_code" form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the client forever.

    Raises:
        ValueError: If the server replies with a status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    client_data = {'client_id': cid, 'run_id': rid, 'error_code': errcode}
    response = requests.post(addr + "/api/reporterr/", data=client_data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("reporterr failed with HTTP status " + str(response.status_code))
 
def upload(addr, cid, rid, vtu, status, timeout=60):
    """Upload the latest VTU content and status of a run to the server.

    Args:
        addr: Base URL of the server; "/api/upload/" is appended to it.
        cid: Client id sent in the "client_id" form field.
        rid: Run id sent in the "run_id" form field.
        vtu: VTU file content sent in the "lastVTU" form field.
        status: Value sent in the "status" form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the monitoring loop forever.

    Raises:
        ValueError: If the server replies with a status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            or times out.
    """
    client_data = {'client_id': cid, 'run_id': rid, 'lastVTU': vtu, 'status': status}
    response = requests.post(addr + "/api/upload/", data=client_data, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("upload failed with HTTP status " + str(response.status_code))
 
def getNewVTU(directory):
    """Collect the timestep VTU files currently present in *directory*.

    Returns:
        A set of bare file names (no directory part) that start with
        "timestep_" and end with ".vtu"; empty when there are none.
    """
    return {
        entry
        for entry in os.listdir(directory)
        if entry.startswith("timestep_") and entry.endswith(".vtu")
    }
 
 
def removeDir(directory):
    """Delete *directory* and everything below it, on a best-effort basis.

    The current working directory is changed to "/" first, presumably so the
    process is not left standing inside the tree being removed.

    Args:
        directory: Path of the directory to remove, as a string.

    A failure to remove the tree is reported on stdout instead of raised.
    """
    os.chdir('/')
    try:
        shutil.rmtree(directory)
    except OSError:
        # Only filesystem errors are expected here; anything else (including
        # SystemExit from a signal handler) must propagate.
        print("Cannot remove directory "+directory+ "\n")
 
 
# Main client loop: register with the server, fetch a run, execute it with
# trisurf in a scratch directory and send the produced VTU files back until
# the simulation ends or the server becomes unreachable.
#
# "except Exception" is used instead of a bare "except" throughout, so that
# the SystemExit raised by signal_handler is not swallowed and SIGINT/SIGTERM
# really stop the process.
while True:
    try:
        cid=get_client_id(CONNECT_ADDR, get_ip(),get_hostname())
    except Exception:
        print("Cannot get CID.")
        sleep(10)
        continue
    print("Got CID. getting RID.")
    while True:
        try:
            (rid,tape,vtu,status)=get_run(CONNECT_ADDR,cid)
        except Exception:
            print("Could not get RID.")
            sleep(10)
            # Leave the inner loop so that the client registers again.
            break
        else:
            # Run the simulation as a child process in a fresh scratch directory.
            workingdir=trisurf.Directory('/tmp/ts_'+str(uuid.uuid4()))
            workingdir.makeifnotexist()
            workingdir.goto()
            with open(workingdir.fullpath()+"/tape", 'w') as f:
                f.write(tape)
            # s counts the timesteps reported to the server; -1 means a fresh run.
            s=int(status)
            if s==-1:
                cmd=['trisurf', '--force-from-tape']
                print("Run id="+str(rid)+ " :: Starting from tape")
            else:
                # str() keeps this working when the server sends status as a number.
                with open(workingdir.fullpath()+"/.status",'w') as f:
                    f.write(str(status))
                with open(workingdir.fullpath()+"/initial.vtu",'w') as f:
                    f.write(vtu)
                cmd=['trisurf', '--restore-from-vtk', 'initial.vtu']
                print("Run id="+str(rid)+ " :: Restoring from vtk, last timestep "+str(status))
            p=subprocess.Popen(cmd, stdout=subprocess.DEVNULL)
            while True:
                # Look for new timestep files; upload them if present, otherwise ping.
                newVTU=getNewVTU(workingdir.fullpath())
                if newVTU: #upload
                    try:
                        # File names are relative, so this relies on the current
                        # directory still being the working directory.
                        for nv in sorted(newVTU):
                            with open(nv,'r') as f:
                                fc=f.read()
                            s=s+1
                            print('Uploading '+nv)
                            upload(CONNECT_ADDR, cid, rid, fc, s)
                            os.unlink(nv)
                    except Exception:
                        print("Could not upload")
                        p.terminate()
                        removeDir(workingdir.fullpath())
                        break
                    else:
                        print("VTU uploaded")
                else: #ping
                    try:
                        ping_run(CONNECT_ADDR, cid, rid)
                    except Exception:
                        print("Could not ping")
                        # Stop the simulation; the server is not tracking it any more.
                        p.terminate()
                        removeDir(workingdir.fullpath())
                        break
                # Check whether trisurf is still running. If not, leave the
                # monitoring loop and ask the server for the next run.
                sleep(1)
                if p.poll() is not None: # trisurf exited!
                    print("Trisurf was stopped with return code {}".format(p.returncode))
                    if p.returncode>0:
                        try:
                            send_error_report(CONNECT_ADDR, cid, rid, p.returncode)
                        except Exception:
                            print("Server didn't accept error report")
                    # NOTE(review): VTU files written since the last scan are
                    # deleted here without being uploaded - confirm this is intended.
                    removeDir(workingdir.fullpath())
                    break
                sleep(100)