mirror of
https://github.com/espressif/esp-idf
synced 2025-03-09 17:19:09 -04:00
tools: Fix idf_monitor test case occasional hangup or failure
Closes idf/esp-idf#39
This commit is contained in:
parent
64d038c0d5
commit
608fc23278
@ -547,7 +547,7 @@ test_idf_monitor:
|
||||
expire_in: 1 week
|
||||
script:
|
||||
- cd ${IDF_PATH}/tools/test_idf_monitor
|
||||
- ${IDF_PATH}/tools/ci/multirun_with_pyenv.sh ./run_test_idf_monitor.py
|
||||
- ./run_test_idf_monitor.py
|
||||
|
||||
test_idf_size:
|
||||
<<: *host_test_template
|
||||
|
@ -373,7 +373,7 @@ class Monitor(object):
|
||||
else:
|
||||
raise RuntimeError("Bad event data %r" % ((event_tag,data),))
|
||||
except SerialStopException:
|
||||
pass
|
||||
sys.stderr.write(ANSI_NORMAL + "Stopping condition has been received\n")
|
||||
finally:
|
||||
try:
|
||||
self.console_reader.stop()
|
||||
|
@ -19,13 +19,16 @@ from __future__ import unicode_literals
|
||||
from builtins import object
|
||||
from io import open
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import socket
|
||||
import pty
|
||||
import filecmp
|
||||
import threading
|
||||
|
||||
test_list = (
|
||||
# Add new tests here. All files should be placed in in_dir. Columns are:
|
||||
# Add new tests here. All files should be placed in IN_DIR. Columns are:
|
||||
# Input file Filter string File with expected output
|
||||
('in1.txt', '', 'in1f1.txt'),
|
||||
('in1.txt', '*:V', 'in1f1.txt'),
|
||||
@ -35,89 +38,137 @@ test_list = (
|
||||
('in2.txt', 'vfs', 'in2f2.txt'),
|
||||
)
|
||||
|
||||
in_dir = 'tests/' # tests are in this directory
|
||||
out_dir = 'outputs/' # test results are written to this directory (kept only for debugging purposes)
|
||||
socat_in = './socatfile'# temporary socat file (deleted after run)
|
||||
err_out = out_dir + 'monitor_error_output'
|
||||
elf_file = './dummy.elf' # ELF file used for starting the monitor
|
||||
idf_monitor = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH"))
|
||||
IN_DIR = 'tests/' # tests are in this directory
|
||||
OUT_DIR = 'outputs/' # test results are written to this directory (kept only for debugging purposes)
|
||||
ERR_OUT = OUT_DIR + 'monitor_error_output'
|
||||
ELF_FILE = './dummy.elf' # ELF file used for starting the monitor
|
||||
IDF_MONITOR = '{}/tools/idf_monitor.py'.format(os.getenv("IDF_PATH"))
|
||||
|
||||
class SocatRunner(object):
|
||||
"""
|
||||
Runs socat in the background for creating a socket.
|
||||
"""
|
||||
# connection related to communicating with idf_monitor through sockets
|
||||
HOST = 'localhost'
|
||||
# blocking socket operations are used with timeout:
|
||||
SOCKET_TIMEOUT = 30
|
||||
# idf_monitor is killed if it doesn't respond:
|
||||
MONITOR_TIMEOUT = 60
|
||||
# the test is restarted after failure (idf_monitor has to be killed):
|
||||
RETRIES_PER_TEST = 5
|
||||
|
||||
def monitor_timeout(process):
|
||||
if process.poll() is None:
|
||||
# idf_monitor is still running
|
||||
try:
|
||||
process.kill()
|
||||
print('\tidf_monitor was killed because it did not finish in {} seconds.'.format(MONITOR_TIMEOUT))
|
||||
except OSError as e:
|
||||
if e.errno == errno.ESRCH:
|
||||
# ignores a possible race condition which can occur when the process exits between poll() and kill()
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
class TestRunner(object):
|
||||
def __enter__(self):
|
||||
# Wait for a connection on port 2399 and then run "tail" which will send the file content to that port. Tail
|
||||
# is used because it can start even when the file doesn't exists and remains running after the file has been
|
||||
# processed. This way the idf_monitor can end the communication when it received the content. Using regular
|
||||
# "cat" would invoke exception in idf_monitor.
|
||||
# Note: "-c 1GB" option is used to force sending the whole file under the assumption that all testing files
|
||||
# will be much smaller than 1G.
|
||||
# Note: A temporary file socat_in is used in order to be able to start socat only once instead of for each test.
|
||||
socat_cmd = ['socat',
|
||||
'-U', # unidirectional pipe from file to port
|
||||
'TCP4-LISTEN:2399,reuseaddr,fork',
|
||||
'exec:"tail -c 1GB -F ' + socat_in + '"']
|
||||
print(' '.join(socat_cmd))
|
||||
self._socat_process = subprocess.Popen(socat_cmd, preexec_fn=os.setsid) # See __exit__ for os.setsid
|
||||
self.serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self.serversocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
|
||||
self.serversocket.bind((HOST, 0))
|
||||
self.port = self.serversocket.getsockname()[1]
|
||||
self.serversocket.listen(5)
|
||||
return self
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
# self._socat_process.terminate() doesn't enough because each connection to the port starts a new socat and a
|
||||
# tail processes
|
||||
os.killpg(os.getpgid(self._socat_process.pid), signal.SIGTERM)
|
||||
# Note: this terminates all the processes but makes the script UNIX-only
|
||||
self.serversocket.shutdown(socket.SHUT_RDWR)
|
||||
self.serversocket.close()
|
||||
print('Socket was closed successfully')
|
||||
|
||||
def cleanup():
|
||||
def accept_connection(self):
|
||||
""" returns a socket for sending the input for idf_monitor which must be closed before calling this again. """
|
||||
(clientsocket, address) = self.serversocket.accept()
|
||||
# exception will be thrown here if the idf_monitor didn't connect in time
|
||||
clientsocket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
|
||||
return clientsocket
|
||||
|
||||
def test_iteration(runner, test, startup_timeout):
|
||||
print('\nRunning test on {} with filter "{}" and expecting {}'.format(test[0], test[1], test[2]))
|
||||
try:
|
||||
os.remove(socat_in)
|
||||
except:
|
||||
# ignore if the file doesn't exist
|
||||
pass
|
||||
with open(OUT_DIR + test[2], "w", encoding='utf-8') as o_f, open(ERR_OUT, "w", encoding='utf-8') as e_f:
|
||||
monitor_cmd = [sys.executable,
|
||||
IDF_MONITOR, '--port', 'socket://{}:{}'.format(HOST, runner.port), '--print_filter', test[1], ELF_FILE]
|
||||
(master_fd, slave_fd) = pty.openpty()
|
||||
print('\t', ' '.join(monitor_cmd), sep='')
|
||||
print('\tstdout="{}" stderr="{}" stdin="{}"'.format(o_f.name, e_f.name, os.ttyname(slave_fd)))
|
||||
start = time.time()
|
||||
# the server socket is alive so idf_monitor can start now
|
||||
proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f, close_fds=True, bufsize=-1)
|
||||
# - idf_monitor's stdin needs to be connected to some pseudo-tty in docker image even when it is not
|
||||
# used at all
|
||||
# - setting bufsize is needed because the default value is different on Python 2 and 3
|
||||
# - the default close_fds is also different on Python 2 and 3
|
||||
monitor_watchdog = threading.Timer(MONITOR_TIMEOUT, monitor_timeout, [proc])
|
||||
monitor_watchdog.start()
|
||||
client = runner.accept_connection()
|
||||
# The connection is ready but idf_monitor cannot yet receive data. This seems to happen on Ubuntu 16.04 LTS
|
||||
# and is not related to the version of Python or pyserial. There seems to be no reliable way other than to
|
||||
# do a sleep. The idf_monitor header in ERR_OUT could be checked on Python 2 but the file is not flushed
|
||||
# on Python 3.
|
||||
print('\tSleeping for {:.2f} seconds'.format(startup_timeout))
|
||||
time.sleep(startup_timeout)
|
||||
with open(IN_DIR + test[0], "r", encoding='utf-8') as f:
|
||||
print('\tSending {} to the socket'.format(f.name))
|
||||
for line in f:
|
||||
client.sendall(line.encode())
|
||||
idf_exit_sequence = b'\x1d\n'
|
||||
print('\tSending <exit> to the socket')
|
||||
client.sendall(idf_exit_sequence)
|
||||
ret = proc.wait()
|
||||
end = time.time()
|
||||
print('\tidf_monitor exited after {:.2f} seconds'.format(end - start))
|
||||
if ret < 0:
|
||||
raise RuntimeError('idf_monitor was terminated by signal {}'.format(-ret))
|
||||
# idf_monitor needs to end before the socket is closed in order to exit without an exception.
|
||||
finally:
|
||||
if monitor_watchdog:
|
||||
monitor_watchdog.cancel()
|
||||
os.close(slave_fd)
|
||||
os.close(master_fd)
|
||||
if client:
|
||||
client.close()
|
||||
print('\tThe client was closed successfully')
|
||||
f1 = IN_DIR + test[2]
|
||||
f2 = OUT_DIR + test[2]
|
||||
print('\tdiff {} {}'.format(f1, f2))
|
||||
if filecmp.cmp(f1, f2, shallow=False):
|
||||
print('\tTest has passed')
|
||||
else:
|
||||
raise RuntimeError("The contents of the files are different. Please examine the artifacts.")
|
||||
|
||||
def main():
|
||||
start = time.time()
|
||||
cleanup() # avoid sending old content
|
||||
if not os.path.exists(out_dir):
|
||||
os.mkdir(out_dir)
|
||||
try:
|
||||
with SocatRunner():
|
||||
# Sleep is necessary to make sure that socat is already listening. Only one sleep is used per run (this is
|
||||
# another reason while the temporary socat_in file is used instead of directly reading the test files).
|
||||
time.sleep(1)
|
||||
for t in test_list:
|
||||
print('Running test on {} with filter "{}" and expecting {}'.format(t[0], t[1], t[2]))
|
||||
with open(in_dir + t[0], "r", encoding='utf-8') as i_f, open(socat_in, "w", encoding='utf-8') as s_f:
|
||||
print('cat {} > {}'.format(i_f.name, s_f.name))
|
||||
for line in i_f:
|
||||
s_f.write(line)
|
||||
idf_exit_sequence = b'\x1d\n'
|
||||
print('echo "<exit>" >> {}'.format(s_f.name))
|
||||
s_f.write(idf_exit_sequence.decode())
|
||||
monitor_cmd = [idf_monitor,
|
||||
'--port', 'socket://localhost:2399',
|
||||
'--print_filter', t[1],
|
||||
elf_file]
|
||||
with open(out_dir + t[2], "w", encoding='utf-8') as o_f, open(err_out, "w", encoding='utf-8') as e_f:
|
||||
try:
|
||||
(master_fd, slave_fd) = pty.openpty()
|
||||
print(' '.join(monitor_cmd), end=' ')
|
||||
print(' > {} 2> {} < {}'.format(o_f.name, e_f.name, os.ttyname(slave_fd)))
|
||||
proc = subprocess.Popen(monitor_cmd, stdin=slave_fd, stdout=o_f, stderr=e_f,
|
||||
close_fds=True)
|
||||
proc.wait()
|
||||
finally:
|
||||
os.close(slave_fd)
|
||||
os.close(master_fd)
|
||||
diff_cmd = ['diff', in_dir + t[2], out_dir + t[2]]
|
||||
print(' '.join(diff_cmd))
|
||||
subprocess.check_call(diff_cmd)
|
||||
print('Test has passed')
|
||||
finally:
|
||||
cleanup()
|
||||
gstart = time.time()
|
||||
if not os.path.exists(OUT_DIR):
|
||||
os.mkdir(OUT_DIR)
|
||||
|
||||
end = time.time()
|
||||
print('Execution took {:.2f} seconds'.format(end - start))
|
||||
socket.setdefaulttimeout(SOCKET_TIMEOUT)
|
||||
|
||||
for test in test_list:
|
||||
startup_timeout = 1.0
|
||||
for i in range(RETRIES_PER_TEST):
|
||||
with TestRunner() as runner:
|
||||
# Each test (and each retry) is run with a different port (and server socket). This is done for
|
||||
# the CI run where retry with a different socket is necessary to pass the test. According to the
|
||||
# experiments, retry with the same port (and server socket) is not sufficient.
|
||||
try:
|
||||
test_iteration(runner, test, startup_timeout)
|
||||
# no more retries if test_iteration exited without an exception
|
||||
break
|
||||
except Exception as e:
|
||||
if i < RETRIES_PER_TEST - 1:
|
||||
print('Test has failed with exception:', e)
|
||||
print('Another attempt will be made.')
|
||||
startup_timeout += 0.5
|
||||
else:
|
||||
raise
|
||||
|
||||
gend = time.time()
|
||||
print('Execution took {:.2f} seconds\n'.format(gend - gstart))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user