Merge pull request #9 in SEL4/l4v from regression-cputime to master
* commit '84e5f7d81403549f9d88fab3bf0da02b657f47bc': regression: change CPU time non-mono from an error to a warning, so that it doesn't kill an otherwise working build. style: regression: refactor use of print_test_line. regression: report CPU usage; support timeouts based on CPU usage.
This commit is contained in:
commit
625c15626d
|
@ -0,0 +1,190 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
#
|
||||
# Copyright 2016, NICTA
|
||||
#
|
||||
# This software may be distributed and modified according to the terms of
|
||||
# the BSD 2-Clause license. Note that NO WARRANTY is provided.
|
||||
# See "LICENSE_BSD2.txt" for details.
|
||||
#
|
||||
# @TAG(NICTA_BSD)
|
||||
#
|
||||
|
||||
'''
|
||||
Monitors the total CPU usage of a process and its children. Usage is similar
|
||||
to the UNIX `time` utility.
|
||||
|
||||
NB: In order to get up-to-date information, we don't use interfaces such
|
||||
as os.times, which only includes terminated and waited-for children.
|
||||
Instead, we poll the process tree regularly. This means that when a
|
||||
child process dies, its CPU time since the last poll is lost and not
|
||||
included in the total time.
|
||||
|
||||
Hence the total reported time will be an underestimate of the true
|
||||
CPU usage, especially for short-lived child processes.
|
||||
'''
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import psutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import psutil
|
||||
if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
|
||||
psutil.Process.children = psutil.Process.get_children
|
||||
if not hasattr(psutil.Process, "memory_maps") and hasattr(psutil.Process, "get_memory_maps"):
|
||||
psutil.Process.memory_maps = psutil.Process.get_memory_maps
|
||||
|
||||
except ImportError:
|
||||
print("Error: 'psutil' module not available. Run\n"
|
||||
"\n"
|
||||
" pip install --user psutil\n"
|
||||
"\n"
|
||||
"to install.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
class Poller(threading.Thread):
|
||||
'''Subclass of threading.Thread that monitors CPU usage of another process.
|
||||
Use run() to start the process.
|
||||
Use cpu_usage() to retrieve the latest estimate of CPU usage.'''
|
||||
def __init__(self, pid):
|
||||
super(Poller, self).__init__()
|
||||
# Daemonise ourselves to avoid delaying exit of the process of our
|
||||
# calling thread.
|
||||
self.daemon = True
|
||||
self.pid = pid
|
||||
self.finished = False
|
||||
self.started = threading.Semaphore(0)
|
||||
self.proc = None
|
||||
|
||||
# Reported stat.
|
||||
self.cpu = 0.0
|
||||
|
||||
# Remember CPU times of recently seen children.
|
||||
# This is to prevent double-counting for child processes.
|
||||
self.current_children = {} # {(pid, create_time): CPU time}
|
||||
# CPU time of dead children is recorded here.
|
||||
self.old_children_cpu = 0.0
|
||||
|
||||
def run(self):
|
||||
def update():
|
||||
total = 0.0
|
||||
|
||||
# Fetch process's usage.
|
||||
try:
|
||||
if self.proc is None:
|
||||
self.proc = psutil.Process(self.pid)
|
||||
total += sum(self.proc.cpu_times())
|
||||
|
||||
# Fetch children's usage.
|
||||
new_current_children = {}
|
||||
for c in self.proc.children(recursive=True):
|
||||
try:
|
||||
t = sum(c.cpu_times())
|
||||
new_current_children[(c.pid, c.create_time())] = t
|
||||
total += t
|
||||
except psutil.NoSuchProcess:
|
||||
pass
|
||||
except psutil.AccessDenied:
|
||||
pass
|
||||
|
||||
# For children that are no longer running, remember their
|
||||
# most recently recorded CPU time.
|
||||
reaped_cpu = 0.0
|
||||
for c_id, c_t in self.current_children.iteritems():
|
||||
if c_id not in new_current_children:
|
||||
reaped_cpu += c_t
|
||||
self.old_children_cpu += reaped_cpu
|
||||
self.current_children = new_current_children
|
||||
total += self.old_children_cpu
|
||||
|
||||
except psutil.AccessDenied as err:
|
||||
warnings.warn("access denied: pid=%d" % err.pid, RuntimeWarning)
|
||||
|
||||
if total < self.cpu:
|
||||
try:
|
||||
cmd = repr(' '.join(self.proc.cmdline()))
|
||||
except Exception:
|
||||
cmd = '??'
|
||||
warnings.warn("cpu non-monotonic: %.15f -> %.15f, pid=%d, cmd=%s" %
|
||||
(self.cpu, total, self.pid, cmd),
|
||||
RuntimeWarning)
|
||||
return total
|
||||
|
||||
# Fetch a sample, and notify others that we have started.
|
||||
self.cpu = update()
|
||||
self.started.release()
|
||||
|
||||
# Poll the process periodically.
|
||||
#
|
||||
# We poll quickly at the beginning and use exponential backout
|
||||
# to try and get better stats on short-lived processes.
|
||||
#
|
||||
polling_interval = 0.01
|
||||
max_interval = 0.5
|
||||
while not self.finished:
|
||||
time.sleep(polling_interval)
|
||||
try:
|
||||
self.cpu = update()
|
||||
except psutil.NoSuchProcess:
|
||||
break
|
||||
if polling_interval < max_interval:
|
||||
polling_interval = min(polling_interval * 1.5, max_interval)
|
||||
|
||||
def cpu_usage(self):
|
||||
return self.cpu
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *_):
|
||||
self.finished = True
|
||||
|
||||
def process_poller(pid):
|
||||
'''Initiate polling of a subprocess. This is intended to be used in a
|
||||
`with` block.'''
|
||||
# Create a new thread and start it up.
|
||||
p = Poller(pid)
|
||||
p.start()
|
||||
|
||||
# Wait for the thread to record at least one sample before continuing.
|
||||
p.started.acquire()
|
||||
|
||||
return p
|
||||
|
||||
def main():
|
||||
if len(sys.argv) <= 1 or sys.argv[1] in ['-?', '--help']:
|
||||
print('Usage: %s command args...\n Measure total CPU ' \
|
||||
'usage of a command' % sys.argv[0], file=sys.stderr)
|
||||
return -1
|
||||
|
||||
# Run the command requested.
|
||||
try:
|
||||
p = subprocess.Popen(sys.argv[1:])
|
||||
except OSError:
|
||||
print('command not found', file=sys.stderr)
|
||||
return -1
|
||||
|
||||
cpu = 0
|
||||
m = process_poller(p.pid)
|
||||
while True:
|
||||
try:
|
||||
p.returncode = p.wait()
|
||||
break
|
||||
except KeyboardInterrupt:
|
||||
# The user Ctrl-C-ed us. The child should have received SIGINT;
|
||||
# continue waiting for it to finish
|
||||
pass
|
||||
|
||||
print('Total cpu %f seconds' % m.cpu_usage(), file=sys.stderr)
|
||||
|
||||
return p.returncode
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
|
@ -11,21 +11,25 @@
|
|||
<!ELEMENT testsuite (set|sequence|test)+>
|
||||
<!ATTLIST testsuite cwd CDATA #IMPLIED>
|
||||
<!ATTLIST testsuite timeout CDATA #IMPLIED>
|
||||
<!ATTLIST testsuite cpu-timeout CDATA #IMPLIED>
|
||||
<!ATTLIST testsuite depends NMTOKENS #IMPLIED>
|
||||
|
||||
<!ELEMENT set (set|sequence|test)+>
|
||||
<!ATTLIST set cwd CDATA #IMPLIED>
|
||||
<!ATTLIST set timeout CDATA #IMPLIED>
|
||||
<!ATTLIST set cpu-timeout CDATA #IMPLIED>
|
||||
<!ATTLIST set depends NMTOKENS #IMPLIED>
|
||||
|
||||
<!ELEMENT sequence (set|sequence|test)+>
|
||||
<!ATTLIST sequence cwd CDATA #IMPLIED>
|
||||
<!ATTLIST sequence timeout CDATA #IMPLIED>
|
||||
<!ATTLIST sequence cpu-timeout CDATA #IMPLIED>
|
||||
<!ATTLIST sequence depends NMTOKENS #IMPLIED>
|
||||
|
||||
<!ELEMENT test ANY>
|
||||
<!ATTLIST test name ID #REQUIRED>
|
||||
<!ATTLIST test cwd CDATA #IMPLIED>
|
||||
<!ATTLIST test timeout CDATA #IMPLIED>
|
||||
<!ATTLIST test cpu-timeout CDATA #IMPLIED>
|
||||
<!ATTLIST test depends NMTOKENS #IMPLIED>
|
||||
|
||||
|
|
|
@ -12,14 +12,13 @@
|
|||
#
|
||||
# Very simple command-line test runner.
|
||||
#
|
||||
# Ignores timeouts.
|
||||
#
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import atexit
|
||||
import datetime
|
||||
import cpuusage
|
||||
import fnmatch
|
||||
import memusage
|
||||
import os
|
||||
|
@ -33,19 +32,20 @@ import subprocess
|
|||
import sys
|
||||
import testspec
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Try importing psutil.
|
||||
PS_UTIL_AVAILABLE = False
|
||||
try:
|
||||
import psutil
|
||||
from psutil import NoSuchProcess
|
||||
PS_UTIL_AVAILABLE = True
|
||||
if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
|
||||
# psutil API change
|
||||
psutil.Process.children = psutil.Process.get_children
|
||||
except ImportError:
|
||||
pass
|
||||
print("Error: failed to import psutil module.\n"
|
||||
"To install psutil, try:\n"
|
||||
" pip install --user psutil", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
ANSI_RESET = "\033[0m"
|
||||
ANSI_RED = "\033[31;1m"
|
||||
|
@ -75,27 +75,24 @@ def which(file):
|
|||
# are looking at it, but not where a PID has been reused.
|
||||
#
|
||||
def kill_family(parent_pid):
|
||||
if not PS_UTIL_AVAILABLE:
|
||||
return
|
||||
|
||||
# Find process.
|
||||
try:
|
||||
process = psutil.Process(parent_pid)
|
||||
except NoSuchProcess:
|
||||
except psutil.NoSuchProcess:
|
||||
# Race. Nothing more to do.
|
||||
return
|
||||
|
||||
# SIGSTOP everyone first.
|
||||
try:
|
||||
process.suspend()
|
||||
except NoSuchProcess:
|
||||
except psutil.NoSuchProcess:
|
||||
# Race. Nothing more to do.
|
||||
return
|
||||
process_list = [process]
|
||||
for child in process.children(recursive=True):
|
||||
try:
|
||||
child.suspend()
|
||||
except NoSuchProcess:
|
||||
except psutil.NoSuchProcess:
|
||||
# Race.
|
||||
pass
|
||||
else:
|
||||
|
@ -109,10 +106,11 @@ def kill_family(parent_pid):
|
|||
#
|
||||
# Run a single test.
|
||||
#
|
||||
# Return a tuple of (success, log, time_taken, memory_usage).
|
||||
# Return a dict of (name, status, output, cpu time, elapsed time, memory usage).
|
||||
# This is placed onto the given queue.
|
||||
#
|
||||
# Log only contains the output if verbose is *false*; otherwise, the
|
||||
# log is output to stdout where we can't easily get to it.
|
||||
# log is output to stdout where we can't easily get to it.
|
||||
#
|
||||
def run_test(test, status_queue, verbose=False):
|
||||
# Construct the base command.
|
||||
|
@ -142,6 +140,7 @@ def run_test(test, status_queue, verbose=False):
|
|||
|
||||
# Start the command.
|
||||
peak_mem_usage = None
|
||||
cpu_usage = None
|
||||
try:
|
||||
process = subprocess.Popen(command,
|
||||
stdout=output, stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
|
||||
|
@ -161,6 +160,7 @@ def run_test(test, status_queue, verbose=False):
|
|||
|
||||
# Setup an alarm at the timeout.
|
||||
was_timeout = [False] # Wrap in list to prevent do_timeout getting the wrong variable scope
|
||||
was_cpu_timeout = [False] # as above
|
||||
def do_timeout():
|
||||
was_timeout[0] = True
|
||||
kill_family(process.pid)
|
||||
|
@ -168,11 +168,31 @@ def run_test(test, status_queue, verbose=False):
|
|||
if test.timeout > 0:
|
||||
timer.start()
|
||||
|
||||
# Wait for the command to finish.
|
||||
with memusage.process_poller(process.pid) as m:
|
||||
(output, _) = process.communicate()
|
||||
peak_mem_usage = m.peak_mem_usage()
|
||||
process_running = False
|
||||
with cpuusage.process_poller(process.pid) as c:
|
||||
# Also set a CPU timeout. We poll the cpu usage periodically.
|
||||
def cpu_timeout():
|
||||
interval = min(0.5, test.cpu_timeout / 10.0)
|
||||
while process_running:
|
||||
if c.cpu_usage() > test.cpu_timeout:
|
||||
was_cpu_timeout[0] = True
|
||||
kill_family(process.pid)
|
||||
break
|
||||
time.sleep(interval)
|
||||
|
||||
if test.cpu_timeout > 0:
|
||||
cpu_timer = threading.Thread(target=cpu_timeout)
|
||||
cpu_timer.daemon = True
|
||||
cpu_timer.start()
|
||||
|
||||
with memusage.process_poller(process.pid) as m:
|
||||
# Wait for the command to finish.
|
||||
(output, _) = process.communicate()
|
||||
peak_mem_usage = m.peak_mem_usage()
|
||||
cpu_usage = c.cpu_usage()
|
||||
|
||||
process_running = False
|
||||
if test.cpu_timeout > 0:
|
||||
cpu_timer.join()
|
||||
|
||||
# Cancel the alarm. Small race here (if the timer fires just after the
|
||||
# process finished), but the returncode of our process should still be 0,
|
||||
|
@ -185,7 +205,7 @@ def run_test(test, status_queue, verbose=False):
|
|||
output = output.decode()
|
||||
if process.returncode == 0:
|
||||
status = "pass"
|
||||
elif was_timeout[0]:
|
||||
elif was_timeout[0] or was_cpu_timeout[0]:
|
||||
status = "TIMEOUT"
|
||||
else:
|
||||
status = "FAILED"
|
||||
|
@ -193,6 +213,7 @@ def run_test(test, status_queue, verbose=False):
|
|||
'status': status,
|
||||
'output': output,
|
||||
'real_time': datetime.datetime.now() - start_time,
|
||||
'cpu_time': cpu_usage,
|
||||
'mem_usage': peak_mem_usage})
|
||||
|
||||
# Print a status line.
|
||||
|
@ -202,15 +223,21 @@ def print_test_line_start(test_name, legacy=False):
|
|||
print(" Started %-25s " % (test_name + " ..."))
|
||||
sys.stdout.flush()
|
||||
|
||||
def print_test_line(test_name, color, status, time_taken, mem, legacy=False):
|
||||
if mem:
|
||||
def print_test_line(test_name, color, status, real_time=None, cpu_time=None, mem=None, legacy=False):
|
||||
if mem is not None:
|
||||
# Report memory usage in gigabytes.
|
||||
mem = '%5.2fGB' % round(float(mem) / 1024 / 1024 / 1024, 2)
|
||||
if time_taken:
|
||||
# Strip milliseconds for better printing.
|
||||
time_taken = datetime.timedelta(seconds=int(time_taken.total_seconds()))
|
||||
time_taken = '%8s' % time_taken
|
||||
extras = ', '.join(filter(None, [time_taken, mem]))
|
||||
|
||||
if real_time is not None:
|
||||
# Format times as H:MM:SS; strip milliseconds for better printing.
|
||||
real_time = datetime.timedelta(seconds=int(real_time.total_seconds()))
|
||||
real_time = '%8s real' % real_time
|
||||
|
||||
if cpu_time is not None:
|
||||
cpu_time = datetime.timedelta(seconds=int(cpu_time))
|
||||
cpu_time = '%8s cpu' % cpu_time
|
||||
|
||||
extras = ', '.join(filter(None, [real_time, cpu_time, mem]))
|
||||
|
||||
# Print status line.
|
||||
if legacy:
|
||||
|
@ -251,6 +278,7 @@ def main():
|
|||
help="list known tests")
|
||||
parser.add_argument("--legacy", action="store_true",
|
||||
help="use legacy 'IsaMakefile' specs")
|
||||
# --legacy-status used by top-level regression-v2 script
|
||||
parser.add_argument("--legacy-status", action="store_true",
|
||||
help="emulate legacy (sequential code) status lines")
|
||||
parser.add_argument("-v", "--verbose", action="store_true",
|
||||
|
@ -290,17 +318,6 @@ def main():
|
|||
parser.error("Unknown test names: %s" % (", ".join(sorted(bad_names))))
|
||||
tests_to_run = [t for t in tests if t.name in desired_names]
|
||||
|
||||
# If running at least one test, and psutil is not available, print a warning.
|
||||
if len(tests_to_run) > 0 and not PS_UTIL_AVAILABLE:
|
||||
print("\n"
|
||||
"Warning: 'psutil' module not available. Processes may not be correctly\n"
|
||||
"stopped. Run\n"
|
||||
"\n"
|
||||
" pip install --user psutil\n"
|
||||
"\n"
|
||||
"to install.\n"
|
||||
"\n")
|
||||
|
||||
# Run the tests.
|
||||
print("Running %d test(s)..." % len(tests_to_run))
|
||||
failed_tests = set()
|
||||
|
@ -351,7 +368,7 @@ def main():
|
|||
# Non-blocked but depends on a failed test. Remove it.
|
||||
if len(real_depends & failed_tests) > 0:
|
||||
wipe_tty_status()
|
||||
print_test_line(t.name, ANSI_YELLOW, "skipped", None, None, args.legacy_status)
|
||||
print_test_line(t.name, ANSI_YELLOW, "skipped", legacy=args.legacy_status)
|
||||
failed_tests.add(t.name)
|
||||
del tests_queue[i]
|
||||
break
|
||||
|
@ -360,20 +377,23 @@ def main():
|
|||
try:
|
||||
while True:
|
||||
info = status_queue.get(block=True, timeout=0.1337) # Built-in pause
|
||||
name = info['name']
|
||||
del current_jobs[name]
|
||||
name, status = info['name'], info['status']
|
||||
|
||||
status, log, time_taken, mem = info['status'], info['output'], info['real_time'], info['mem_usage']
|
||||
test_results[name] = info
|
||||
del current_jobs[name]
|
||||
|
||||
# Print result.
|
||||
wipe_tty_status()
|
||||
if status != 'pass':
|
||||
failed_tests.add(name)
|
||||
print_test_line(name, ANSI_RED, "%s *" % status, time_taken, mem, args.legacy_status)
|
||||
status += " *"
|
||||
colour = ANSI_RED
|
||||
else:
|
||||
passed_tests.add(name)
|
||||
print_test_line(name, ANSI_GREEN, status, time_taken, mem, args.legacy_status)
|
||||
colour = ANSI_GREEN
|
||||
print_test_line(name, colour, status,
|
||||
real_time=info['real_time'], cpu_time=info['cpu_time'], mem=info['mem_usage'],
|
||||
legacy=args.legacy_status)
|
||||
except Queue.Empty:
|
||||
pass
|
||||
wipe_tty_status()
|
||||
|
|
|
@ -31,13 +31,15 @@ class TestEnv():
|
|||
self.pwd = pwd
|
||||
self.cwd = "."
|
||||
self.timeout = 0
|
||||
self.cpu_timeout = 0
|
||||
self.depends = set()
|
||||
|
||||
class Test():
|
||||
def __init__(self, name, command, timeout=0, cwd="", depends=None):
|
||||
def __init__(self, name, command, timeout=0, cpu_timeout=0, cwd="", depends=None):
|
||||
self.name = name
|
||||
self.command = command
|
||||
self.timeout = timeout
|
||||
self.cpu_timeout = cpu_timeout
|
||||
self.cwd = cwd
|
||||
|
||||
if depends == None:
|
||||
|
@ -53,6 +55,12 @@ def parse_attributes(tag, env, strict=True):
|
|||
except:
|
||||
if strict:
|
||||
raise
|
||||
if tag.get("cpu-timeout"):
|
||||
try:
|
||||
env.cpu_timeout = float(tag.get("cpu-timeout"))
|
||||
except:
|
||||
if strict:
|
||||
raise
|
||||
if tag.get("cwd"):
|
||||
env.cwd = tag.get("cwd")
|
||||
if tag.get("depends"):
|
||||
|
@ -64,6 +72,7 @@ def parse_test(doc, env, strict=True):
|
|||
parse_attributes(doc, env, strict=strict)
|
||||
return Test(doc.get("name"), doc.text.strip(),
|
||||
timeout=env.timeout,
|
||||
cpu_timeout=env.cpu_timeout,
|
||||
cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
|
||||
depends=env.depends)
|
||||
|
||||
|
@ -347,8 +356,8 @@ def main():
|
|||
|
||||
# Print results
|
||||
for test in tests:
|
||||
print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
|
||||
test.command, test.timeout, ",".join(test.depends), test.cwd))
|
||||
print("\"%s\" [timeout=%d, cpu-timeout=%g, parents=%s, cwd=%s]" % (
|
||||
test.command, test.timeout, test.cpu_timeout, ",".join(test.depends), test.cwd))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue