Merge pull request #9 in SEL4/l4v from regression-cputime to master

* commit '84e5f7d81403549f9d88fab3bf0da02b657f47bc':
  regression: change CPU time non-mono from an error to a warning, so that it doesn't kill an otherwise working build.
  style: regression: refactor use of print_test_line.
  regression: report CPU usage; support timeouts based on CPU usage.
This commit is contained in:
Japheth Lim 2016-02-01 05:23:47 +00:00
commit 625c15626d
4 changed files with 271 additions and 48 deletions

190
misc/regression/cpuusage.py Normal file
View File

@ -0,0 +1,190 @@
#!/usr/bin/env python
#
#
# Copyright 2016, NICTA
#
# This software may be distributed and modified according to the terms of
# the BSD 2-Clause license. Note that NO WARRANTY is provided.
# See "LICENSE_BSD2.txt" for details.
#
# @TAG(NICTA_BSD)
#
'''
Monitors the total CPU usage of a process and its children. Usage is similar
to the UNIX `time` utility.
NB: In order to get up-to-date information, we don't use interfaces such
as os.times, which only includes terminated and waited-for children.
Instead, we poll the process tree regularly. This means that when a
child process dies, its CPU time since the last poll is lost and not
included in the total time.
Hence the total reported time will be an underestimate of the true
CPU usage, especially for short-lived child processes.
'''
from __future__ import print_function
import os
import psutil
import signal
import subprocess
import sys
import threading
import time
import warnings
try:
import psutil
if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
psutil.Process.children = psutil.Process.get_children
if not hasattr(psutil.Process, "memory_maps") and hasattr(psutil.Process, "get_memory_maps"):
psutil.Process.memory_maps = psutil.Process.get_memory_maps
except ImportError:
print("Error: 'psutil' module not available. Run\n"
"\n"
" pip install --user psutil\n"
"\n"
"to install.", file=sys.stderr)
sys.exit(1)
class Poller(threading.Thread):
'''Subclass of threading.Thread that monitors CPU usage of another process.
Use run() to start the process.
Use cpu_usage() to retrieve the latest estimate of CPU usage.'''
def __init__(self, pid):
super(Poller, self).__init__()
# Daemonise ourselves to avoid delaying exit of the process of our
# calling thread.
self.daemon = True
self.pid = pid
self.finished = False
self.started = threading.Semaphore(0)
self.proc = None
# Reported stat.
self.cpu = 0.0
# Remember CPU times of recently seen children.
# This is to prevent double-counting for child processes.
self.current_children = {} # {(pid, create_time): CPU time}
# CPU time of dead children is recorded here.
self.old_children_cpu = 0.0
def run(self):
def update():
total = 0.0
# Fetch process's usage.
try:
if self.proc is None:
self.proc = psutil.Process(self.pid)
total += sum(self.proc.cpu_times())
# Fetch children's usage.
new_current_children = {}
for c in self.proc.children(recursive=True):
try:
t = sum(c.cpu_times())
new_current_children[(c.pid, c.create_time())] = t
total += t
except psutil.NoSuchProcess:
pass
except psutil.AccessDenied:
pass
# For children that are no longer running, remember their
# most recently recorded CPU time.
reaped_cpu = 0.0
for c_id, c_t in self.current_children.iteritems():
if c_id not in new_current_children:
reaped_cpu += c_t
self.old_children_cpu += reaped_cpu
self.current_children = new_current_children
total += self.old_children_cpu
except psutil.AccessDenied as err:
warnings.warn("access denied: pid=%d" % err.pid, RuntimeWarning)
if total < self.cpu:
try:
cmd = repr(' '.join(self.proc.cmdline()))
except Exception:
cmd = '??'
warnings.warn("cpu non-monotonic: %.15f -> %.15f, pid=%d, cmd=%s" %
(self.cpu, total, self.pid, cmd),
RuntimeWarning)
return total
# Fetch a sample, and notify others that we have started.
self.cpu = update()
self.started.release()
# Poll the process periodically.
#
# We poll quickly at the beginning and use exponential backout
# to try and get better stats on short-lived processes.
#
polling_interval = 0.01
max_interval = 0.5
while not self.finished:
time.sleep(polling_interval)
try:
self.cpu = update()
except psutil.NoSuchProcess:
break
if polling_interval < max_interval:
polling_interval = min(polling_interval * 1.5, max_interval)
def cpu_usage(self):
return self.cpu
def __enter__(self):
return self
def __exit__(self, *_):
self.finished = True
def process_poller(pid):
'''Initiate polling of a subprocess. This is intended to be used in a
`with` block.'''
# Create a new thread and start it up.
p = Poller(pid)
p.start()
# Wait for the thread to record at least one sample before continuing.
p.started.acquire()
return p
def main():
if len(sys.argv) <= 1 or sys.argv[1] in ['-?', '--help']:
print('Usage: %s command args...\n Measure total CPU ' \
'usage of a command' % sys.argv[0], file=sys.stderr)
return -1
# Run the command requested.
try:
p = subprocess.Popen(sys.argv[1:])
except OSError:
print('command not found', file=sys.stderr)
return -1
cpu = 0
m = process_poller(p.pid)
while True:
try:
p.returncode = p.wait()
break
except KeyboardInterrupt:
# The user Ctrl-C-ed us. The child should have received SIGINT;
# continue waiting for it to finish
pass
print('Total cpu %f seconds' % m.cpu_usage(), file=sys.stderr)
return p.returncode
if __name__ == '__main__':
sys.exit(main())

View File

@ -11,21 +11,25 @@
<!ELEMENT testsuite (set|sequence|test)+>
<!ATTLIST testsuite cwd CDATA #IMPLIED>
<!ATTLIST testsuite timeout CDATA #IMPLIED>
<!ATTLIST testsuite cpu-timeout CDATA #IMPLIED>
<!ATTLIST testsuite depends NMTOKENS #IMPLIED>
<!ELEMENT set (set|sequence|test)+>
<!ATTLIST set cwd CDATA #IMPLIED>
<!ATTLIST set timeout CDATA #IMPLIED>
<!ATTLIST set cpu-timeout CDATA #IMPLIED>
<!ATTLIST set depends NMTOKENS #IMPLIED>
<!ELEMENT sequence (set|sequence|test)+>
<!ATTLIST sequence cwd CDATA #IMPLIED>
<!ATTLIST sequence timeout CDATA #IMPLIED>
<!ATTLIST sequence cpu-timeout CDATA #IMPLIED>
<!ATTLIST sequence depends NMTOKENS #IMPLIED>
<!ELEMENT test ANY>
<!ATTLIST test name ID #REQUIRED>
<!ATTLIST test cwd CDATA #IMPLIED>
<!ATTLIST test timeout CDATA #IMPLIED>
<!ATTLIST test cpu-timeout CDATA #IMPLIED>
<!ATTLIST test depends NMTOKENS #IMPLIED>

View File

@ -12,14 +12,13 @@
#
# Very simple command-line test runner.
#
# Ignores timeouts.
#
from __future__ import print_function
import argparse
import atexit
import datetime
import cpuusage
import fnmatch
import memusage
import os
@ -33,19 +32,20 @@ import subprocess
import sys
import testspec
import threading
import time
import traceback
import xml.etree.ElementTree as ET
# Try importing psutil.
PS_UTIL_AVAILABLE = False
try:
import psutil
from psutil import NoSuchProcess
PS_UTIL_AVAILABLE = True
if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
# psutil API change
psutil.Process.children = psutil.Process.get_children
except ImportError:
pass
print("Error: failed to import psutil module.\n"
"To install psutil, try:\n"
" pip install --user psutil", file=sys.stderr)
sys.exit(2)
ANSI_RESET = "\033[0m"
ANSI_RED = "\033[31;1m"
@ -75,27 +75,24 @@ def which(file):
# are looking at it, but not where a PID has been reused.
#
def kill_family(parent_pid):
if not PS_UTIL_AVAILABLE:
return
# Find process.
try:
process = psutil.Process(parent_pid)
except NoSuchProcess:
except psutil.NoSuchProcess:
# Race. Nothing more to do.
return
# SIGSTOP everyone first.
try:
process.suspend()
except NoSuchProcess:
except psutil.NoSuchProcess:
# Race. Nothing more to do.
return
process_list = [process]
for child in process.children(recursive=True):
try:
child.suspend()
except NoSuchProcess:
except psutil.NoSuchProcess:
# Race.
pass
else:
@ -109,10 +106,11 @@ def kill_family(parent_pid):
#
# Run a single test.
#
# Return a tuple of (success, log, time_taken, memory_usage).
# Return a dict of (name, status, output, cpu time, elapsed time, memory usage).
# This is placed onto the given queue.
#
# Log only contains the output if verbose is *false*; otherwise, the
# log is output to stdout where we can't easily get to it.
# log is output to stdout where we can't easily get to it.
#
def run_test(test, status_queue, verbose=False):
# Construct the base command.
@ -142,6 +140,7 @@ def run_test(test, status_queue, verbose=False):
# Start the command.
peak_mem_usage = None
cpu_usage = None
try:
process = subprocess.Popen(command,
stdout=output, stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
@ -161,6 +160,7 @@ def run_test(test, status_queue, verbose=False):
# Setup an alarm at the timeout.
was_timeout = [False] # Wrap in list to prevent do_timeout getting the wrong variable scope
was_cpu_timeout = [False] # as above
def do_timeout():
was_timeout[0] = True
kill_family(process.pid)
@ -168,11 +168,31 @@ def run_test(test, status_queue, verbose=False):
if test.timeout > 0:
timer.start()
# Wait for the command to finish.
with memusage.process_poller(process.pid) as m:
(output, _) = process.communicate()
peak_mem_usage = m.peak_mem_usage()
process_running = False
with cpuusage.process_poller(process.pid) as c:
# Also set a CPU timeout. We poll the cpu usage periodically.
def cpu_timeout():
interval = min(0.5, test.cpu_timeout / 10.0)
while process_running:
if c.cpu_usage() > test.cpu_timeout:
was_cpu_timeout[0] = True
kill_family(process.pid)
break
time.sleep(interval)
if test.cpu_timeout > 0:
cpu_timer = threading.Thread(target=cpu_timeout)
cpu_timer.daemon = True
cpu_timer.start()
with memusage.process_poller(process.pid) as m:
# Wait for the command to finish.
(output, _) = process.communicate()
peak_mem_usage = m.peak_mem_usage()
cpu_usage = c.cpu_usage()
process_running = False
if test.cpu_timeout > 0:
cpu_timer.join()
# Cancel the alarm. Small race here (if the timer fires just after the
# process finished), but the returncode of our process should still be 0,
@ -185,7 +205,7 @@ def run_test(test, status_queue, verbose=False):
output = output.decode()
if process.returncode == 0:
status = "pass"
elif was_timeout[0]:
elif was_timeout[0] or was_cpu_timeout[0]:
status = "TIMEOUT"
else:
status = "FAILED"
@ -193,6 +213,7 @@ def run_test(test, status_queue, verbose=False):
'status': status,
'output': output,
'real_time': datetime.datetime.now() - start_time,
'cpu_time': cpu_usage,
'mem_usage': peak_mem_usage})
# Print a status line.
@ -202,15 +223,21 @@ def print_test_line_start(test_name, legacy=False):
print(" Started %-25s " % (test_name + " ..."))
sys.stdout.flush()
def print_test_line(test_name, color, status, time_taken, mem, legacy=False):
if mem:
def print_test_line(test_name, color, status, real_time=None, cpu_time=None, mem=None, legacy=False):
if mem is not None:
# Report memory usage in gigabytes.
mem = '%5.2fGB' % round(float(mem) / 1024 / 1024 / 1024, 2)
if time_taken:
# Strip milliseconds for better printing.
time_taken = datetime.timedelta(seconds=int(time_taken.total_seconds()))
time_taken = '%8s' % time_taken
extras = ', '.join(filter(None, [time_taken, mem]))
if real_time is not None:
# Format times as H:MM:SS; strip milliseconds for better printing.
real_time = datetime.timedelta(seconds=int(real_time.total_seconds()))
real_time = '%8s real' % real_time
if cpu_time is not None:
cpu_time = datetime.timedelta(seconds=int(cpu_time))
cpu_time = '%8s cpu' % cpu_time
extras = ', '.join(filter(None, [real_time, cpu_time, mem]))
# Print status line.
if legacy:
@ -251,6 +278,7 @@ def main():
help="list known tests")
parser.add_argument("--legacy", action="store_true",
help="use legacy 'IsaMakefile' specs")
# --legacy-status used by top-level regression-v2 script
parser.add_argument("--legacy-status", action="store_true",
help="emulate legacy (sequential code) status lines")
parser.add_argument("-v", "--verbose", action="store_true",
@ -290,17 +318,6 @@ def main():
parser.error("Unknown test names: %s" % (", ".join(sorted(bad_names))))
tests_to_run = [t for t in tests if t.name in desired_names]
# If running at least one test, and psutil is not available, print a warning.
if len(tests_to_run) > 0 and not PS_UTIL_AVAILABLE:
print("\n"
"Warning: 'psutil' module not available. Processes may not be correctly\n"
"stopped. Run\n"
"\n"
" pip install --user psutil\n"
"\n"
"to install.\n"
"\n")
# Run the tests.
print("Running %d test(s)..." % len(tests_to_run))
failed_tests = set()
@ -351,7 +368,7 @@ def main():
# Non-blocked but depends on a failed test. Remove it.
if len(real_depends & failed_tests) > 0:
wipe_tty_status()
print_test_line(t.name, ANSI_YELLOW, "skipped", None, None, args.legacy_status)
print_test_line(t.name, ANSI_YELLOW, "skipped", legacy=args.legacy_status)
failed_tests.add(t.name)
del tests_queue[i]
break
@ -360,20 +377,23 @@ def main():
try:
while True:
info = status_queue.get(block=True, timeout=0.1337) # Built-in pause
name = info['name']
del current_jobs[name]
name, status = info['name'], info['status']
status, log, time_taken, mem = info['status'], info['output'], info['real_time'], info['mem_usage']
test_results[name] = info
del current_jobs[name]
# Print result.
wipe_tty_status()
if status != 'pass':
failed_tests.add(name)
print_test_line(name, ANSI_RED, "%s *" % status, time_taken, mem, args.legacy_status)
status += " *"
colour = ANSI_RED
else:
passed_tests.add(name)
print_test_line(name, ANSI_GREEN, status, time_taken, mem, args.legacy_status)
colour = ANSI_GREEN
print_test_line(name, colour, status,
real_time=info['real_time'], cpu_time=info['cpu_time'], mem=info['mem_usage'],
legacy=args.legacy_status)
except Queue.Empty:
pass
wipe_tty_status()

View File

@ -31,13 +31,15 @@ class TestEnv():
self.pwd = pwd
self.cwd = "."
self.timeout = 0
self.cpu_timeout = 0
self.depends = set()
class Test():
def __init__(self, name, command, timeout=0, cwd="", depends=None):
def __init__(self, name, command, timeout=0, cpu_timeout=0, cwd="", depends=None):
self.name = name
self.command = command
self.timeout = timeout
self.cpu_timeout = cpu_timeout
self.cwd = cwd
if depends == None:
@ -53,6 +55,12 @@ def parse_attributes(tag, env, strict=True):
except:
if strict:
raise
if tag.get("cpu-timeout"):
try:
env.cpu_timeout = float(tag.get("cpu-timeout"))
except:
if strict:
raise
if tag.get("cwd"):
env.cwd = tag.get("cwd")
if tag.get("depends"):
@ -64,6 +72,7 @@ def parse_test(doc, env, strict=True):
parse_attributes(doc, env, strict=strict)
return Test(doc.get("name"), doc.text.strip(),
timeout=env.timeout,
cpu_timeout=env.cpu_timeout,
cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
depends=env.depends)
@ -347,8 +356,8 @@ def main():
# Print results
for test in tests:
print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
test.command, test.timeout, ",".join(test.depends), test.cwd))
print("\"%s\" [timeout=%d, cpu-timeout=%g, parents=%s, cwd=%s]" % (
test.command, test.timeout, test.cpu_timeout, ",".join(test.depends), test.cwd))
if __name__ == "__main__":