diff --git a/misc/regression/cpuusage.py b/misc/regression/cpuusage.py
new file mode 100644
index 000000000..b30ed708c
--- /dev/null
+++ b/misc/regression/cpuusage.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+#
+# Copyright 2016, NICTA
+#
+# This software may be distributed and modified according to the terms of
+# the BSD 2-Clause license. Note that NO WARRANTY is provided.
+# See "LICENSE_BSD2.txt" for details.
+#
+# @TAG(NICTA_BSD)
+#
+
+'''
+Monitors the total CPU usage of a process and its children. Usage is similar
+to the UNIX `time` utility.
+
+NB: In order to get up-to-date information, we don't use interfaces such
+    as os.times, which only includes terminated and waited-for children.
+    Instead, we poll the process tree regularly. This means that when a
+    child process dies, its CPU time since the last poll is lost and not
+    included in the total time.
+
+    Hence the total reported time will be an underestimate of the true
+    CPU usage, especially for short-lived child processes.
+'''
+
+from __future__ import print_function
+import os
+import psutil
+import signal
+import subprocess
+import sys
+import threading
+import time
+import warnings
+
+try:
+    import psutil
+    if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
+        psutil.Process.children = psutil.Process.get_children
+    if not hasattr(psutil.Process, "memory_maps") and hasattr(psutil.Process, "get_memory_maps"):
+        psutil.Process.memory_maps = psutil.Process.get_memory_maps
+
+except ImportError:
+    print("Error: 'psutil' module not available. Run\n"
+          "\n"
+          "    pip install --user psutil\n"
+          "\n"
+          "to install.", file=sys.stderr)
+    sys.exit(1)
+
+class Poller(threading.Thread):
+    '''Subclass of threading.Thread that monitors CPU usage of another process.
+       Use run() to start the process.
+       Use cpu_usage() to retrieve the latest estimate of CPU usage.'''
+    def __init__(self, pid):
+        super(Poller, self).__init__()
+        # Daemonise ourselves to avoid delaying exit of the process of our
+        # calling thread.
+        self.daemon = True
+        self.pid = pid
+        self.finished = False
+        self.started = threading.Semaphore(0)
+        self.proc = None
+
+        # Reported stat.
+        self.cpu = 0.0
+
+        # Remember CPU times of recently seen children.
+        # This is to prevent double-counting for child processes.
+        self.current_children = {} # {(pid, create_time): CPU time}
+        # CPU time of dead children is recorded here.
+        self.old_children_cpu = 0.0
+
+    def run(self):
+        def update():
+            total = 0.0
+
+            # Fetch process's usage.
+            try:
+                if self.proc is None:
+                    self.proc = psutil.Process(self.pid)
+                total += sum(self.proc.cpu_times())
+
+                # Fetch children's usage.
+                new_current_children = {}
+                for c in self.proc.children(recursive=True):
+                    try:
+                        t = sum(c.cpu_times())
+                        new_current_children[(c.pid, c.create_time())] = t
+                        total += t
+                    except psutil.NoSuchProcess:
+                        pass
+                    except psutil.AccessDenied:
+                        pass
+
+                # For children that are no longer running, remember their
+                # most recently recorded CPU time.
+                reaped_cpu = 0.0
+                for c_id, c_t in self.current_children.iteritems():
+                    if c_id not in new_current_children:
+                        reaped_cpu += c_t
+                self.old_children_cpu += reaped_cpu
+                self.current_children = new_current_children
+                total += self.old_children_cpu
+
+            except psutil.AccessDenied as err:
+                warnings.warn("access denied: pid=%d" % err.pid, RuntimeWarning)
+
+            if total < self.cpu:
+                try:
+                    cmd = repr(' '.join(self.proc.cmdline()))
+                except Exception:
+                    cmd = '??'
+                warnings.warn("cpu non-monotonic: %.15f -> %.15f, pid=%d, cmd=%s" %
+                              (self.cpu, total, self.pid, cmd),
+                              RuntimeWarning)
+            return total
+
+        # Fetch a sample, and notify others that we have started.
+        self.cpu = update()
+        self.started.release()
+
+        # Poll the process periodically.
+        #
+        # We poll quickly at the beginning and use exponential backout
+        # to try and get better stats on short-lived processes.
+        #
+        polling_interval = 0.01
+        max_interval = 0.5
+        while not self.finished:
+            time.sleep(polling_interval)
+            try:
+                self.cpu = update()
+            except psutil.NoSuchProcess:
+                break
+            if polling_interval < max_interval:
+                polling_interval = min(polling_interval * 1.5, max_interval)
+
+    def cpu_usage(self):
+        return self.cpu
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *_):
+        self.finished = True
+
+def process_poller(pid):
+    '''Initiate polling of a subprocess. This is intended to be used in a
+    `with` block.'''
+    # Create a new thread and start it up.
+    p = Poller(pid)
+    p.start()
+
+    # Wait for the thread to record at least one sample before continuing.
+    p.started.acquire()
+
+    return p
+
+def main():
+    if len(sys.argv) <= 1 or sys.argv[1] in ['-?', '--help']:
+        print('Usage: %s command args...\n Measure total CPU ' \
+              'usage of a command' % sys.argv[0], file=sys.stderr)
+        return -1
+
+    # Run the command requested.
+    try:
+        p = subprocess.Popen(sys.argv[1:])
+    except OSError:
+        print('command not found', file=sys.stderr)
+        return -1
+
+    cpu = 0
+    m = process_poller(p.pid)
+    while True:
+        try:
+            p.returncode = p.wait()
+            break
+        except KeyboardInterrupt:
+            # The user Ctrl-C-ed us. The child should have received SIGINT;
+            # continue waiting for it to finish
+            pass
+
+    print('Total cpu %f seconds' % m.cpu_usage(), file=sys.stderr)
+
+    return p.returncode
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/misc/regression/regression.dtd b/misc/regression/regression.dtd
index eafdd69c6..c39c5c487 100644
--- a/misc/regression/regression.dtd
+++ b/misc/regression/regression.dtd
@@ -11,21 +11,25 @@
 <!ELEMENT testsuite (set|sequence|test)+>
 <!ATTLIST testsuite cwd CDATA #IMPLIED>
 <!ATTLIST testsuite timeout CDATA #IMPLIED>
+<!ATTLIST testsuite cpu-timeout CDATA #IMPLIED>
 <!ATTLIST testsuite depends NMTOKENS #IMPLIED>
 
 <!ELEMENT set (set|sequence|test)+>
 <!ATTLIST set cwd CDATA #IMPLIED>
 <!ATTLIST set timeout CDATA #IMPLIED>
+<!ATTLIST set cpu-timeout CDATA #IMPLIED>
 <!ATTLIST set depends NMTOKENS #IMPLIED>
 
 <!ELEMENT sequence (set|sequence|test)+>
 <!ATTLIST sequence cwd CDATA #IMPLIED>
 <!ATTLIST sequence timeout CDATA #IMPLIED>
+<!ATTLIST sequence cpu-timeout CDATA #IMPLIED>
 <!ATTLIST sequence depends NMTOKENS #IMPLIED>
 
 <!ELEMENT test ANY>
 <!ATTLIST test name ID #REQUIRED>
 <!ATTLIST test cwd CDATA #IMPLIED>
 <!ATTLIST test timeout CDATA #IMPLIED>
+<!ATTLIST test cpu-timeout CDATA #IMPLIED>
 <!ATTLIST test depends NMTOKENS #IMPLIED>
 
diff --git a/misc/regression/run_tests.py b/misc/regression/run_tests.py
index 018566362..59be3a86c 100755
--- a/misc/regression/run_tests.py
+++ b/misc/regression/run_tests.py
@@ -12,14 +12,13 @@
 #
 # Very simple command-line test runner.
 #
-# Ignores timeouts.
-#
 
 from __future__ import print_function
 
 import argparse
 import atexit
 import datetime
+import cpuusage
 import fnmatch
 import memusage
 import os
@@ -33,19 +32,20 @@ import subprocess
 import sys
 import testspec
 import threading
+import time
 import traceback
 import xml.etree.ElementTree as ET
 
-# Try importing psutil.
-PS_UTIL_AVAILABLE = False
 try:
     import psutil
-    from psutil import NoSuchProcess
-    PS_UTIL_AVAILABLE = True
     if not hasattr(psutil.Process, "children") and hasattr(psutil.Process, "get_children"):
+        # psutil API change
         psutil.Process.children = psutil.Process.get_children
 except ImportError:
-    pass
+    print("Error: failed to import psutil module.\n"
+          "To install psutil, try:\n"
+          "  pip install --user psutil", file=sys.stderr)
+    sys.exit(2)
 
 ANSI_RESET = "\033[0m"
 ANSI_RED = "\033[31;1m"
@@ -75,27 +75,24 @@ def which(file):
 # are looking at it, but not where a PID has been reused.
 #
 def kill_family(parent_pid):
-    if not PS_UTIL_AVAILABLE:
-        return
-
     # Find process.
     try:
         process = psutil.Process(parent_pid)
-    except NoSuchProcess:
+    except psutil.NoSuchProcess:
         # Race. Nothing more to do.
         return
 
     # SIGSTOP everyone first.
     try:
         process.suspend()
-    except NoSuchProcess:
+    except psutil.NoSuchProcess:
         # Race. Nothing more to do.
         return
     process_list = [process]
     for child in process.children(recursive=True):
         try:
             child.suspend()
-        except NoSuchProcess:
+        except psutil.NoSuchProcess:
             # Race.
             pass
         else:
@@ -109,10 +106,11 @@ def kill_family(parent_pid):
 #
 # Run a single test.
 #
-# Return a tuple of (success, log, time_taken, memory_usage).
+# Return a dict of (name, status, output, cpu time, elapsed time, memory usage).
+# This is placed onto the given queue.
 #
 # Log only contains the output if verbose is *false*; otherwise, the
-# log is output to stdout where we can't easily get  to it.
+# log is output to stdout where we can't easily get to it.
 #
 def run_test(test, status_queue, verbose=False):
     # Construct the base command.
@@ -142,6 +140,7 @@ def run_test(test, status_queue, verbose=False):
 
     # Start the command.
     peak_mem_usage = None
+    cpu_usage = None
     try:
         process = subprocess.Popen(command,
                 stdout=output, stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
@@ -161,6 +160,7 @@ def run_test(test, status_queue, verbose=False):
 
     # Setup an alarm at the timeout.
     was_timeout = [False] # Wrap in list to prevent do_timeout getting the wrong variable scope
+    was_cpu_timeout = [False] # as above
     def do_timeout():
         was_timeout[0] = True
         kill_family(process.pid)
@@ -168,11 +168,31 @@ def run_test(test, status_queue, verbose=False):
     if test.timeout > 0:
         timer.start()
 
-    # Wait for the command to finish.
-    with memusage.process_poller(process.pid) as m:
-        (output, _) = process.communicate()
-        peak_mem_usage = m.peak_mem_usage()
-    process_running = False
+    with cpuusage.process_poller(process.pid) as c:
+        # Also set a CPU timeout. We poll the cpu usage periodically.
+        def cpu_timeout():
+            interval = min(0.5, test.cpu_timeout / 10.0)
+            while process_running:
+                if c.cpu_usage() > test.cpu_timeout:
+                    was_cpu_timeout[0] = True
+                    kill_family(process.pid)
+                    break
+                time.sleep(interval)
+
+        if test.cpu_timeout > 0:
+            cpu_timer = threading.Thread(target=cpu_timeout)
+            cpu_timer.daemon = True
+            cpu_timer.start()
+
+        with memusage.process_poller(process.pid) as m:
+            # Wait for the command to finish.
+            (output, _) = process.communicate()
+            peak_mem_usage = m.peak_mem_usage()
+            cpu_usage = c.cpu_usage()
+
+        process_running = False
+        if test.cpu_timeout > 0:
+            cpu_timer.join()
 
     # Cancel the alarm. Small race here (if the timer fires just after the
     # process finished), but the returncode of our process should still be 0,
@@ -185,7 +205,7 @@ def run_test(test, status_queue, verbose=False):
     output = output.decode()
     if process.returncode == 0:
         status = "pass"
-    elif was_timeout[0]:
+    elif was_timeout[0] or was_cpu_timeout[0]:
         status = "TIMEOUT"
     else:
         status = "FAILED"
@@ -193,6 +213,7 @@ def run_test(test, status_queue, verbose=False):
                       'status': status,
                       'output': output,
                       'real_time': datetime.datetime.now() - start_time,
+                      'cpu_time': cpu_usage,
                       'mem_usage': peak_mem_usage})
 
 # Print a status line.
@@ -202,15 +223,21 @@ def print_test_line_start(test_name, legacy=False):
     print("  Started %-25s " % (test_name + " ..."))
     sys.stdout.flush()
 
-def print_test_line(test_name, color, status, time_taken, mem, legacy=False):
-    if mem:
+def print_test_line(test_name, color, status, real_time=None, cpu_time=None, mem=None, legacy=False):
+    if mem is not None:
         # Report memory usage in gigabytes.
         mem = '%5.2fGB' % round(float(mem) / 1024 / 1024 / 1024, 2)
-    if time_taken:
-        # Strip milliseconds for better printing.
-        time_taken = datetime.timedelta(seconds=int(time_taken.total_seconds()))
-        time_taken = '%8s' % time_taken
-    extras = ', '.join(filter(None, [time_taken, mem]))
+
+    if real_time is not None:
+        # Format times as H:MM:SS; strip milliseconds for better printing.
+        real_time = datetime.timedelta(seconds=int(real_time.total_seconds()))
+        real_time = '%8s real' % real_time
+
+    if cpu_time is not None:
+        cpu_time = datetime.timedelta(seconds=int(cpu_time))
+        cpu_time = '%8s cpu' % cpu_time
+
+    extras = ', '.join(filter(None, [real_time, cpu_time, mem]))
 
     # Print status line.
     if legacy:
@@ -251,6 +278,7 @@ def main():
             help="list known tests")
     parser.add_argument("--legacy", action="store_true",
             help="use legacy 'IsaMakefile' specs")
+    # --legacy-status used by top-level regression-v2 script
     parser.add_argument("--legacy-status", action="store_true",
             help="emulate legacy (sequential code) status lines")
     parser.add_argument("-v", "--verbose", action="store_true",
@@ -290,17 +318,6 @@ def main():
             parser.error("Unknown test names: %s" % (", ".join(sorted(bad_names))))
         tests_to_run = [t for t in tests if t.name in desired_names]
 
-    # If running at least one test, and psutil is not available, print a warning.
-    if len(tests_to_run) > 0 and not PS_UTIL_AVAILABLE:
-        print("\n"
-              "Warning: 'psutil' module not available. Processes may not be correctly\n"
-              "stopped. Run\n"
-              "\n"
-              "    pip install --user psutil\n"
-              "\n"
-              "to install.\n"
-              "\n")
-
     # Run the tests.
     print("Running %d test(s)..." % len(tests_to_run))
     failed_tests = set()
@@ -351,7 +368,7 @@ def main():
                 # Non-blocked but depends on a failed test. Remove it.
                 if len(real_depends & failed_tests) > 0:
                     wipe_tty_status()
-                    print_test_line(t.name, ANSI_YELLOW, "skipped", None, None, args.legacy_status)
+                    print_test_line(t.name, ANSI_YELLOW, "skipped", legacy=args.legacy_status)
                     failed_tests.add(t.name)
                     del tests_queue[i]
                     break
@@ -360,20 +377,23 @@ def main():
         try:
             while True:
                 info = status_queue.get(block=True, timeout=0.1337) # Built-in pause
-                name = info['name']
-                del current_jobs[name]
+                name, status = info['name'], info['status']
 
-                status, log, time_taken, mem = info['status'], info['output'], info['real_time'], info['mem_usage']
                 test_results[name] = info
+                del current_jobs[name]
 
                 # Print result.
                 wipe_tty_status()
                 if status != 'pass':
                     failed_tests.add(name)
-                    print_test_line(name, ANSI_RED, "%s *" % status, time_taken, mem, args.legacy_status)
+                    status += " *"
+                    colour = ANSI_RED
                 else:
                     passed_tests.add(name)
-                    print_test_line(name, ANSI_GREEN, status, time_taken, mem, args.legacy_status)
+                    colour = ANSI_GREEN
+                print_test_line(name, colour, status,
+                                real_time=info['real_time'], cpu_time=info['cpu_time'], mem=info['mem_usage'],
+                                legacy=args.legacy_status)
         except Queue.Empty:
             pass
     wipe_tty_status()
diff --git a/misc/regression/testspec.py b/misc/regression/testspec.py
index a7d6cbb5a..ad0000022 100755
--- a/misc/regression/testspec.py
+++ b/misc/regression/testspec.py
@@ -31,13 +31,15 @@ class TestEnv():
         self.pwd = pwd
         self.cwd = "."
         self.timeout = 0
+        self.cpu_timeout = 0
         self.depends = set()
 
 class Test():
-    def __init__(self, name, command, timeout=0, cwd="", depends=None):
+    def __init__(self, name, command, timeout=0, cpu_timeout=0, cwd="", depends=None):
         self.name = name
         self.command = command
         self.timeout = timeout
+        self.cpu_timeout = cpu_timeout
         self.cwd = cwd
 
         if depends == None:
@@ -53,6 +55,12 @@ def parse_attributes(tag, env, strict=True):
         except:
             if strict:
                 raise
+    if tag.get("cpu-timeout"):
+        try:
+            env.cpu_timeout = float(tag.get("cpu-timeout"))
+        except:
+            if strict:
+                raise
     if tag.get("cwd"):
         env.cwd = tag.get("cwd")
     if tag.get("depends"):
@@ -64,6 +72,7 @@ def parse_test(doc, env, strict=True):
     parse_attributes(doc, env, strict=strict)
     return Test(doc.get("name"), doc.text.strip(),
             timeout=env.timeout,
+            cpu_timeout=env.cpu_timeout,
             cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
             depends=env.depends)
 
@@ -347,8 +356,8 @@ def main():
 
     # Print results
     for test in tests:
-        print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
-            test.command, test.timeout, ",".join(test.depends), test.cwd))
+        print("\"%s\" [timeout=%d, cpu-timeout=%g, parents=%s, cwd=%s]" % (
+            test.command, test.timeout, test.cpu_timeout, ",".join(test.depends), test.cwd))
 
 
 if __name__ == "__main__":