summaryrefslogtreecommitdiffstats
path: root/utils/creduce-clang-crash.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/creduce-clang-crash.py')
-rwxr-xr-xutils/creduce-clang-crash.py412
1 files changed, 412 insertions, 0 deletions
diff --git a/utils/creduce-clang-crash.py b/utils/creduce-clang-crash.py
new file mode 100755
index 0000000000..be16211c4d
--- /dev/null
+++ b/utils/creduce-clang-crash.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python
+"""Calls C-Reduce to create a minimal reproducer for clang crashes.
+
+Output files:
+ *.reduced.sh -- crash reproducer with minimal arguments
+ *.reduced.cpp -- the reduced file
+ *.test.sh -- interestingness test for C-Reduce
+"""
+
+from __future__ import print_function
+from argparse import ArgumentParser, RawTextHelpFormatter
+import os
+import re
+import stat
+import sys
+import subprocess
+import pipes
+import shlex
+import tempfile
+import shutil
+from distutils.spawn import find_executable
+
# Module-wide settings. main() overwrites all four from the command line.
verbose = False     # gate for verbose_print()
creduce_cmd = None  # path of the creduce executable
clang_cmd = None    # path of the clang executable
not_cmd = None      # path of the `not` executable


def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
+
def check_file(fname):
    """Return fname unchanged if it is an existing regular file.

    Otherwise terminate the program through sys.exit() with an error message.
    """
    if os.path.isfile(fname):
        return fname
    sys.exit("ERROR: %s does not exist" % (fname))
+
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    cmd_name -- bare executable name to search for when cmd_path is not given.
    cmd_dir  -- os.pathsep-separated search path for cmd_name; a false value
                means "search the PATH environment variable".
    cmd_path -- explicit path or name of the executable; takes precedence.

    Exits the program through sys.exit() when nothing is found.
    """
    def locate(name, path=None):
        # Accept an existing file as-is, searched before any directory list.
        if os.path.isfile(name):
            return name
        # shutil.which is the supported lookup on Python 3; the distutils
        # helper is only consulted where shutil.which does not exist.
        which = getattr(shutil, 'which', None)
        if which is not None:
            return which(name, path=path)
        return find_executable(name, path=path)

    if cmd_path:
        cmd = locate(cmd_path)
        if cmd:
            # The result is written into generated scripts, so a path relative
            # to the current directory must not leak out of here.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = locate(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)

    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir or "$PATH"))
+
def quote_cmd(cmd):
    """Return the argument list cmd as a single shell-safe command string.

    Every argument is shell-quoted individually and the quoted arguments are
    joined with single spaces.
    """
    # shlex.quote is the documented function on Python 3. pipes.quote is only
    # looked up when shlex has no quote(), i.e. on Python 2.
    quote = getattr(shlex, 'quote', None) or pipes.quote
    return ' '.join(quote(arg) for arg in cmd)
+
def write_to_script(text, filename):
    """Write text to filename, then add the owner-execute bit to its mode."""
    with open(filename, 'w') as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
+
class Reduce(object):
    """Reduces one clang crash: the source file and the clang command line.

    Constructing an instance copies file_to_reduce to `<name>.reduced<ext>`,
    reads the clang arguments from crash_script and runs clang once to record
    which output identifies the crash.

    Attributes:
      testfile        -- path of the generated interestingness test script
      crash_script    -- path the reduced crash command is written to
      file_to_reduce  -- path of the working copy that gets reduced
      clang           -- clang executable (taken from the clang_cmd global)
      clang_args      -- current clang arguments, without clang and the input
      expected_output -- strings that must all appear in the crash output
      is_crash        -- False when the failure is a "fatal error:" diagnostic
      creduce_flags   -- extra flags passed to C-Reduce
    """

    def __init__(self, crash_script, file_to_reduce):
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later steps work on the copy; the original file is not modified.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the crash command as a list: [cmd] + args + [filename].

        Each parameter defaults to the corresponding instance attribute
        (self.clang, self.clang_args, self.file_to_reduce).
        """
        if not cmd:
            cmd = self.clang
        # Only None selects the default: an explicitly empty argument list is a
        # valid candidate and must not be replaced by the full argument list.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + list(args) + [filename]

    def _crash_output(self, args=None, filename=None):
        """Run the crash command; return its merged stdout/stderr as text."""
        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        raw_output, _ = p.communicate()
        # Undecodable bytes are replaced so that decoding can never raise.
        return raw_output.decode('utf-8', 'replace')

    def read_clang_args(self, crash_script, filename):
        """Set self.clang_args from the clang invocation in crash_script.

        The first line that is neither blank nor a comment is taken to be the
        clang call. Its first word (clang itself) and the last argument equal
        to filename are dropped. Exits if no such line exists.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd)-1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and set self.expected_output from its output.

        A known error message is preferred; otherwise up to five stack trace
        function names are used. Exits with status 1 if neither is found.
        """
        print("\nGetting expected crash output...")

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '', self._crash_output())
        result = []

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed", # Linux assert()
                   r"Assertion failed: (.+),", # FreeBSD/Mac assert()
                   r"fatal error: error in backend: (.+)",
                   r"LLVM ERROR: (.+)",
                   r"UNREACHABLE executed (at .+)?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if not match:
                continue
            # The UNREACHABLE group is optional; when it did not participate,
            # fall back to the whole match so msg is always a string.
            msg = match.group(1) or match.group(0)
            result = [msg]
            print("Found message:", msg)

            if "fatal error:" in msg_re:
                self.is_crash = False
            break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running the command still shows every expected string.

        args and filename default to self.clang_args and self.file_to_reduce.
        An empty args list is honoured as "no arguments".
        """
        crash_output = self._crash_output(args=args, filename=filename)
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce interestingness test script and sanity-check it."""
        print("\nCreating the interestingness test...")

        crash_flag = "--crash" if self.is_crash else ""

        lines = ["#!/bin/bash\n%s %s %s >& t.log || exit 1\n" %
                 (quote_cmd([not_cmd]), crash_flag,
                  quote_cmd(self.get_crash_cmd()))]
        lines.extend('grep -F %s t.log || exit 1\n' % quote_cmd([msg])
                     for msg in self.expected_output)

        write_to_script(''.join(lines), self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes for the file and fails for an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the working copy by its preprocessed form if it still crashes.

        Tries preprocessing with line markers removed (-P) first, then with
        line markers kept. The working copy is left alone if neither result
        reproduces the expected output or if preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of args with the selected options left out.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        opts_startswith = tuple(opts_startswith)
        opts_one_arg_startswith = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in opts_equal:
                continue
            # str.startswith accepts a tuple; an empty tuple never matches.
            if arg.startswith(opts_startswith):
                continue
            if arg.startswith(opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return args with some options filtered out if the crash survives.

        kwargs are passed to filter_args(). If extra_arg is given it is moved
        to, or added at, the end of the candidate list. The candidate is
        returned only if it differs from args and still reproduces the
        expected output; otherwise args is returned unchanged. msg is printed
        in verbose mode on success.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
            self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index]; return (argument list, next index to try).

        On success the shortened list is returned with the same index; on
        failure the original list is returned with index + 1.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Finally try every remaining argument one by one.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the working copy using the interestingness test.

        A KeyboardInterrupt while waiting kills C-Reduce and returns normally.
        """
        print("\nRunning C-Reduce...")
        # Started outside the try block so that `p` is always bound in the
        # interrupt handler below.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()
            # Reap the killed process.
            p.wait()
+
def main():
    """Parse the command line, then run the whole reduction pipeline.

    Fills in the module-level verbose, creduce_cmd, clang_cmd and not_cmd
    settings before a Reduce object is created.
    """
    global verbose
    global creduce_cmd
    global clang_cmd
    global not_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)

    # Positional arguments; nargs=1 makes each parsed value a one-item list.
    positionals = (
        ('crash_script', "Name of the script that generates the crash."),
        ('file_to_reduce', "Name of the file to be reduced."),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, type=str, nargs=1, help=arg_help)

    # Optional string arguments: (flag, dest, help).
    options = (
        ('--llvm-bin', 'llvm_bin',
         "Path to the LLVM bin directory."),
        ('--llvm-not', 'llvm_not',
         "The path to the `not` executable. "
         "By default uses the llvm-bin directory."),
        ('--clang', 'clang',
         "The path to the `clang` executable. "
         "By default uses the llvm-bin directory."),
        ('--creduce', 'creduce',
         "The path to the `creduce` executable. "
         "Required if `creduce` is not in PATH environment."),
    )
    for flag, dest_name, opt_help in options:
        parser.add_argument(flag, dest=dest_name, type=str, help=opt_help)
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = None
    if args.llvm_bin:
        llvm_bin = os.path.abspath(args.llvm_bin)
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)
    not_cmd = check_cmd('not', llvm_bin, args.llvm_not)

    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()


if __name__ == '__main__':
    main()