commit 70403205d298c3b763d19e7ecbec461012c2af40 Author: David Malcolm dmalcolm@redhat.com Date: Wed Jun 29 20:05:21 2011 -0400
Work-in-progress on refcount checker
Do the checking earlier, on the pre-SSA representation, rather than on the SSA, so that we have more user-readable variable names (this means we're dealing with gcc.VarDecl rather than gcc.SsaName)
Use OrderedDict within states to preserve a meaningful ordering of the components.
Introduce a Transition class to support associated descriptive text with a state transition; reword accordingly
Rather than a depth-first traversal of an implicit graph of states, instead we generate an explicit graph (the StateGraph and StateEdge classes).
Add visualizations of these state graphs to help with debugging the checker (libcpychecker.visualizations)
libcpychecker/__init__.py | 5 +- libcpychecker/absinterp.py | 2 +- libcpychecker/refcounts.py | 189 ++++++++++++++++++++++++++++++-------- libcpychecker/visualizations.py | 131 +++++++++++++++++++++++++++ 4 files changed, 285 insertions(+), 42 deletions(-) --- diff --git a/libcpychecker/__init__.py b/libcpychecker/__init__.py index 133da34..5188fad 100644 --- a/libcpychecker/__init__.py +++ b/libcpychecker/__init__.py @@ -32,11 +32,14 @@ def on_pass_execution(optpass, fun, log(fun) check_pyargs(fun)
+ # non-SSA version of the checker: + check_refcounts(fun, show_traces) + # The refcount code is too buggy for now to be on by default: if not verify_refcounting: return
- if optpass.name == 'release_ssa': + if 0: # optpass.name == 'release_ssa': # SSA data needed: assert optpass.properties_required & (1<<5) # methods = get_all_PyMethodDef_methods() diff --git a/libcpychecker/absinterp.py b/libcpychecker/absinterp.py index d661827..4aa7f20 100644 --- a/libcpychecker/absinterp.py +++ b/libcpychecker/absinterp.py @@ -75,7 +75,7 @@ def describe_stmt(stmt): if isinstance(stmt, gcc.GimpleCall): if isinstance(stmt.fn.operand, gcc.FunctionDecl): fnname = stmt.fn.operand.name - return 'call to %s at %s' % (fnname, stmt.loc) + return 'call to %s at line %i' % (fnname, stmt.loc.line) else: return str(stmt.loc)
diff --git a/libcpychecker/refcounts.py b/libcpychecker/refcounts.py index afe9b0e..e01ee2f 100644 --- a/libcpychecker/refcounts.py +++ b/libcpychecker/refcounts.py @@ -23,6 +23,7 @@ import sys import gcc
+from collections import OrderedDict from gccutils import cfg_to_dot, invoke_dot, get_src_for_loc
from absinterp import * @@ -154,11 +155,18 @@ class State: if self.loc.get_stmt(): logger('%s' % self.loc.get_stmt().loc, indent + 1)
- def make_assignment(self, key, value): + def get_key_for_lvalue(self, lvalue): + return str(lvalue) # FIXME + + def make_assignment(self, lvalue, value, desc): + if desc: + assert isinstance(desc, str) new = self.copy() new.loc = self.loc.next_loc() - new.data[str(key)] = value - return new + assert isinstance(lvalue, gcc.VarDecl) # for now + key = self.get_key_for_lvalue(lvalue) + new.data[key] = value + return Transition(new, desc)
def update_loc(self, newloc): new = self.copy() @@ -169,6 +177,14 @@ class State: newloc = self.loc.next_loc() return self.update_loc(newloc)
+class Transition: + def __init__(self, nextstate, desc): + self.nextstate = nextstate + self.desc = desc + + def __repr__(self): + return 'Transition(%r, %r)' % (self.nextstate, self.desc) + class Trace: """A sequence of State""" def __init__(self): @@ -266,47 +282,54 @@ class MyState(State): def release(self, resource): self.resources.release(resource)
- def make_assignment(self, key, value, additional_ptr=None): - newstate = State.make_assignment(self, key, value) + def make_assignment(self, key, value, desc, additional_ptr=None): + if desc: + assert isinstance(desc, str) + transition = State.make_assignment(self, key, value, desc) if additional_ptr: - newstate.owned_refs.append(additional_ptr) - return newstate + transition.nextstate.owned_refs.append(additional_ptr) + return transition
- def next_states(self, oldstate): - # Return a list of State instances, based on input State + def get_transitions(self, oldstate): + # Return a list of Transition instances, based on input State stmt = self.loc.get_stmt() if stmt: - return self._next_states_for_stmt(stmt, oldstate) + return self._get_transitions_for_stmt(stmt, oldstate) else: result = [] for loc in self.loc.next_locs(): newstate = self.copy() newstate.loc = loc - result.append(newstate) + result.append(Transition(newstate, '')) log('result: %s' % result) return result
- def _next_states_for_stmt(self, stmt, oldstate): - log('_next_states_for_stmt: %r %s' % (stmt, stmt), 2) + def _get_transitions_for_stmt(self, stmt, oldstate): + log('_get_transitions_for_stmt: %r %s' % (stmt, stmt), 2) log('dir(stmt): %s' % dir(stmt), 3) if isinstance(stmt, gcc.GimpleCall): - return self._next_states_for_GimpleCall(stmt) + return self._get_transitions_for_GimpleCall(stmt) elif isinstance(stmt, (gcc.GimpleDebug, gcc.GimpleLabel)): return [self.use_next_loc()] elif isinstance(stmt, gcc.GimpleCond): - return self._next_states_for_GimpleCond(stmt) + return self._get_transitions_for_GimpleCond(stmt) elif isinstance(stmt, gcc.GimplePhi): - return self._next_states_for_GimplePhi(stmt, oldstate) + return self._get_transitions_for_GimplePhi(stmt, oldstate) elif isinstance(stmt, gcc.GimpleReturn): return [] elif isinstance(stmt, gcc.GimpleAssign): - return self._next_states_for_GimpleAssign(stmt) + return self._get_transitions_for_GimpleAssign(stmt) else: raise "foo"
def eval_expr(self, expr): if isinstance(expr, gcc.IntegerCst): return expr.constant + if isinstance(expr, gcc.VarDecl): + if expr.name in self.data: + return self.data[expr.name] + else: + return UnknownValue(expr.type, None) if isinstance(expr, gcc.SsaName): if str(expr) in self.data: return self.data[str(expr)] @@ -324,7 +347,7 @@ class MyState(State): return None return UnknownValue(expr.type, None) # FIXME
- def _next_states_for_GimpleCall(self, stmt): + def _get_transitions_for_GimpleCall(self, stmt): log('stmt.lhs: %s %r' % (stmt.lhs, stmt.lhs), 3) log('stmt.fn: %s %r' % (stmt.fn, stmt.fn), 3) log('dir(stmt.fn): %s' % dir(stmt.fn), 4) @@ -338,8 +361,8 @@ class MyState(State): # Function returning new references: if fnname in ('PyList_New', 'PyLong_FromLong'): nonnull = NonNullPtrValue(1, stmt) - return [self.make_assignment(stmt.lhs, nonnull, nonnull), - self.make_assignment(stmt.lhs, NullPtrValue())] + return [self.make_assignment(stmt.lhs, nonnull, '%s() succeeded' % fnname, nonnull), + self.make_assignment(stmt.lhs, NullPtrValue(), '%s() failed' % fnname)] # Function returning borrowed references: elif fnname in ('Py_InitModule4_64',): return [self.make_assignment(stmt.lhs, NonNullPtrValue(1, stmt)), @@ -355,26 +378,27 @@ class MyState(State): # Unknown function: log('Invocation of unknown function: %r' % fnname) return [self.make_assignment(stmt.lhs, - UnknownValue(returntype, stmt))] + UnknownValue(returntype, stmt), + None)]
log('stmt.args: %s %r' % (stmt.args, stmt.args), 3) for i, arg in enumerate(stmt.args): log('args[%i]: %s %r' % (i, arg, arg), 4)
- def _next_states_for_GimpleCond(self, stmt): - def make_nextstate_for_true(stmt): + def _get_transitions_for_GimpleCond(self, stmt): + def make_transition_for_true(stmt): e = true_edge(self.loc.bb) assert e nextstate = self.update_loc(Location.get_block_start(e.dest)) nextstate.prior_bool = True - return nextstate + return Transition(nextstate, 'taking True path')
- def make_nextstate_for_false(stmt): + def make_transition_for_false(stmt): e = false_edge(self.loc.bb) assert e nextstate = self.update_loc(Location.get_block_start(e.dest)) nextstate.prior_bool = False - return nextstate + return Transition(nextstate, 'taking False path')
log('stmt.exprcode: %s' % stmt.exprcode, 4) log('stmt.exprtype: %s' % stmt.exprtype, 4) @@ -385,19 +409,19 @@ class MyState(State): boolval = self.eval_condition(stmt) if boolval is True: log('taking True edge', 2) - nextstate = make_nextstate_for_true(stmt) + nextstate = make_transition_for_true(stmt) return [nextstate] elif boolval is False: log('taking False edge', 2) - nextstate = make_nextstate_for_false(stmt) + nextstate = make_transition_for_false(stmt) return [nextstate] else: assert isinstance(boolval, UnknownValue) # We don't have enough information; both branches are possible: - return [make_nextstate_for_true(stmt), - make_nextstate_for_false(stmt)] + return [make_transition_for_true(stmt), + make_transition_for_false(stmt)]
- def _next_states_for_GimplePhi(self, stmt, oldstate): + def _get_transitions_for_GimplePhi(self, stmt, oldstate): log('stmt: %s' % stmt) log('stmt.lhs: %s' % stmt.lhs) log('stmt.args: %s' % stmt.args) @@ -411,18 +435,21 @@ class MyState(State): raise AnalysisError('incoming edge not found')
def eval_condition(self, stmt): + log('eval_condition: %s' % stmt) lhs = self.eval_expr(stmt.lhs) rhs = self.eval_expr(stmt.rhs) log('eval of lhs: %r' % lhs) log('eval of rhs: %r' % rhs) + log('stmt.exprcode: %r' % stmt.exprcode) if stmt.exprcode == gcc.EqExpr: if isinstance(lhs, NonNullPtrValue) and rhs == 0: return False if isinstance(lhs, NullPtrValue) and rhs == 0: return True + log('got here') return UnknownValue(None, stmt)
- def _next_states_for_GimpleAssign(self, stmt): + def _get_transitions_for_GimpleAssign(self, stmt): log('stmt.lhs: %r %s' % (stmt.lhs, stmt.lhs)) log('stmt.rhs: %r %s' % (stmt.rhs, stmt.rhs))
@@ -435,7 +462,7 @@ class MyState(State): if value in nextstate.owned_refs: log('removing ownership of %s' % value) nextstate.owned_refs.remove(value) - return [nextstate] # for now + return [Transition(nextstate, None)] # for now
def iter_traces(fun, prefix=None): # Traverse the tree of traces of program state @@ -444,7 +471,7 @@ def iter_traces(fun, prefix=None): if prefix is None: prefix = Trace() curstate = MyState(Location.get_block_start(fun.cfg.entry), - {}, + OrderedDict(), [], Resources()) else: @@ -459,8 +486,8 @@ def iter_traces(fun, prefix=None): prefix.log(log, 'PREFIX', 1) log(' %s:%s' % (fun.decl.name, curstate.loc)) try: - nextstates = curstate.next_states(prevstate) - assert isinstance(nextstates, list) + transitions = curstate.get_transitions(prevstate) + assert isinstance(transitions, list) except PredictedError, err: # We're at a terminating state: err.loc = prefix.get_last_stmt().loc @@ -469,13 +496,13 @@ def iter_traces(fun, prefix=None): trace_with_err.log(log, 'FINISHED TRACE WITH ERROR: %s' % err, 1) return [trace_with_err]
- log('states: %s' % nextstates, 2) + log('transitions: %s' % transitions, 2)
- if len(nextstates) > 0: + if len(transitions) > 0: result = [] - for nextstate in nextstates: + for transition in transitions: # Recurse: - for trace in iter_traces(fun, prefix.copy().add(nextstate)): + for trace in iter_traces(fun, prefix.copy().add(transition.nextstate)): result.append(trace) return result else: @@ -483,6 +510,68 @@ def iter_traces(fun, prefix=None): prefix.log(log, 'FINISHED TRACE', 1) return [prefix]
+class StateEdge: + def __init__(self, src, dest, transition): + assert isinstance(src, State) + assert isinstance(dest, State) + assert isinstance(transition, Transition) + self.src = src + self.dest = dest + self.transition = transition + +class StateGraph: + """ + A graph of states, representing the various routes through a function, + tracking state. + + For now, we give up when we encounter a loop, as an easy way to ensure + termination of the analysis + """ + def __init__(self, fun, logger): + assert isinstance(fun, gcc.Function) + self.fun = fun + self.states = [] + self.edges = [] + + logger('StateGraph.__init__(%r)' % fun) + + # Recursively gather states: + initial = MyState(Location.get_block_start(fun.cfg.entry), + OrderedDict(), + [], + Resources()) + self.states.append(initial) + self._gather_states(initial, None, logger) + + def _gather_states(self, curstate, prevstate, logger): + logger(' %s:%s' % (self.fun.decl.name, curstate.loc)) + try: + transitions = curstate.get_transitions(prevstate) + assert isinstance(transitions, list) + except PredictedError, err: + # We're at a terminating state: + raise "foo" # FIXME + err.loc = prefix.get_last_stmt().loc + trace_with_err = prefix.copy() + trace_with_err.add_error(err) + trace_with_err.log(log, 'FINISHED TRACE WITH ERROR: %s' % err, 1) + return [trace_with_err] + + logger('transitions: %s' % transitions, 2) + + if len(transitions) > 0: + for transition in transitions: + # FIXME: what about loops??? + assert isinstance(transition, Transition) + self.states.append(transition.nextstate) + self.edges.append(StateEdge(curstate, transition.nextstate, transition)) + + # Recurse: + self._gather_states(transition.nextstate, curstate, logger) + else: + # We're at a terminating state: + logger('FINISHED TRACE') + def extra_text(msg, indent): sys.stderr.write('%s%s\n' % (' ' * indent, msg))
@@ -512,6 +601,15 @@ def check_refcounts(fun, show_traces): # Abstract interpretation: # Walk the CFG, gathering the information we're interested in
+ if 1: + from libcpychecker.visualizations import StateGraphPrettyPrinter + sg = StateGraph(fun, log) + sgpp = StateGraphPrettyPrinter(sg) + dot = sgpp.to_dot() + #dot = sgpp.extra_items() + print(dot) + invoke_dot(dot) + traces = iter_traces(fun) if show_traces: traces = list(traces) @@ -520,6 +618,17 @@ def check_refcounts(fun, show_traces): sys.stdout.write('%s%s\n' % (' ' * indent, item)) trace.log(my_logger, 'TRACE %i' % i, 0)
+ # Show trace #0: + if i == 0: + from libcpychecker.visualizations import TracePrettyPrinter + tpp = TracePrettyPrinter(fun.cfg, trace) + dot = tpp.to_dot() + print dot + f = open('test.dot', 'w') + f.write(dot) + f.close() + invoke_dot(dot) + for i, trace in enumerate(traces): trace.log(log, 'TRACE %i' % i, 0) if trace.err: diff --git a/libcpychecker/visualizations.py b/libcpychecker/visualizations.py new file mode 100644 index 0000000..1659fae --- /dev/null +++ b/libcpychecker/visualizations.py @@ -0,0 +1,131 @@ +# Copyright 2011 David Malcolm dmalcolm@redhat.com +# Copyright 2011 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# http://www.gnu.org/licenses/. + +from gccutils import CfgPrettyPrinter, get_src_for_loc + +class StatePrettyPrinter(CfgPrettyPrinter): + """ + Various ways of annotating a CFG with state information + """ + def state_to_dot_label(self, state): + result = '<table cellborder="0" border="0" cellspacing="0">\n' + for key in state.data: + value = state.data[key] + result += ('<tr> %s %s </tr>\n' + % (self._dot_td(key), + self._dot_td(value))) + result += '</table>\n' + return result + +class TracePrettyPrinter(StatePrettyPrinter): + """ + Annotate a CFG, showing a specific trace of execution through it + """ + def __init__(self, cfg, trace): + self.cfg = cfg + self.trace = trace + + def extra_items(self): + # Hook for expansion + result = '' + result += ' subgraph cluster_trace {\n' + result += ' label="Trace";\n' + for i, state in enumerate(self.trace.states): + + result += (' state%i [label=<%s>];\n' + % (i, self.state_to_dot_label(state))) + + if i > 0: + result += ' state%i -> state%i;\n' % (i-1, i) + result += ' state%i -> %s:stmt%i;\n' % (i, + self.block_id(state.loc.bb), + state.loc.idx) + result += ' }\n'; + return result + +class StateGraphPrettyPrinter(StatePrettyPrinter): + """ + Annotate a CFG, showing all possible states as execution proceeds through + it + """ + def __init__(self, sg): + self.sg = sg + self.name = sg.fun.decl.name + self.cfg = sg.fun.cfg + + def state_id(self, state): + return 'state%i' % id(state) + + def state_to_dot_label(self, state): + result = '<table cellborder="0" border="0" cellspacing="0">\n' + + # Show data: + for key in state.data: + value = state.data[key] + result += ('<tr> %s %s </tr>\n' + % (self._dot_td(key + ' : '), + self._dot_td(value))) + # Show location: + stmt = state.loc.get_stmt() + if stmt: + if stmt.loc: + result += ('<tr><td>' + + self.to_html('%4i ' % stmt.loc.line) + + self.code_to_html(get_src_for_loc(stmt.loc)) + + '<br/>' + + (' ' * (5 + stmt.loc.column-1)) + '^' + + '</td></tr>\n') + result += '<tr><td></td>' + self.stmt_to_html(stmt, state.loc.idx) + '</tr>\n' + + result += '</table>\n' + return result + + def extra_items(self): + # Hook for expansion + result = '' + result += ' subgraph cluster_state_transitions {\n' + result += ' label="State Transitions";\n' + result += ' node [shape=box];\n' + for state in self.sg.states: + + result += (' %s [label=<%s>];\n' + % (self.state_id(state), + self.state_to_dot_label(state))) + + #result += (' %s -> %s:stmt%i;\n' + # % (self.state_id(state), + # self.block_id(state.loc.bb), + # state.loc.idx)) + + for edge in self.sg.edges: + if edge.transition.desc: + attrliststr = '[label = "%s"]' % self.to_html(edge.transition.desc) + else: + attrliststr = '' + result += (' %s -> %s %s;\n' + % (self.state_id(edge.src), + self.state_id(edge.dest), + attrliststr)) + + result += ' }\n'; + return result + + #def to_dot(self): + # result = 'digraph {\n' + # result += self.extra_items() + # result += '}\n'; + # return result