clang 21.0.0git
DataflowAnalysisContext.cpp
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
16#include "clang/AST/ExprCXX.h"
23#include "llvm/ADT/SetOperations.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/raw_ostream.h"
30#include <cassert>
31#include <memory>
32#include <string>
33#include <utility>
34#include <vector>
35
36static llvm::cl::opt<std::string> DataflowLog(
37 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
38 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
39 "log to stderr. With an arg, writes HTML logs under the "
40 "specified directory (one per analyzed function)."));
41
42namespace clang {
43namespace dataflow {
44
46 // During context-sensitive analysis, a struct may be allocated in one
47 // function, but its field accessed in a function lower in the stack than
48 // the allocation. Since we only collect fields used in the function where
49 // the allocation occurs, we can't apply that filter when performing
50 // context-sensitive analysis. But, this only applies to storage locations,
51 // since field access it not allowed to fail. In contrast, field *values*
52 // don't need this allowance, since the API allows for uninitialized fields.
53 if (Opts.ContextSensitiveOpts)
54 return getObjectFields(Type);
55
56 return llvm::set_intersection(getObjectFields(Type), ModeledFields);
57}
58
59void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
60 ModeledFields.set_union(Fields);
61}
62
64 if (!Type.isNull() && Type->isRecordType()) {
65 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
66 for (const FieldDecl *Field : getModeledFields(Type))
67 if (Field->getType()->isReferenceType())
68 FieldLocs.insert({Field, nullptr});
69 else
70 FieldLocs.insert({Field, &createStorageLocation(
71 Field->getType().getNonReferenceType())});
72
74 for (const auto &Entry : getSyntheticFields(Type))
75 SyntheticFields.insert(
76 {Entry.getKey(),
77 &createStorageLocation(Entry.getValue().getNonReferenceType())});
78
79 return createRecordStorageLocation(Type, std::move(FieldLocs),
80 std::move(SyntheticFields));
81 }
82 return arena().create<ScalarStorageLocation>(Type);
83}
84
85// Returns the keys for a given `StringMap`.
86// Can't use `StringSet` as the return type as it doesn't support `operator==`.
87template <typename T>
88static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
89 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
90}
91
92RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
95 assert(Type->isRecordType());
96 assert(containsSameFields(getModeledFields(Type), FieldLocs));
97 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
98
99 RecordStorageLocationCreated = true;
100 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
101 std::move(SyntheticFields));
102}
103
105DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
106 if (auto *Loc = DeclToLoc.lookup(&D))
107 return *Loc;
108 auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
109 DeclToLoc[&D] = &Loc;
110 return Loc;
111}
112
114DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
115 const Expr &CanonE = ignoreCFGOmittedNodes(E);
116
117 if (auto *Loc = ExprToLoc.lookup(&CanonE))
118 return *Loc;
119 auto &Loc = createStorageLocation(CanonE.getType());
120 ExprToLoc[&CanonE] = &Loc;
121 return Loc;
122}
123
125DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
126 auto CanonicalPointeeType =
127 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
128 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
129 if (Res.second) {
130 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
131 Res.first->second = &arena().create<PointerValue>(PointeeLoc);
132 }
133 return *Res.first->second;
134}
135
136void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
137 if (Invariant == nullptr)
138 Invariant = &Constraint;
139 else
140 Invariant = &arena().makeAnd(*Invariant, Constraint);
141}
142
143void DataflowAnalysisContext::addFlowConditionConstraint(
144 Atom Token, const Formula &Constraint) {
145 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
146 if (!Res.second) {
147 Res.first->second =
148 &arena().makeAnd(*Res.first->second, Constraint);
149 }
150}
151
152Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
153 Atom ForkToken = arena().makeFlowConditionToken();
154 FlowConditionDeps[ForkToken].insert(Token);
155 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
156 return ForkToken;
157}
158
159Atom
160DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
161 Atom SecondToken) {
162 Atom Token = arena().makeFlowConditionToken();
163 auto &TokenDeps = FlowConditionDeps[Token];
164 TokenDeps.insert(FirstToken);
165 TokenDeps.insert(SecondToken);
166 addFlowConditionConstraint(Token,
167 arena().makeOr(arena().makeAtomRef(FirstToken),
168 arena().makeAtomRef(SecondToken)));
169 return Token;
170}
171
172Solver::Result DataflowAnalysisContext::querySolver(
173 llvm::SetVector<const Formula *> Constraints) {
174 return S.solve(Constraints.getArrayRef());
175}
176
177bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
178 const Formula &F) {
179 if (F.isLiteral(true))
180 return true;
181
182 // Returns true if and only if truth assignment of the flow condition implies
183 // that `F` is also true. We prove whether or not this property holds by
184 // reducing the problem to satisfiability checking. In other words, we attempt
185 // to show that assuming `F` is false makes the constraints induced by the
186 // flow condition unsatisfiable.
187 llvm::SetVector<const Formula *> Constraints;
188 Constraints.insert(&arena().makeAtomRef(Token));
189 Constraints.insert(&arena().makeNot(F));
190 addTransitiveFlowConditionConstraints(Token, Constraints);
191 return isUnsatisfiable(std::move(Constraints));
192}
193
194bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
195 const Formula &F) {
196 if (F.isLiteral(false))
197 return false;
198
199 llvm::SetVector<const Formula *> Constraints;
200 Constraints.insert(&arena().makeAtomRef(Token));
201 Constraints.insert(&F);
202 addTransitiveFlowConditionConstraints(Token, Constraints);
203 return isSatisfiable(std::move(Constraints));
204}
205
206bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
207 const Formula &Val2) {
208 llvm::SetVector<const Formula *> Constraints;
209 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
210 return isUnsatisfiable(std::move(Constraints));
211}
212
213void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
214 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
215 llvm::DenseSet<Atom> AddedTokens;
216 std::vector<Atom> Remaining = {Token};
217
218 if (Invariant)
219 Constraints.insert(Invariant);
220 // Define all the flow conditions that might be referenced in constraints.
221 while (!Remaining.empty()) {
222 auto Token = Remaining.back();
223 Remaining.pop_back();
224 if (!AddedTokens.insert(Token).second)
225 continue;
226
227 auto ConstraintsIt = FlowConditionConstraints.find(Token);
228 if (ConstraintsIt == FlowConditionConstraints.end()) {
229 Constraints.insert(&arena().makeAtomRef(Token));
230 } else {
231 // Bind flow condition token via `iff` to its set of constraints:
232 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
233 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
234 *ConstraintsIt->second));
235 }
236
237 if (auto DepsIt = FlowConditionDeps.find(Token);
238 DepsIt != FlowConditionDeps.end())
239 for (Atom A : DepsIt->second)
240 Remaining.push_back(A);
241 }
242}
243
244static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
245 llvm::raw_ostream &OS) {
246 OS << "(";
247 for (size_t i = 0; i < Atoms.size(); ++i) {
248 OS << Atoms[i];
249 if (i + 1 < Atoms.size())
250 OS << ", ";
251 }
252 OS << ")\n";
253}
254
255void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
256 llvm::raw_ostream &OS) {
257 llvm::SetVector<const Formula *> Constraints;
258 Constraints.insert(&arena().makeAtomRef(Token));
259 addTransitiveFlowConditionConstraints(Token, Constraints);
260
261 OS << "Flow condition token: " << Token << "\n";
263 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
264 simplifyConstraints(Constraints, arena(), &Info);
265 if (!Constraints.empty()) {
266 OS << "Constraints:\n";
267 for (const auto *Constraint : Constraints) {
268 Constraint->print(OS);
269 OS << "\n";
270 }
271 }
272 if (!Info.TrueAtoms.empty()) {
273 OS << "True atoms: ";
274 printAtomList(Info.TrueAtoms, OS);
275 }
276 if (!Info.FalseAtoms.empty()) {
277 OS << "False atoms: ";
278 printAtomList(Info.FalseAtoms, OS);
279 }
280 if (!Info.EquivalentAtoms.empty()) {
281 OS << "Equivalent atoms:\n";
283 printAtomList(Class, OS);
284 }
285
286 OS << "\nFlow condition constraints before simplification:\n";
287 for (const auto *Constraint : OriginalConstraints) {
288 Constraint->print(OS);
289 OS << "\n";
290 }
291}
292
293const AdornedCFG *
294DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
295 // Canonicalize the key:
296 F = F->getDefinition();
297 if (F == nullptr)
298 return nullptr;
299 auto It = FunctionContexts.find(F);
300 if (It != FunctionContexts.end())
301 return &It->second;
302
304 auto ACFG = AdornedCFG::build(*F);
305 // FIXME: Handle errors.
306 assert(ACFG);
307 auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
308 return &Result.first->second;
309 }
310
311 return nullptr;
312}
313
314static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
315 if (DataflowLog.empty())
316 return Logger::textual(llvm::errs());
317
318 llvm::StringRef Dir = DataflowLog;
319 if (auto EC = llvm::sys::fs::create_directories(Dir))
320 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
321 // All analysis runs within a process will log to the same directory.
322 // Share a counter so they don't all overwrite each other's 0.html.
323 // (Don't share a logger, it's not threadsafe).
324 static std::atomic<unsigned> Counter = {0};
325 auto StreamFactory =
326 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
328 llvm::sys::path::append(File,
329 std::to_string(Counter.fetch_add(1)) + ".html");
330 std::error_code EC;
331 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
332 if (EC) {
333 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
334 << "\n";
335 return std::make_unique<llvm::raw_null_ostream>();
336 }
337 return OS;
338 };
339 return Logger::html(std::move(StreamFactory));
340}
341
342DataflowAnalysisContext::DataflowAnalysisContext(
343 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
344 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
345 Opts(Opts) {
346 // If the -dataflow-log command-line flag was set, synthesize a logger.
347 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
348 // based tools.
349 if (Opts.Log == nullptr) {
350 if (DataflowLog.getNumOccurrences() > 0) {
351 LogOwner = makeLoggerFromCommandLine();
352 this->Opts.Log = LogOwner.get();
353 // FIXME: if the flag is given a value, write an HTML log to a file.
354 } else {
355 this->Opts.Log = &Logger::null();
356 }
357 }
358}
359
360DataflowAnalysisContext::~DataflowAnalysisContext() = default;
361
362} // namespace dataflow
363} // namespace clang
const Decl * D
Expr * E
static llvm::cl::opt< std::string > DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function)."))
Defines the clang::Expr interface and subclasses for C++ expressions.
SourceLocation Loc
Definition: SemaObjC.cpp:759
This represents one expression.
Definition: Expr.h:110
QualType getType() const
Definition: Expr.h:142
Represents a member of a struct/union/class.
Definition: Decl.h:3033
Represents a function declaration or definition.
Definition: Decl.h:1935
bool doesThisDeclarationHaveABody() const
Returns whether this specific declaration of the function has a body.
Definition: Decl.h:2261
FunctionDecl * getDefinition()
Get the definition for this declaration.
Definition: Decl.h:2217
A (possibly-)qualified type.
Definition: Type.h:929
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:996
QualType getCanonicalType() const
Definition: Type.h:7989
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1828
bool isRecordType() const
Definition: Type.h:8292
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:671
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:47
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
bool isLiteral(bool b) const
Definition: Formula.h:78
static Logger & null()
Returns a dummy logger that does nothing.
Definition: Logger.cpp:16
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
A storage location that is not subdivided further for the purposes of abstract interpretation.
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
static void printAtomList(const llvm::SmallVector< Atom > &Atoms, llvm::raw_ostream &OS)
void simplifyConstraints(llvm::SetVector< const Formula * > &Constraints, Arena &arena, SimplifyConstraintsInfo *Info=nullptr)
Simplifies a set of constraints (implicitly connected by "and") in a way that does not change satisfi...
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
Definition: ASTOps.cpp:35
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
Definition: ASTOps.cpp:75
static std::unique_ptr< Logger > makeLoggerFromCommandLine()
static llvm::DenseSet< llvm::StringRef > getKeys(const llvm::StringMap< T > &Map)
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
Definition: ASTOps.cpp:81
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
@ Invariant
The parameter is invariant: must match exactly.
@ Class
The "class" keyword introduces the elaborated-type-specifier.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Information on the way a set of constraints was simplified.
llvm::SmallVector< Atom > TrueAtoms
Atoms that the original constraints imply must be true.
llvm::SmallVector< llvm::SmallVector< Atom > > EquivalentAtoms
List of equivalence classes of atoms.
llvm::SmallVector< Atom > FalseAtoms
Atoms that the original constraints imply must be false.