Skip to content

Commit 16ef496

Browse files
authored
[analyzer] Improve diagnostics from ArrayBoundCheckerV2 (#70056)
Previously alpha.security.ArrayBoundV2 produced very spartan bug reports; this commit ensures that the relevant (and known) details are reported to the user. The logic for detecting bugs is not changed, after this commit the checker will report the same set of issues, but with better messages. To test the details of the message generation this commit adds a new test file 'out-of-bounds-diagnostics.c'. Three of the testcases are added with FIXME notes because they reveal shortcomings of the existing modeling and bounds checking code. I will try to fix them in separate follow-up commits.
1 parent d0ef94b commit 16ef496

File tree

7 files changed

+349
-100
lines changed

7 files changed

+349
-100
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp

Lines changed: 154 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,25 @@
2222
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
2323
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
2424
#include "llvm/ADT/SmallString.h"
25+
#include "llvm/Support/FormatVariadic.h"
2526
#include "llvm/Support/raw_ostream.h"
2627
#include <optional>
2728

2829
using namespace clang;
2930
using namespace ento;
3031
using namespace taint;
32+
using llvm::formatv;
3133

3234
namespace {
35+
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
36+
3337
class ArrayBoundCheckerV2 :
3438
public Checker<check::Location> {
3539
BugType BT{this, "Out-of-bound access"};
3640
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
3741

38-
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
39-
4042
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
41-
SVal TaintedSVal = UnknownVal()) const;
43+
NonLoc Offset, std::string RegName, std::string Msg) const;
4244

4345
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
4446

@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
5355
/// Arr and the distance of Location from the beginning of Arr (expressed in a
5456
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
5557
/// cannot be determined.
56-
std::optional<std::pair<const SubRegion *, NonLoc>>
58+
static std::optional<std::pair<const SubRegion *, NonLoc>>
5759
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
5860
QualType T = SVB.getArrayIndexType();
5961
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
@@ -174,9 +176,116 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
174176
return {nullptr, nullptr};
175177
}
176178

177-
void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
178-
const Stmt* LoadS,
179-
CheckerContext &checkerContext) const {
179+
static std::string getRegionName(const SubRegion *Region) {
180+
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
181+
return RegName;
182+
183+
// Field regions only have descriptive names when their parent has a
184+
// descriptive name; so we provide a fallback representation for them:
185+
if (const auto *FR = Region->getAs<FieldRegion>()) {
186+
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
187+
return formatv("the field '{0}'", Name);
188+
return "the unnamed field";
189+
}
190+
191+
if (isa<AllocaRegion>(Region))
192+
return "the memory returned by 'alloca'";
193+
194+
if (isa<SymbolicRegion>(Region) &&
195+
isa<HeapSpaceRegion>(Region->getMemorySpace()))
196+
return "the heap area";
197+
198+
if (isa<StringRegion>(Region))
199+
return "the string literal";
200+
201+
return "the region";
202+
}
203+
204+
static std::optional<int64_t> getConcreteValue(NonLoc SV) {
205+
if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
206+
return ConcreteVal->getValue().tryExtValue();
207+
}
208+
return std::nullopt;
209+
}
210+
211+
static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
212+
static const char *ShortMsgTemplates[] = {
213+
"Out of bound access to memory preceding {0}",
214+
"Out of bound access to memory after the end of {0}",
215+
"Potential out of bound access to {0} with tainted offset"};
216+
217+
return formatv(ShortMsgTemplates[Kind], RegName);
218+
}
219+
220+
static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
221+
SmallString<128> Buf;
222+
llvm::raw_svector_ostream Out(Buf);
223+
Out << "Access of " << RegName << " at negative byte offset";
224+
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
225+
Out << ' ' << ConcreteIdx->getValue();
226+
return std::string(Buf);
227+
}
228+
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
229+
NonLoc Offset, NonLoc Extent, SVal Location) {
230+
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
231+
assert(EReg && "this checker only handles element access");
232+
QualType ElemType = EReg->getElementType();
233+
234+
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
235+
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
236+
237+
bool UseByteOffsets = true;
238+
if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
239+
const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
240+
const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
241+
if (!OffsetHasRemainder && !ExtentHasRemainder) {
242+
UseByteOffsets = false;
243+
if (OffsetN)
244+
*OffsetN /= ElemSize;
245+
if (ExtentN)
246+
*ExtentN /= ElemSize;
247+
}
248+
}
249+
250+
SmallString<256> Buf;
251+
llvm::raw_svector_ostream Out(Buf);
252+
Out << "Access of ";
253+
if (!ExtentN && !UseByteOffsets)
254+
Out << "'" << ElemType.getAsString() << "' element in ";
255+
Out << RegName << " at ";
256+
if (OffsetN) {
257+
Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
258+
} else {
259+
Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
260+
}
261+
if (ExtentN) {
262+
Out << ", while it holds only ";
263+
if (*ExtentN != 1)
264+
Out << *ExtentN;
265+
else
266+
Out << "a single";
267+
if (UseByteOffsets)
268+
Out << " byte";
269+
else
270+
Out << " '" << ElemType.getAsString() << "' element";
271+
272+
if (*ExtentN > 1)
273+
Out << "s";
274+
}
275+
276+
return std::string(Buf);
277+
}
278+
static std::string getTaintMsg(std::string RegName) {
279+
SmallString<128> Buf;
280+
llvm::raw_svector_ostream Out(Buf);
281+
Out << "Access of " << RegName
282+
<< " with a tainted offset that may be too large";
283+
return std::string(Buf);
284+
}
285+
286+
void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
287+
const Stmt *LoadS,
288+
CheckerContext &C) const {
180289

181290
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
182291
// some new logic here that reasons directly about memory region extents.
@@ -193,14 +302,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
193302
// and incomplete analysis of these leads to false positives. As even
194303
// accurate reports would be confusing for the users, just disable reports
195304
// from these macros:
196-
if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
305+
if (isFromCtypeMacro(LoadS, C.getASTContext()))
197306
return;
198307

199-
ProgramStateRef state = checkerContext.getState();
200-
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
308+
ProgramStateRef State = C.getState();
309+
SValBuilder &SVB = C.getSValBuilder();
201310

202311
const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
203-
computeOffset(state, svalBuilder, location);
312+
computeOffset(State, SVB, Location);
204313

205314
if (!RawOffset)
206315
return;
@@ -217,80 +326,71 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
217326
// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
218327
// non-symbolic regions (e.g. a field subregion of a symbolic region) in
219328
// unknown space.
220-
auto [state_precedesLowerBound, state_withinLowerBound] =
221-
compareValueToThreshold(state, ByteOffset,
222-
svalBuilder.makeZeroArrayIndex(), svalBuilder);
329+
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
330+
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
223331

224-
if (state_precedesLowerBound && !state_withinLowerBound) {
332+
if (PrecedesLowerBound && !WithinLowerBound) {
225333
// We know that the index definitely precedes the lower bound.
226-
reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
334+
std::string RegName = getRegionName(Reg);
335+
std::string Msg = getPrecedesMsg(RegName, ByteOffset);
336+
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
227337
return;
228338
}
229339

230-
if (state_withinLowerBound)
231-
state = state_withinLowerBound;
340+
if (WithinLowerBound)
341+
State = WithinLowerBound;
232342
}
233343

234344
// CHECK UPPER BOUND
235-
DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
345+
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
236346
if (auto KnownSize = Size.getAs<NonLoc>()) {
237-
auto [state_withinUpperBound, state_exceedsUpperBound] =
238-
compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
347+
auto [WithinUpperBound, ExceedsUpperBound] =
348+
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
239349

240-
if (state_exceedsUpperBound) {
241-
if (!state_withinUpperBound) {
350+
if (ExceedsUpperBound) {
351+
if (!WithinUpperBound) {
242352
// We know that the index definitely exceeds the upper bound.
243-
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
353+
std::string RegName = getRegionName(Reg);
354+
std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
355+
*KnownSize, Location);
356+
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
244357
return;
245358
}
246-
if (isTainted(state, ByteOffset)) {
359+
if (isTainted(State, ByteOffset)) {
247360
// Both cases are possible, but the index is tainted, so report.
248-
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
249-
ByteOffset);
361+
std::string RegName = getRegionName(Reg);
362+
std::string Msg = getTaintMsg(RegName);
363+
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
250364
return;
251365
}
252366
}
253367

254-
if (state_withinUpperBound)
255-
state = state_withinUpperBound;
368+
if (WithinUpperBound)
369+
State = WithinUpperBound;
256370
}
257371

258-
checkerContext.addTransition(state);
372+
C.addTransition(State);
259373
}
260374

261375
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
262376
ProgramStateRef ErrorState, OOB_Kind Kind,
263-
SVal TaintedSVal) const {
377+
NonLoc Offset, std::string RegName,
378+
std::string Msg) const {
264379

265380
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
266381
if (!ErrorNode)
267382
return;
268383

269-
// FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270-
// a follow-up commit.
384+
std::string ShortMsg = getShortMsg(Kind, RegName);
271385

272-
SmallString<128> Buf;
273-
llvm::raw_svector_ostream Out(Buf);
274-
Out << "Out of bound memory access ";
275-
276-
switch (Kind) {
277-
case OOB_Precedes:
278-
Out << "(accessed memory precedes memory block)";
279-
break;
280-
case OOB_Exceeds:
281-
Out << "(access exceeds upper limit of memory block)";
282-
break;
283-
case OOB_Taint:
284-
Out << "(index is tainted)";
285-
break;
286-
}
287386
auto BR = std::make_unique<PathSensitiveBugReport>(
288-
Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
289-
// Track back the propagation of taintedness, or do nothing if TaintedSVal is
290-
// the default UnknownVal().
291-
for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
292-
BR->markInteresting(Sym);
293-
}
387+
Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);
388+
389+
// Track back the propagation of taintedness.
390+
if (Kind == OOB_Taint)
391+
for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
392+
BR->markInteresting(Sym);
393+
294394
C.emitReport(std::move(BR));
295395
}
296396

0 commit comments

Comments
 (0)