22
22
#include " clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23
23
#include " clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24
24
#include " llvm/ADT/SmallString.h"
25
+ #include " llvm/Support/FormatVariadic.h"
25
26
#include " llvm/Support/raw_ostream.h"
26
27
#include < optional>
27
28
28
29
using namespace clang ;
29
30
using namespace ento ;
30
31
using namespace taint ;
32
+ using llvm::formatv;
31
33
32
34
namespace {
35
+ enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
36
+
33
37
class ArrayBoundCheckerV2 :
34
38
public Checker<check::Location> {
35
39
BugType BT{this , " Out-of-bound access" };
36
40
BugType TaintBT{this , " Out-of-bound access" , categories::TaintedData};
37
41
38
- enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
39
-
40
42
void reportOOB (CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
41
- SVal TaintedSVal = UnknownVal() ) const ;
43
+ NonLoc Offset, std::string RegName, std::string Msg ) const ;
42
44
43
45
static bool isFromCtypeMacro (const Stmt *S, ASTContext &AC);
44
46
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
53
55
// / Arr and the distance of Location from the beginning of Arr (expressed in a
54
56
// / NonLoc that specifies the number of CharUnits). Returns nullopt when these
55
57
// / cannot be determined.
56
- std::optional<std::pair<const SubRegion *, NonLoc>>
58
+ static std::optional<std::pair<const SubRegion *, NonLoc>>
57
59
computeOffset (ProgramStateRef State, SValBuilder &SVB, SVal Location) {
58
60
QualType T = SVB.getArrayIndexType ();
59
61
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
@@ -174,9 +176,116 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
174
176
return {nullptr , nullptr };
175
177
}
176
178
177
- void ArrayBoundCheckerV2::checkLocation (SVal location, bool isLoad,
178
- const Stmt* LoadS,
179
- CheckerContext &checkerContext) const {
179
+ static std::string getRegionName (const SubRegion *Region) {
180
+ if (std::string RegName = Region->getDescriptiveName (); !RegName.empty ())
181
+ return RegName;
182
+
183
+ // Field regions only have descriptive names when their parent has a
184
+ // descriptive name; so we provide a fallback representation for them:
185
+ if (const auto *FR = Region->getAs <FieldRegion>()) {
186
+ if (StringRef Name = FR->getDecl ()->getName (); !Name.empty ())
187
+ return formatv (" the field '{0}'" , Name);
188
+ return " the unnamed field" ;
189
+ }
190
+
191
+ if (isa<AllocaRegion>(Region))
192
+ return " the memory returned by 'alloca'" ;
193
+
194
+ if (isa<SymbolicRegion>(Region) &&
195
+ isa<HeapSpaceRegion>(Region->getMemorySpace ()))
196
+ return " the heap area" ;
197
+
198
+ if (isa<StringRegion>(Region))
199
+ return " the string literal" ;
200
+
201
+ return " the region" ;
202
+ }
203
+
204
+ static std::optional<int64_t > getConcreteValue (NonLoc SV) {
205
+ if (auto ConcreteVal = SV.getAs <nonloc::ConcreteInt>()) {
206
+ return ConcreteVal->getValue ().tryExtValue ();
207
+ }
208
+ return std::nullopt;
209
+ }
210
+
211
+ static std::string getShortMsg (OOB_Kind Kind, std::string RegName) {
212
+ static const char *ShortMsgTemplates[] = {
213
+ " Out of bound access to memory preceding {0}" ,
214
+ " Out of bound access to memory after the end of {0}" ,
215
+ " Potential out of bound access to {0} with tainted offset" };
216
+
217
+ return formatv (ShortMsgTemplates[Kind], RegName);
218
+ }
219
+
220
+ static std::string getPrecedesMsg (std::string RegName, NonLoc Offset) {
221
+ SmallString<128 > Buf;
222
+ llvm::raw_svector_ostream Out (Buf);
223
+ Out << " Access of " << RegName << " at negative byte offset" ;
224
+ if (auto ConcreteIdx = Offset.getAs <nonloc::ConcreteInt>())
225
+ Out << ' ' << ConcreteIdx->getValue ();
226
+ return std::string (Buf);
227
+ }
228
+ static std::string getExceedsMsg (ASTContext &ACtx, std::string RegName,
229
+ NonLoc Offset, NonLoc Extent, SVal Location) {
230
+ const auto *EReg = Location.getAsRegion ()->getAs <ElementRegion>();
231
+ assert (EReg && " this checker only handles element access" );
232
+ QualType ElemType = EReg->getElementType ();
233
+
234
+ std::optional<int64_t > OffsetN = getConcreteValue (Offset);
235
+ std::optional<int64_t > ExtentN = getConcreteValue (Extent);
236
+
237
+ bool UseByteOffsets = true ;
238
+ if (int64_t ElemSize = ACtx.getTypeSizeInChars (ElemType).getQuantity ()) {
239
+ const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
240
+ const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
241
+ if (!OffsetHasRemainder && !ExtentHasRemainder) {
242
+ UseByteOffsets = false ;
243
+ if (OffsetN)
244
+ *OffsetN /= ElemSize;
245
+ if (ExtentN)
246
+ *ExtentN /= ElemSize;
247
+ }
248
+ }
249
+
250
+ SmallString<256 > Buf;
251
+ llvm::raw_svector_ostream Out (Buf);
252
+ Out << " Access of " ;
253
+ if (!ExtentN && !UseByteOffsets)
254
+ Out << " '" << ElemType.getAsString () << " ' element in " ;
255
+ Out << RegName << " at " ;
256
+ if (OffsetN) {
257
+ Out << (UseByteOffsets ? " byte offset " : " index " ) << *OffsetN;
258
+ } else {
259
+ Out << " an overflowing " << (UseByteOffsets ? " byte offset" : " index" );
260
+ }
261
+ if (ExtentN) {
262
+ Out << " , while it holds only " ;
263
+ if (*ExtentN != 1 )
264
+ Out << *ExtentN;
265
+ else
266
+ Out << " a single" ;
267
+ if (UseByteOffsets)
268
+ Out << " byte" ;
269
+ else
270
+ Out << " '" << ElemType.getAsString () << " ' element" ;
271
+
272
+ if (*ExtentN > 1 )
273
+ Out << " s" ;
274
+ }
275
+
276
+ return std::string (Buf);
277
+ }
278
+ static std::string getTaintMsg (std::string RegName) {
279
+ SmallString<128 > Buf;
280
+ llvm::raw_svector_ostream Out (Buf);
281
+ Out << " Access of " << RegName
282
+ << " with a tainted offset that may be too large" ;
283
+ return std::string (Buf);
284
+ }
285
+
286
+ void ArrayBoundCheckerV2::checkLocation (SVal Location, bool IsLoad,
287
+ const Stmt *LoadS,
288
+ CheckerContext &C) const {
180
289
181
290
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
182
291
// some new logic here that reasons directly about memory region extents.
@@ -193,14 +302,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
193
302
// and incomplete analysis of these leads to false positives. As even
194
303
// accurate reports would be confusing for the users, just disable reports
195
304
// from these macros:
196
- if (isFromCtypeMacro (LoadS, checkerContext .getASTContext ()))
305
+ if (isFromCtypeMacro (LoadS, C .getASTContext ()))
197
306
return ;
198
307
199
- ProgramStateRef state = checkerContext .getState ();
200
- SValBuilder &svalBuilder = checkerContext .getSValBuilder ();
308
+ ProgramStateRef State = C .getState ();
309
+ SValBuilder &SVB = C .getSValBuilder ();
201
310
202
311
const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
203
- computeOffset (state, svalBuilder, location );
312
+ computeOffset (State, SVB, Location );
204
313
205
314
if (!RawOffset)
206
315
return ;
@@ -217,80 +326,71 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
217
326
// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
218
327
// non-symbolic regions (e.g. a field subregion of a symbolic region) in
219
328
// unknown space.
220
- auto [state_precedesLowerBound, state_withinLowerBound] =
221
- compareValueToThreshold (state, ByteOffset,
222
- svalBuilder.makeZeroArrayIndex (), svalBuilder);
329
+ auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold (
330
+ State, ByteOffset, SVB.makeZeroArrayIndex (), SVB);
223
331
224
- if (state_precedesLowerBound && !state_withinLowerBound ) {
332
+ if (PrecedesLowerBound && !WithinLowerBound ) {
225
333
// We know that the index definitely precedes the lower bound.
226
- reportOOB (checkerContext, state_precedesLowerBound, OOB_Precedes);
334
+ std::string RegName = getRegionName (Reg);
335
+ std::string Msg = getPrecedesMsg (RegName, ByteOffset);
336
+ reportOOB (C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
227
337
return ;
228
338
}
229
339
230
- if (state_withinLowerBound )
231
- state = state_withinLowerBound ;
340
+ if (WithinLowerBound )
341
+ State = WithinLowerBound ;
232
342
}
233
343
234
344
// CHECK UPPER BOUND
235
- DefinedOrUnknownSVal Size = getDynamicExtent (state , Reg, svalBuilder );
345
+ DefinedOrUnknownSVal Size = getDynamicExtent (State , Reg, SVB );
236
346
if (auto KnownSize = Size.getAs <NonLoc>()) {
237
- auto [state_withinUpperBound, state_exceedsUpperBound ] =
238
- compareValueToThreshold (state , ByteOffset, *KnownSize, svalBuilder );
347
+ auto [WithinUpperBound, ExceedsUpperBound ] =
348
+ compareValueToThreshold (State , ByteOffset, *KnownSize, SVB );
239
349
240
- if (state_exceedsUpperBound ) {
241
- if (!state_withinUpperBound ) {
350
+ if (ExceedsUpperBound ) {
351
+ if (!WithinUpperBound ) {
242
352
// We know that the index definitely exceeds the upper bound.
243
- reportOOB (checkerContext, state_exceedsUpperBound, OOB_Exceeds);
353
+ std::string RegName = getRegionName (Reg);
354
+ std::string Msg = getExceedsMsg (C.getASTContext (), RegName, ByteOffset,
355
+ *KnownSize, Location);
356
+ reportOOB (C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
244
357
return ;
245
358
}
246
- if (isTainted (state , ByteOffset)) {
359
+ if (isTainted (State , ByteOffset)) {
247
360
// Both cases are possible, but the index is tainted, so report.
248
- reportOOB (checkerContext, state_exceedsUpperBound, OOB_Taint,
249
- ByteOffset);
361
+ std::string RegName = getRegionName (Reg);
362
+ std::string Msg = getTaintMsg (RegName);
363
+ reportOOB (C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
250
364
return ;
251
365
}
252
366
}
253
367
254
- if (state_withinUpperBound )
255
- state = state_withinUpperBound ;
368
+ if (WithinUpperBound )
369
+ State = WithinUpperBound ;
256
370
}
257
371
258
- checkerContext .addTransition (state );
372
+ C .addTransition (State );
259
373
}
260
374
261
375
void ArrayBoundCheckerV2::reportOOB (CheckerContext &C,
262
376
ProgramStateRef ErrorState, OOB_Kind Kind,
263
- SVal TaintedSVal) const {
377
+ NonLoc Offset, std::string RegName,
378
+ std::string Msg) const {
264
379
265
380
ExplodedNode *ErrorNode = C.generateErrorNode (ErrorState);
266
381
if (!ErrorNode)
267
382
return ;
268
383
269
- // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270
- // a follow-up commit.
384
+ std::string ShortMsg = getShortMsg (Kind, RegName);
271
385
272
- SmallString<128 > Buf;
273
- llvm::raw_svector_ostream Out (Buf);
274
- Out << " Out of bound memory access " ;
275
-
276
- switch (Kind) {
277
- case OOB_Precedes:
278
- Out << " (accessed memory precedes memory block)" ;
279
- break ;
280
- case OOB_Exceeds:
281
- Out << " (access exceeds upper limit of memory block)" ;
282
- break ;
283
- case OOB_Taint:
284
- Out << " (index is tainted)" ;
285
- break ;
286
- }
287
386
auto BR = std::make_unique<PathSensitiveBugReport>(
288
- Kind == OOB_Taint ? TaintBT : BT, Out.str (), ErrorNode);
289
- // Track back the propagation of taintedness, or do nothing if TaintedSVal is
290
- // the default UnknownVal().
291
- for (SymbolRef Sym : getTaintedSymbols (ErrorState, TaintedSVal)) {
292
- BR->markInteresting (Sym);
293
- }
387
+ Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);
388
+
389
+ // Track back the propagation of taintedness.
390
+ if (Kind == OOB_Taint)
391
+ for (SymbolRef Sym : getTaintedSymbols (ErrorState, Offset))
392
+ BR->markInteresting (Sym);
393
+
294
394
C.emitReport (std::move (BR));
295
395
}
296
396
0 commit comments