diff --git a/.github/workflows/syntax-checks.yaml b/.github/workflows/syntax-checks.yaml index 27badf441d3..c0d00ac778a 100644 --- a/.github/workflows/syntax-checks.yaml +++ b/.github/workflows/syntax-checks.yaml @@ -59,3 +59,23 @@ jobs: rustup toolchain install stable --profile minimal --no-self-update -c clippy -c rustfmt - name: Run `cargo fmt` on top of Rust API project run: cd src/libcprover-rust; cargo fmt --all -- --check + + # This job takes approximately 3 minutes + check-irep-copies: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v6 + with: + submodules: recursive + - name: Fetch dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + sudo apt-get update + sudo apt-get install --no-install-recommends -yq \ + cmake g++ flex bison clang-18 llvm-18-dev libclang-18-dev \ + libclang-cpp18-dev + - name: Configure + run: cmake -S . -Bbuild -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + - name: Check for unnecessary irept copies + run: ./scripts/run_irep_copy_check.sh build diff --git a/scripts/check_irep_copies.py b/scripts/check_irep_copies.py new file mode 100644 index 00000000000..c005fb906b3 --- /dev/null +++ b/scripts/check_irep_copies.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +"""Detect irept copy-then-modify patterns that could use std::move. + +Scans C++ source files for patterns like: + exprt x = some_expr; // copy + x.set(...); // modify + +Where some_expr is a local variable or function return that isn't used +after the copy, meaning std::move could avoid the copy. + +This is a heuristic grep-based checker, not a full AST analysis. +It flags candidates for manual review, not guaranteed fixes. + +Usage: + python3 scripts/check_irep_copies.py src/goto-symex/*.cpp +""" + +import re +import sys +from pathlib import Path + +# Types that inherit from irept (copy = refcount increment + potential detach) +IREP_TYPES = { + "exprt", "typet", "codet", "irept", + "symbol_exprt", "ssa_exprt", "member_exprt", "index_exprt", + "if_exprt", "typecast_exprt", "address_of_exprt", + "equal_exprt", "notequal_exprt", "and_exprt", "or_exprt", + "not_exprt", "implies_exprt", "plus_exprt", "minus_exprt", + "mult_exprt", "div_exprt", "mod_exprt", + "binary_exprt", "unary_exprt", "multi_ary_exprt", + "dereference_exprt", "byte_extract_exprt", + "struct_exprt", "array_exprt", "union_exprt", + "constant_exprt", "string_constantt", + "source_locationt", "code_assignt", "code_returnt", + "goto_programt::instructiont", + "array_typet", "pointer_typet", "struct_typet", + "signedbv_typet", "unsignedbv_typet", "floatbv_typet", +} + +# Methods that modify an irept (trigger write()/detach()) +MODIFY_METHODS = { + "set", "add", "remove", "clear", "swap", + "set_identifier", "set_expression", "set_level_0", + "set_level_1", "set_level_2", "remove_level_2", + "operands", "type", "op0", "op1", "op2", "op3", + "make_typecast", "make_not", +} + +# Patterns that indicate the source is NOT used after the copy +# (conservative: only flag when source is a simple local variable) + + +def check_file(filepath): + """Check a single file for copy-then-modify patterns.""" + findings = [] + lines = Path(filepath).read_text().splitlines() + + for i, line in enumerate(lines): + # Match: Type varname = source; + # where Type is an irep type and source is not std::move(...) + m = re.match( + r'\s+(\w+)\s+(\w+)\s*=\s*(.+?)\s*;', + line + ) + if not m: + continue + + typename, varname, source = m.groups() + + # Skip if not an irep type + if typename not in IREP_TYPES: + continue + + # Skip if already using std::move + if "std::move" in source or "move(" in source: + continue + + # Skip const + if re.match(r'\s*const\s', line): + continue + + # Check if the variable is modified in the next few lines + modified = False + source_reused = False + # Extract the source variable name (if it's a simple variable) + source_var = re.match(r'(\w+)', source.strip()) + source_var = source_var.group(1) if source_var else None + + for j in range(i + 1, min(i + 15, len(lines))): + next_line = lines[j] + # Check if varname is modified + if re.search( + rf'\b{varname}\b\s*\.\s*(' + + '|'.join(MODIFY_METHODS) + r')\s*\(', + next_line + ): + modified = True + # Check if varname is passed to to_*_expr() which returns + # a mutable reference + if re.search(rf'to_\w+_expr\s*\(\s*{varname}\s*\)', next_line): + modified = True + # Check if source variable is used after the copy + if source_var and re.search( + rf'\b{source_var}\b', next_line + ): + source_reused = True + + if modified and not source_reused and source_var: + findings.append((i + 1, varname, typename, source.strip())) + + return findings + + +def main(): + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} [file2.cpp ...]") + sys.exit(1) + + total = 0 + for filepath in sys.argv[1:]: + findings = check_file(filepath) + for lineno, var, typ, src in findings: + print(f"{filepath}:{lineno}: {typ} {var} = {src} " + f"[could use std::move({src})]") + total += 1 + + if total: + print(f"\n{total} potential unnecessary copies found.") + else: + print("No unnecessary copies found.") + + +if __name__ == "__main__": + main() diff --git a/scripts/check_irep_moves.cpp b/scripts/check_irep_moves.cpp new file mode 100644 index 00000000000..65b1f62c3f9 --- /dev/null +++ b/scripts/check_irep_moves.cpp @@ -0,0 +1,433 @@ +// Detect irept copy-then-modify patterns that could use std::move. +// +// Build: +// clang++-18 -o check-irep-moves scripts/check_irep_moves.cpp \ +// $(llvm-config-18 --cxxflags | sed 's/-Werror//g') \ +// -L/usr/lib/llvm-18/lib -lclang-cpp \ +// $(llvm-config-18 --ldflags --libs --system-libs) -fno-rtti +// +// Usage: +// ./check-irep-moves -p build/compile_commands.json src/goto-symex/*.cpp + +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" + +using namespace clang; +using namespace clang::ast_matchers; +using namespace clang::tooling; + +static llvm::cl::OptionCategory Cat("check-irep-moves"); +static llvm::cl::opt + Debug("debug", llvm::cl::desc("Debug output"), llvm::cl::cat(Cat)); + +static bool inheritsFromIrep(const CXXRecordDecl *RD, int depth = 0) +{ + if(!RD || depth > 10) + return false; + auto name = RD->getNameAsString(); + if(name == "irept" || name == "sharing_treet") + return true; + // Try definition + const auto *Def = RD->getDefinition(); + if(!Def) + return false; + for(const auto &Base : Def->bases()) + { + auto BT = Base.getType(); + // Handle template specializations + if(const auto *TST = BT->getAs()) + { + if(auto *TD = TST->getTemplateName().getAsTemplateDecl()) + if(auto *TRD = dyn_cast(TD->getTemplatedDecl())) + if(inheritsFromIrep(TRD, depth + 1)) + return true; + } + if(const auto *BRD = BT->getAsCXXRecordDecl()) + if(inheritsFromIrep(BRD, depth + 1)) + return true; + } + return false; +} + +class PostCopyRefFinder : public RecursiveASTVisitor +{ +public: + const VarDecl *Target; + SourceLocation After; + const SourceManager &SM; + bool Found = false; + + PostCopyRefFinder(const VarDecl *T, SourceLocation A, const SourceManager &S) + : Target(T), After(A), SM(S) + { + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) + { + if( + DRE->getDecl() == Target && + SM.isBeforeInTranslationUnit(After, DRE->getBeginLoc())) + { + Found = true; + return false; + } + return true; + } +}; + +/// Check if a variable is ever modified (non-const method call, assignment, etc.) +/// Conservative: assumes mutable if we can't prove const. +class MutableUseChecker : public RecursiveASTVisitor +{ +public: + const VarDecl *Target; + bool HasMutableUse = false; + + MutableUseChecker(const VarDecl *T) : Target(T) + { + } + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *MCE) + { + // Check if this is a non-const method call on our variable + if(const auto *ObjExpr = MCE->getImplicitObjectArgument()) + { + if(const auto *DRE = dyn_cast(ObjExpr->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + if(const auto *MD = MCE->getMethodDecl()) + { + if(!MD->isConst()) + { + HasMutableUse = true; + return false; + } + } + else + { + HasMutableUse = true; // can't determine, assume mutable + return false; + } + } + } + } + return true; + } + + bool VisitBinaryOperator(BinaryOperator *BO) + { + if(BO->isAssignmentOp()) + { + if( + const auto *DRE = dyn_cast(BO->getLHS()->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; + return false; + } + } + } + return true; + } + + bool VisitUnaryOperator(UnaryOperator *UO) + { + if(UO->getOpcode() == UO_AddrOf) + { + if( + const auto *DRE = + dyn_cast(UO->getSubExpr()->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; + return false; + } + } + } + return true; + } + + bool VisitCallExpr(CallExpr *CE) + { + // Check if our variable is passed as a non-const reference argument + if(const auto *Callee = CE->getDirectCallee()) + { + for(unsigned i = 0; i < CE->getNumArgs() && i < Callee->getNumParams(); + ++i) + { + if( + const auto *DRE = + dyn_cast(CE->getArg(i)->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + QualType PT = Callee->getParamDecl(i)->getType(); + if( + PT->isReferenceType() && !PT->getPointeeType().isConstQualified()) + { + HasMutableUse = true; + return false; + } + } + } + } + } + else + { + // Can't resolve callee — check if any arg is our variable + for(unsigned i = 0; i < CE->getNumArgs(); ++i) + { + if( + const auto *DRE = + dyn_cast(CE->getArg(i)->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; // conservative + return false; + } + } + } + } + return true; + } +}; + +class Callback : public MatchFinder::MatchCallback +{ +public: + void run(const MatchFinder::MatchResult &Result) override + { + const auto *VD = Result.Nodes.getNodeAs("var"); + if(!VD) + return; + + const auto &SM = *Result.SourceManager; + if(!SM.isInMainFile(VD->getLocation())) + return; + if(VD->getType().isConstQualified()) + return; + + const auto *RD = VD->getType()->getAsCXXRecordDecl(); + bool isIrep = inheritsFromIrep(RD); + + if(Debug && RD) + { + llvm::errs() << " " << SM.getSpellingLineNumber(VD->getLocation()) + << ": " << VD->getNameAsString() + << " type=" << RD->getNameAsString() << " irep=" << isIrep + << "\n"; + } + + if(!isIrep) + return; + + // Check: copy that's never modified (should be const ref) + checkUnmodifiedCopy(VD, Result); + + // Get initializer, unwrap to find the source + const Expr *Init = VD->getInit(); + if(!Init) + return; + Init = Init->IgnoreImplicit(); + + // Must be copy construction + if(const auto *CE = dyn_cast(Init)) + { + if(CE->getNumArgs() != 1) + return; + auto *Ctor = CE->getConstructor(); + if(Ctor && Ctor->isMoveConstructor()) + return; + // Restrict to a true copy: skip converting constructors (e.g. + // `unsignedbv_typet t(some_size_t)`), where suggesting std::move + // is meaningless because no string-of-irept storage is being + // duplicated. + if(Ctor && !Ctor->isCopyConstructor()) + return; + Init = CE->getArg(0)->IgnoreImplicit(); + } + else + return; + + // Source must be a named variable + const auto *SrcRef = dyn_cast(Init); + if(!SrcRef) + return; + const auto *SrcVD = dyn_cast(SrcRef->getDecl()); + if(!SrcVD || isa(SrcVD)) + return; + { + // Skip const sources: `std::move` of a const value silently + // copy-constructs (the rvalue has type `const T&&` which binds + // to the copy constructor) and would in any case be unsafe for + // a `const auto &` alias into someone else's data. The check + // needs to look through references, since for `const auto &x` + // the variable's top-level type is a reference and is not + // itself const-qualified. + QualType SrcQT = SrcVD->getType(); + if(SrcQT->isReferenceType()) + SrcQT = SrcQT.getNonReferenceType(); + if(SrcQT.isConstQualified()) + return; + } + + // Check source not used after copy + const auto *FD = dyn_cast_or_null(VD->getDeclContext()); + if(!FD || !FD->getBody()) + return; + + PostCopyRefFinder Finder(SrcVD, VD->getEndLoc(), SM); + Finder.TraverseStmt(const_cast(FD->getBody())); + if(Finder.Found) + return; + + // Skip loop-reused sources: if `VD` is inside a loop body and + // `SrcVD` is declared outside that loop, then the loop will copy + // from `SrcVD` once per iteration; converting that copy to a move + // would leave a moved-from `SrcVD` for every subsequent iteration. + { + ASTContext &Ctx = *Result.Context; + const Stmt *EnclosingLoop = nullptr; + DynTypedNode Cur = DynTypedNode::create(*VD); + while(EnclosingLoop == nullptr) + { + const auto Parents = Ctx.getParents(Cur); + if(Parents.empty()) + break; + Cur = Parents[0]; + if(const auto *S = Cur.get()) + { + if( + isa(S) || isa(S) || isa(S) || + isa(S)) + { + EnclosingLoop = S; + break; + } + } + } + if( + EnclosingLoop && SM.isBeforeInTranslationUnit( + SrcVD->getBeginLoc(), EnclosingLoop->getBeginLoc())) + return; + } + + llvm::errs() << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() << "' copies '" + << SrcVD->getNameAsString() << "' (type " + << VD->getType().getAsString() + << ") which is not used afterwards; " + << "use std::move [cprover-unnecessary-irep-copy]\n"; + } + + void + checkUnmodifiedCopy(const VarDecl *VD, const MatchFinder::MatchResult &Result) + { + const auto &SM = *Result.SourceManager; + if(!SM.isInMainFile(VD->getLocation())) + return; + if(VD->getType().isConstQualified() || VD->getType()->isReferenceType()) + return; + const auto *RD = VD->getType()->getAsCXXRecordDecl(); + if(!inheritsFromIrep(RD)) + return; + const Expr *Init = VD->getInit(); + if(!Init) + return; + Init = Init->IgnoreImplicit(); + if(const auto *CE = dyn_cast(Init)) + { + if(CE->getNumArgs() != 1) + return; + auto *Ctor = CE->getConstructor(); + if(!Ctor || Ctor->isMoveConstructor()) + return; + } + else + return; + // Check if the source is a temporary (function return, constructor call). + // A const reference to a temporary would dangle. + const Expr *Src = Init; + if(const auto *CE = dyn_cast(Src)) + { + if(CE->getNumArgs() == 1) + Src = CE->getArg(0)->IgnoreImplicit(); + } + // If source is NOT a DeclRefExpr or MemberExpr, it's likely a temporary + if(!isa(Src) && !isa(Src)) + return; + // Skip MemberExpr sources: the existing MutableUseChecker only + // tracks mutations of the destination VarDecl, so it cannot detect + // a `this->member = ...` between the copy and a later use of VD. + // Suggesting `const auto &` would alias to the post-mutation value + // -- not the snapshot the user intended. + if(isa(Src)) + return; + // Skip reference-typed sources: a `T &alias` is often introduced + // precisely because the referee will be mutated, and a `const T &` + // copy would observe the post-mutation value rather than the + // snapshot the caller wants. We can't tell without flow analysis + // whether that's the case here, so play it safe. + if(const auto *DRE = dyn_cast(Src)) + { + if(const auto *SrcVD = dyn_cast(DRE->getDecl())) + { + if(SrcVD->getType()->isReferenceType()) + return; + } + } + // Source type must match variable type (otherwise it's a conversion) + if(Src->getType().getCanonicalType() != VD->getType().getCanonicalType()) + return; + // If source is a MemberExpr calling a method that returns by value, + // a const ref would dangle + if(const auto *ME = dyn_cast(Src)) + { + (void)ME; // MemberExpr accessing a field is fine (returns lvalue ref) + } + + const auto *FD = dyn_cast_or_null(VD->getDeclContext()); + if(!FD || !FD->getBody()) + return; + MutableUseChecker Checker(VD); + Checker.TraverseStmt(const_cast(FD->getBody())); + if(!Checker.HasMutableUse) + { + llvm::errs() << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() << "' (type " + << VD->getType().getAsString() + << ") is copy-initialized but never modified; " + << "use const reference [cprover-unmodified-irep-copy]\n"; + } + } +}; + +int main(int argc, const char **argv) +{ + auto EP = CommonOptionsParser::create(argc, argv, Cat); + if(!EP) + { + llvm::errs() << EP.takeError(); + return 1; + } + ClangTool Tool(EP->getCompilations(), EP->getSourcePathList()); + Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster( + "-Wno-unknown-warning-option", ArgumentInsertPosition::BEGIN)); + + Callback CB; + MatchFinder Finder; + Finder.addMatcher( + varDecl(hasLocalStorage(), hasInitializer(anything())).bind("var"), &CB); + return Tool.run(newFrontendActionFactory(&Finder).get()); +} diff --git a/scripts/run_diff.sh b/scripts/run_diff.sh index 3f41b836994..1e565508769 100755 --- a/scripts/run_diff.sh +++ b/scripts/run_diff.sh @@ -37,7 +37,7 @@ then echo "Ensure cpplint.py is inside the $script_folder directory then run again" exit 1 else - cmd='${script_folder}/cpplint.py --exclude=jbmc/src/miniz/* --filter=-whitespace/operators,-readability/identifier_spacing $file 2>&1 >/dev/null' + cmd='${script_folder}/cpplint.py --exclude=jbmc/src/miniz/* --exclude=scripts/check_irep_moves.cpp --filter=-whitespace/operators,-readability/identifier_spacing $file 2>&1 >/dev/null' fi else echo "Mode $mode not recognized" diff --git a/scripts/run_irep_copy_check.sh b/scripts/run_irep_copy_check.sh new file mode 100755 index 00000000000..688521702c9 --- /dev/null +++ b/scripts/run_irep_copy_check.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# CI script: check for unnecessary irept copies +# Requires: check-irep-moves binary (built from scripts/check_irep_moves.cpp) +# and a compile_commands.json in the build directory. +# +# Usage: scripts/run_irep_copy_check.sh [build-dir] +# +# Returns non-zero if any findings are detected. + +set -euo pipefail + +BUILD_DIR="${1:-build}" +TOOL="scripts/check-irep-moves" + +if [ ! -x "$TOOL" ]; then + echo "Building check-irep-moves..." + LLVM_VER=$(llvm-config --version 2>/dev/null | cut -d. -f1 || echo "") + if [ -z "$LLVM_VER" ]; then + for v in 20 18 15; do + if command -v llvm-config-$v >/dev/null 2>&1; then + LLVM_VER=$v; break + fi + done + fi + if [ -z "$LLVM_VER" ]; then + echo "Error: no LLVM installation found" >&2 + exit 1 + fi + clang++-$LLVM_VER -o "$TOOL" scripts/check_irep_moves.cpp \ + $(llvm-config-$LLVM_VER --cxxflags | sed 's/-Werror//g') \ + -L/usr/lib/llvm-$LLVM_VER/lib -lclang-cpp \ + $(llvm-config-$LLVM_VER --ldflags --libs --system-libs) \ + -fno-rtti +fi + +if [ ! -f "$BUILD_DIR/compile_commands.json" ]; then + echo "Error: $BUILD_DIR/compile_commands.json not found" >&2 + echo "Run: cmake -S . -B$BUILD_DIR -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" >&2 + exit 1 +fi + +# Check the hot paths +FINDINGS=$("$TOOL" -p "$BUILD_DIR/compile_commands.json" \ + src/goto-symex/*.cpp \ + src/goto-programs/*.cpp \ + src/ansi-c/goto-conversion/*.cpp \ + src/pointer-analysis/*.cpp \ + src/solvers/flattening/*.cpp \ + 2>&1 | grep "warning:" || true) + +if [ -n "$FINDINGS" ]; then + echo "$FINDINGS" + COUNT=$(echo "$FINDINGS" | wc -l) + echo "" + echo "$COUNT unnecessary irept copies found." + echo "Fix: use 'const auto &' for unmodified copies, 'std::move' for last-use copies." + exit 1 +else + echo "No unnecessary irept copies found." + exit 0 +fi diff --git a/src/ansi-c/goto-conversion/string_instrumentation.cpp b/src/ansi-c/goto-conversion/string_instrumentation.cpp index 598fe4574e0..0d6f0fb174b 100644 --- a/src/ansi-c/goto-conversion/string_instrumentation.cpp +++ b/src/ansi-c/goto-conversion/string_instrumentation.cpp @@ -764,7 +764,7 @@ void string_instrumentationt::do_strerror( index_exprt index( symbol_buf.symbol_expr(), from_integer(0, c_index_type()), char_type()); - address_of_exprt ptr(index); + address_of_exprt ptr(std::move(index)); // make that zero-terminated tmp.add(goto_programt::make_assignment( diff --git a/src/goto-programs/string_abstraction.cpp b/src/goto-programs/string_abstraction.cpp index 6de670c1794..09943654912 100644 --- a/src/goto-programs/string_abstraction.cpp +++ b/src/goto-programs/string_abstraction.cpp @@ -237,7 +237,7 @@ code_typet::parametert string_abstractiont::add_parameter( param_symbol.is_parameter = true; param_symbol.value.make_nil(); - code_typet::parametert str_parameter{final_type}; + code_typet::parametert str_parameter{std::move(final_type)}; str_parameter.add_source_location() = fct_symbol.location; str_parameter.set_base_name(param_symbol.base_name); str_parameter.set_identifier(param_symbol.name); diff --git a/src/goto-symex/shadow_memory_util.cpp b/src/goto-symex/shadow_memory_util.cpp index bc1d1ed1476..34eb8647ea3 100644 --- a/src/goto-symex/shadow_memory_util.cpp +++ b/src/goto-symex/shadow_memory_util.cpp @@ -404,7 +404,7 @@ static void extract_bytes_of_expr( element = conditional_cast_floatbv_to_unsignedbv(element); if(element.type().id() == ID_unsignedbv || element.type().id() == ID_signedbv) { - exprt value = element; + const auto &value = element; if(is_union) { extract_bytes_of_bv(value, element.type(), field_type, values); diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index aad9ae75f69..add8fa5f65b 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1987,7 +1987,7 @@ void value_sett::guard( // from_integer(location_number, typet(ID_natural)); dynamic_object.valid()=true_exprt(); - address_of_exprt address_of(dynamic_object); + address_of_exprt address_of(std::move(dynamic_object)); address_of.type() = to_dynamic_object_expr(expr).op0().type(); assign(to_dynamic_object_expr(expr).op0(), address_of, ns, false, false);