From 968508da3d5f2c366c4dd446961be7b249e8ce38 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 16 Apr 2026 19:15:34 +0000 Subject: [PATCH 1/7] Fix unnecessary irept copies found by check-irep-moves Address 8 findings from the clang-based irept copy checker: - 1 const reference (shadow_memory_util.cpp: unmodified copy) - 7 std::move (string_abstraction, goto_convert_side_effect, string_instrumentation, value_set, boolbv_extractbit, boolbv_index: source not used after copy) Co-authored-by: Kiro --- src/ansi-c/goto-conversion/goto_convert_side_effect.cpp | 2 +- src/ansi-c/goto-conversion/string_instrumentation.cpp | 2 +- src/goto-programs/string_abstraction.cpp | 4 ++-- src/goto-symex/shadow_memory_util.cpp | 2 +- src/pointer-analysis/value_set.cpp | 2 +- src/solvers/flattening/boolbv_extractbit.cpp | 2 +- src/solvers/flattening/boolbv_index.cpp | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp b/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp index 3282bd6dbfc..7e67c747fe2 100644 --- a/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp +++ b/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp @@ -337,7 +337,7 @@ goto_convertt::clean_expr_resultt goto_convertt::remove_post( if(result_is_used) { - exprt tmp = op; + exprt tmp = std::move(op); std::string suffix = "post"; if(auto sym_expr = expr_try_dynamic_cast(tmp)) { diff --git a/src/ansi-c/goto-conversion/string_instrumentation.cpp b/src/ansi-c/goto-conversion/string_instrumentation.cpp index 598fe4574e0..0d6f0fb174b 100644 --- a/src/ansi-c/goto-conversion/string_instrumentation.cpp +++ b/src/ansi-c/goto-conversion/string_instrumentation.cpp @@ -764,7 +764,7 @@ void string_instrumentationt::do_strerror( index_exprt index( symbol_buf.symbol_expr(), from_integer(0, c_index_type()), char_type()); - address_of_exprt ptr(index); + address_of_exprt ptr(std::move(index)); // make that zero-terminated tmp.add(goto_programt::make_assignment( diff --git a/src/goto-programs/string_abstraction.cpp b/src/goto-programs/string_abstraction.cpp index 6de670c1794..814c1279d5d 100644 --- a/src/goto-programs/string_abstraction.cpp +++ b/src/goto-programs/string_abstraction.cpp @@ -237,7 +237,7 @@ code_typet::parametert string_abstractiont::add_parameter( param_symbol.is_parameter = true; param_symbol.value.make_nil(); - code_typet::parametert str_parameter{final_type}; + code_typet::parametert str_parameter{std::move(final_type)}; str_parameter.add_source_location() = fct_symbol.location; str_parameter.set_base_name(param_symbol.base_name); str_parameter.set_identifier(param_symbol.name); @@ -750,7 +750,7 @@ const typet &string_abstractiont::build_abstraction_type_rec(const typet &type, } if(!new_comp.empty()) { - struct_union_typet abstracted_type = struct_union_type; + struct_union_typet abstracted_type = std::move(struct_union_type); abstracted_type.components().swap(new_comp); const symbolt &existing_tag_symbol = diff --git a/src/goto-symex/shadow_memory_util.cpp b/src/goto-symex/shadow_memory_util.cpp index bc1d1ed1476..34eb8647ea3 100644 --- a/src/goto-symex/shadow_memory_util.cpp +++ b/src/goto-symex/shadow_memory_util.cpp @@ -404,7 +404,7 @@ static void extract_bytes_of_expr( element = conditional_cast_floatbv_to_unsignedbv(element); if(element.type().id() == ID_unsignedbv || element.type().id() == ID_signedbv) { - exprt value = element; + const auto &value = element; if(is_union) { extract_bytes_of_bv(value, element.type(), field_type, values); diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index aad9ae75f69..add8fa5f65b 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1987,7 +1987,7 @@ void value_sett::guard( // from_integer(location_number, typet(ID_natural)); dynamic_object.valid()=true_exprt(); - address_of_exprt address_of(dynamic_object); + address_of_exprt address_of(std::move(dynamic_object)); address_of.type() = to_dynamic_object_expr(expr).op0().type(); assign(to_dynamic_object_expr(expr).op0(), address_of, ns, false, false); diff --git a/src/solvers/flattening/boolbv_extractbit.cpp b/src/solvers/flattening/boolbv_extractbit.cpp index 29dcb5c62cc..7535b66a9b8 100644 --- a/src/solvers/flattening/boolbv_extractbit.cpp +++ b/src/solvers/flattening/boolbv_extractbit.cpp @@ -51,7 +51,7 @@ literalt boolbvt::convert_extractbit(const extractbit_exprt &expr) std::size_t index_width = std::max(address_bits(src_bv_width), index_bv_width); - unsignedbv_typet index_type(index_width); + unsignedbv_typet index_type(std::move(index_width)); const auto index_casted = typecast_exprt::conditional_cast(index, index_type); diff --git a/src/solvers/flattening/boolbv_index.cpp b/src/solvers/flattening/boolbv_index.cpp index 2e400a57345..56a5907cdeb 100644 --- a/src/solvers/flattening/boolbv_index.cpp +++ b/src/solvers/flattening/boolbv_index.cpp @@ -354,7 +354,7 @@ bvt boolbvt::convert_index( index * (*subtype_bytes_opt), byte_extract_expr.offset().type())}, ns); - byte_extract_exprt be = byte_extract_expr; + byte_extract_exprt be = std::move(byte_extract_expr); be.offset() = new_offset; be.type() = array_type.element_type(); From c1497238c771b87e70431faeebfba8336d889408 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 16 Apr 2026 10:54:13 +0000 Subject: [PATCH 2/7] Add checker for irept copy-then-modify patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heuristic grep-based checker that flags local variables initialized by copying an irept-derived type, where the variable is subsequently modified and the source appears unused after the copy. These are candidates for std::move to avoid unnecessary copy-on-write detach. Found 22 candidates in goto-symex, 50 across the hot paths. Each needs manual review — some are false positives (const ref sources, loop-reused sources). Usage: python3 scripts/check_irep_copies.py src/goto-symex/*.cpp Co-authored-by: Kiro --- .github/workflows/syntax-checks.yaml | 19 ++ scripts/check_irep_copies.py | 134 ++++++++++ scripts/check_irep_moves.cpp | 350 +++++++++++++++++++++++++++ scripts/run_irep_copy_check.sh | 61 +++++ 4 files changed, 564 insertions(+) create mode 100644 scripts/check_irep_copies.py create mode 100644 scripts/check_irep_moves.cpp create mode 100755 scripts/run_irep_copy_check.sh diff --git a/.github/workflows/syntax-checks.yaml b/.github/workflows/syntax-checks.yaml index 27badf441d3..521a34d5889 100644 --- a/.github/workflows/syntax-checks.yaml +++ b/.github/workflows/syntax-checks.yaml @@ -59,3 +59,22 @@ jobs: rustup toolchain install stable --profile minimal --no-self-update -c clippy -c rustfmt - name: Run `cargo fmt` on top of Rust API project run: cd src/libcprover-rust; cargo fmt --all -- --check + + # This job takes approximately 3 minutes + check-irep-copies: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v6 + with: + submodules: recursive + - name: Fetch dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + sudo apt-get update + sudo apt-get install --no-install-recommends -yq \ + cmake g++ flex bison clang-18 llvm-18-dev libclang-18-dev + - name: Configure + run: cmake -S . -Bbuild -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + - name: Check for unnecessary irept copies + run: ./scripts/run_irep_copy_check.sh build diff --git a/scripts/check_irep_copies.py b/scripts/check_irep_copies.py new file mode 100644 index 00000000000..c005fb906b3 --- /dev/null +++ b/scripts/check_irep_copies.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +"""Detect irept copy-then-modify patterns that could use std::move. + +Scans C++ source files for patterns like: + exprt x = some_expr; // copy + x.set(...); // modify + +Where some_expr is a local variable or function return that isn't used +after the copy, meaning std::move could avoid the copy. + +This is a heuristic grep-based checker, not a full AST analysis. +It flags candidates for manual review, not guaranteed fixes. + +Usage: + python3 scripts/check_irep_copies.py src/goto-symex/*.cpp +""" + +import re +import sys +from pathlib import Path + +# Types that inherit from irept (copy = refcount increment + potential detach) +IREP_TYPES = { + "exprt", "typet", "codet", "irept", + "symbol_exprt", "ssa_exprt", "member_exprt", "index_exprt", + "if_exprt", "typecast_exprt", "address_of_exprt", + "equal_exprt", "notequal_exprt", "and_exprt", "or_exprt", + "not_exprt", "implies_exprt", "plus_exprt", "minus_exprt", + "mult_exprt", "div_exprt", "mod_exprt", + "binary_exprt", "unary_exprt", "multi_ary_exprt", + "dereference_exprt", "byte_extract_exprt", + "struct_exprt", "array_exprt", "union_exprt", + "constant_exprt", "string_constantt", + "source_locationt", "code_assignt", "code_returnt", + "goto_programt::instructiont", + "array_typet", "pointer_typet", "struct_typet", + "signedbv_typet", "unsignedbv_typet", "floatbv_typet", +} + +# Methods that modify an irept (trigger write()/detach()) +MODIFY_METHODS = { + "set", "add", "remove", "clear", "swap", + "set_identifier", "set_expression", "set_level_0", + "set_level_1", "set_level_2", "remove_level_2", + "operands", "type", "op0", "op1", "op2", "op3", + "make_typecast", "make_not", +} + +# Patterns that indicate the source is NOT used after the copy +# (conservative: only flag when source is a simple local variable) + + +def check_file(filepath): + """Check a single file for copy-then-modify patterns.""" + findings = [] + lines = Path(filepath).read_text().splitlines() + + for i, line in enumerate(lines): + # Match: Type varname = source; + # where Type is an irep type and source is not std::move(...) + m = re.match( + r'\s+(\w+)\s+(\w+)\s*=\s*(.+?)\s*;', + line + ) + if not m: + continue + + typename, varname, source = m.groups() + + # Skip if not an irep type + if typename not in IREP_TYPES: + continue + + # Skip if already using std::move + if "std::move" in source or "move(" in source: + continue + + # Skip const + if re.match(r'\s*const\s', line): + continue + + # Check if the variable is modified in the next few lines + modified = False + source_reused = False + # Extract the source variable name (if it's a simple variable) + source_var = re.match(r'(\w+)', source.strip()) + source_var = source_var.group(1) if source_var else None + + for j in range(i + 1, min(i + 15, len(lines))): + next_line = lines[j] + # Check if varname is modified + if re.search( + rf'\b{varname}\b\s*\.\s*(' + + '|'.join(MODIFY_METHODS) + r')\s*\(', + next_line + ): + modified = True + # Check if varname is passed to to_*_expr() which returns + # a mutable reference + if re.search(rf'to_\w+_expr\s*\(\s*{varname}\s*\)', next_line): + modified = True + # Check if source variable is used after the copy + if source_var and re.search( + rf'\b{source_var}\b', next_line + ): + source_reused = True + + if modified and not source_reused and source_var: + findings.append((i + 1, varname, typename, source.strip())) + + return findings + + +def main(): + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} [file2.cpp ...]") + sys.exit(1) + + total = 0 + for filepath in sys.argv[1:]: + findings = check_file(filepath) + for lineno, var, typ, src in findings: + print(f"{filepath}:{lineno}: {typ} {var} = {src} " + f"[could use std::move({src})]") + total += 1 + + if total: + print(f"\n{total} potential unnecessary copies found.") + else: + print("No unnecessary copies found.") + + +if __name__ == "__main__": + main() diff --git a/scripts/check_irep_moves.cpp b/scripts/check_irep_moves.cpp new file mode 100644 index 00000000000..f10953c5074 --- /dev/null +++ b/scripts/check_irep_moves.cpp @@ -0,0 +1,350 @@ +// Detect irept copy-then-modify patterns that could use std::move. +// +// Build: +// clang++-18 -o check-irep-moves scripts/check_irep_moves.cpp \ +// $(llvm-config-18 --cxxflags | sed 's/-Werror//g') \ +// -L/usr/lib/llvm-18/lib -lclang-cpp \ +// $(llvm-config-18 --ldflags --libs --system-libs) -fno-rtti +// +// Usage: +// ./check-irep-moves -p build/compile_commands.json src/goto-symex/*.cpp + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "llvm/Support/CommandLine.h" + +using namespace clang; +using namespace clang::ast_matchers; +using namespace clang::tooling; + +static llvm::cl::OptionCategory Cat("check-irep-moves"); +static llvm::cl::opt Debug("debug", llvm::cl::desc("Debug output"), + llvm::cl::cat(Cat)); + +static bool inheritsFromIrep(const CXXRecordDecl *RD, int depth = 0) +{ + if(!RD || depth > 10) + return false; + auto name = RD->getNameAsString(); + if(name == "irept" || name == "sharing_treet") + return true; + // Try definition + const auto *Def = RD->getDefinition(); + if(!Def) + return false; + for(const auto &Base : Def->bases()) + { + auto BT = Base.getType(); + // Handle template specializations + if(const auto *TST = BT->getAs()) + { + if(auto *TD = TST->getTemplateName().getAsTemplateDecl()) + if(auto *TRD = dyn_cast(TD->getTemplatedDecl())) + if(inheritsFromIrep(TRD, depth + 1)) + return true; + } + if(const auto *BRD = BT->getAsCXXRecordDecl()) + if(inheritsFromIrep(BRD, depth + 1)) + return true; + } + return false; +} + +class PostCopyRefFinder : public RecursiveASTVisitor +{ +public: + const VarDecl *Target; + SourceLocation After; + const SourceManager &SM; + bool Found = false; + + PostCopyRefFinder(const VarDecl *T, SourceLocation A, const SourceManager &S) + : Target(T), After(A), SM(S) {} + + bool VisitDeclRefExpr(DeclRefExpr *DRE) + { + if(DRE->getDecl() == Target && + SM.isBeforeInTranslationUnit(After, DRE->getBeginLoc())) + { + Found = true; + return false; + } + return true; + } +}; + + +/// Check if a variable is ever modified (non-const method call, assignment, etc.) +/// Conservative: assumes mutable if we can't prove const. +class MutableUseChecker : public RecursiveASTVisitor +{ +public: + const VarDecl *Target; + bool HasMutableUse = false; + + MutableUseChecker(const VarDecl *T) : Target(T) {} + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *MCE) + { + // Check if this is a non-const method call on our variable + if(const auto *ObjExpr = MCE->getImplicitObjectArgument()) + { + if(const auto *DRE = dyn_cast(ObjExpr->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + if(const auto *MD = MCE->getMethodDecl()) + { + if(!MD->isConst()) + { + HasMutableUse = true; + return false; + } + } + else + { + HasMutableUse = true; // can't determine, assume mutable + return false; + } + } + } + } + return true; + } + + bool VisitBinaryOperator(BinaryOperator *BO) + { + if(BO->isAssignmentOp()) + { + if(const auto *DRE = dyn_cast(BO->getLHS()->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; + return false; + } + } + } + return true; + } + + bool VisitUnaryOperator(UnaryOperator *UO) + { + if(UO->getOpcode() == UO_AddrOf) + { + if(const auto *DRE = dyn_cast(UO->getSubExpr()->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; + return false; + } + } + } + return true; + } + + bool VisitCallExpr(CallExpr *CE) + { + // Check if our variable is passed as a non-const reference argument + if(const auto *Callee = CE->getDirectCallee()) + { + for(unsigned i = 0; i < CE->getNumArgs() && i < Callee->getNumParams(); ++i) + { + if(const auto *DRE = dyn_cast(CE->getArg(i)->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + QualType PT = Callee->getParamDecl(i)->getType(); + if(PT->isReferenceType() && !PT->getPointeeType().isConstQualified()) + { + HasMutableUse = true; + return false; + } + } + } + } + } + else + { + // Can't resolve callee — check if any arg is our variable + for(unsigned i = 0; i < CE->getNumArgs(); ++i) + { + if(const auto *DRE = dyn_cast(CE->getArg(i)->IgnoreImplicit())) + { + if(DRE->getDecl() == Target) + { + HasMutableUse = true; // conservative + return false; + } + } + } + } + return true; + } +}; + +class Callback : public MatchFinder::MatchCallback +{ +public: + void run(const MatchFinder::MatchResult &Result) override + { + const auto *VD = Result.Nodes.getNodeAs("var"); + if(!VD) + return; + + const auto &SM = *Result.SourceManager; + if(!SM.isInMainFile(VD->getLocation())) + return; + if(VD->getType().isConstQualified()) + return; + + const auto *RD = VD->getType()->getAsCXXRecordDecl(); + bool isIrep = inheritsFromIrep(RD); + + if(Debug && RD) + { + llvm::errs() << " " << SM.getSpellingLineNumber(VD->getLocation()) + << ": " << VD->getNameAsString() + << " type=" << RD->getNameAsString() + << " irep=" << isIrep << "\n"; + } + + if(!isIrep) + return; + + // Check: copy that's never modified (should be const ref) + checkUnmodifiedCopy(VD, Result); + + // Get initializer, unwrap to find the source + const Expr *Init = VD->getInit(); + if(!Init) + return; + Init = Init->IgnoreImplicit(); + + // Must be copy construction + if(const auto *CE = dyn_cast(Init)) + { + if(CE->getNumArgs() != 1) + return; + auto *Ctor = CE->getConstructor(); + if(Ctor && Ctor->isMoveConstructor()) + return; + Init = CE->getArg(0)->IgnoreImplicit(); + } + else + return; + + // Source must be a named variable + const auto *SrcRef = dyn_cast(Init); + if(!SrcRef) + return; + const auto *SrcVD = dyn_cast(SrcRef->getDecl()); + if(!SrcVD || isa(SrcVD)) + return; + if(SrcVD->getType().isConstQualified()) + return; + + // Check source not used after copy + const auto *FD = dyn_cast_or_null(VD->getDeclContext()); + if(!FD || !FD->getBody()) + return; + + PostCopyRefFinder Finder(SrcVD, VD->getEndLoc(), SM); + Finder.TraverseStmt(const_cast(FD->getBody())); + if(Finder.Found) + return; + + llvm::errs() + << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() + << "' copies '" << SrcVD->getNameAsString() + << "' (type " << VD->getType().getAsString() + << ") which is not used afterwards; " + << "use std::move [cprover-unnecessary-irep-copy]\n"; + } + + void checkUnmodifiedCopy( + const VarDecl *VD, + const MatchFinder::MatchResult &Result) + { + const auto &SM = *Result.SourceManager; + if(!SM.isInMainFile(VD->getLocation())) + return; + if(VD->getType().isConstQualified() || VD->getType()->isReferenceType()) + return; + const auto *RD = VD->getType()->getAsCXXRecordDecl(); + if(!inheritsFromIrep(RD)) + return; + const Expr *Init = VD->getInit(); + if(!Init) + return; + Init = Init->IgnoreImplicit(); + if(const auto *CE = dyn_cast(Init)) + { + if(CE->getNumArgs() != 1) + return; + auto *Ctor = CE->getConstructor(); + if(!Ctor || Ctor->isMoveConstructor()) + return; + } + else + return; + // Check if the source is a temporary (function return, constructor call). + // A const reference to a temporary would dangle. + const Expr *Src = Init; + if(const auto *CE = dyn_cast(Src)) + { + if(CE->getNumArgs() == 1) + Src = CE->getArg(0)->IgnoreImplicit(); + } + // If source is NOT a DeclRefExpr or MemberExpr, it's likely a temporary + if(!isa(Src) && !isa(Src)) + return; + // Source type must match variable type (otherwise it's a conversion) + if(Src->getType().getCanonicalType() != VD->getType().getCanonicalType()) + return; + // If source is a MemberExpr calling a method that returns by value, + // a const ref would dangle + if(const auto *ME = dyn_cast(Src)) + { + (void)ME; // MemberExpr accessing a field is fine (returns lvalue ref) + } + + const auto *FD = dyn_cast_or_null(VD->getDeclContext()); + if(!FD || !FD->getBody()) + return; + MutableUseChecker Checker(VD); + Checker.TraverseStmt(const_cast(FD->getBody())); + if(!Checker.HasMutableUse) + { + llvm::errs() + << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() + << "' (type " << VD->getType().getAsString() + << ") is copy-initialized but never modified; " + << "use const reference [cprover-unmodified-irep-copy]\n"; + } + } +}; + +int main(int argc, const char **argv) +{ + auto EP = CommonOptionsParser::create(argc, argv, Cat); + if(!EP) { llvm::errs() << EP.takeError(); return 1; } + ClangTool Tool(EP->getCompilations(), EP->getSourcePathList()); + Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster( + "-Wno-unknown-warning-option", ArgumentInsertPosition::BEGIN)); + + Callback CB; + MatchFinder Finder; + Finder.addMatcher( + varDecl(hasLocalStorage(), hasInitializer(anything())).bind("var"), &CB); + return Tool.run(newFrontendActionFactory(&Finder).get()); +} diff --git a/scripts/run_irep_copy_check.sh b/scripts/run_irep_copy_check.sh new file mode 100755 index 00000000000..688521702c9 --- /dev/null +++ b/scripts/run_irep_copy_check.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# CI script: check for unnecessary irept copies +# Requires: check-irep-moves binary (built from scripts/check_irep_moves.cpp) +# and a compile_commands.json in the build directory. +# +# Usage: scripts/run_irep_copy_check.sh [build-dir] +# +# Returns non-zero if any findings are detected. + +set -euo pipefail + +BUILD_DIR="${1:-build}" +TOOL="scripts/check-irep-moves" + +if [ ! -x "$TOOL" ]; then + echo "Building check-irep-moves..." + LLVM_VER=$(llvm-config --version 2>/dev/null | cut -d. -f1 || echo "") + if [ -z "$LLVM_VER" ]; then + for v in 20 18 15; do + if command -v llvm-config-$v >/dev/null 2>&1; then + LLVM_VER=$v; break + fi + done + fi + if [ -z "$LLVM_VER" ]; then + echo "Error: no LLVM installation found" >&2 + exit 1 + fi + clang++-$LLVM_VER -o "$TOOL" scripts/check_irep_moves.cpp \ + $(llvm-config-$LLVM_VER --cxxflags | sed 's/-Werror//g') \ + -L/usr/lib/llvm-$LLVM_VER/lib -lclang-cpp \ + $(llvm-config-$LLVM_VER --ldflags --libs --system-libs) \ + -fno-rtti +fi + +if [ ! -f "$BUILD_DIR/compile_commands.json" ]; then + echo "Error: $BUILD_DIR/compile_commands.json not found" >&2 + echo "Run: cmake -S . -B$BUILD_DIR -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" >&2 + exit 1 +fi + +# Check the hot paths +FINDINGS=$("$TOOL" -p "$BUILD_DIR/compile_commands.json" \ + src/goto-symex/*.cpp \ + src/goto-programs/*.cpp \ + src/ansi-c/goto-conversion/*.cpp \ + src/pointer-analysis/*.cpp \ + src/solvers/flattening/*.cpp \ + 2>&1 | grep "warning:" || true) + +if [ -n "$FINDINGS" ]; then + echo "$FINDINGS" + COUNT=$(echo "$FINDINGS" | wc -l) + echo "" + echo "$COUNT unnecessary irept copies found." + echo "Fix: use 'const auto &' for unmodified copies, 'std::move' for last-use copies." + exit 1 +else + echo "No unnecessary irept copies found." + exit 0 +fi From 206fdde66b4ead750892a2774e3b538c0ca99d0f Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 27 May 2026 14:35:48 +0000 Subject: [PATCH 3/7] Drop misapplied std::move at four review-flagged sites The check-irep-moves heuristic flagged seven \`source not used after copy\` candidates which the previous commit converted to \`std::move\`. Reviewer Daniel Kroening pointed out four false positives: * src/solvers/flattening/boolbv_extractbit.cpp: \`index_width\` is a \`std::size_t\`; \`std::move\` on an integer is meaningless. * src/solvers/flattening/boolbv_index.cpp: \`byte_extract_expr\` is a \`const byte_extract_exprt &\` to a sub-expression of the parameter \`array\`; \`std::move\` of const yields \`const T &&\` which silently copy-constructs (so it doesn't move) and would in any case be unsafe (it would mutate the caller's \`array\`). * src/goto-programs/string_abstraction.cpp: \`struct_union_type\` is a \`const struct_union_typet &\` returned from \`namespacet::follow_tag\`; same const-move pitfall, and moving from a namespace-owned type is unsafe. * src/ansi-c/goto-conversion/goto_convert_side_effect.cpp: \`op\` is a \`const auto &\` to a sub-expression of \`expr\`; same const-move pitfall, and \`expr\` is still used a few lines later via \`expr.swap(tmp)\`, so even a real move would corrupt \`expr\`. In each case the simplest correction is to drop the \`std::move\`, restoring the plain copy that was there before the previous commit. The remaining three \`std::move\`s in that commit (string_instrumentation.cpp:767, string_abstraction.cpp:240, and value_set.cpp:1990) operate on local non-const irept-derived variables that are not used after the copy and are correct. Co-authored-by: Kiro --- src/ansi-c/goto-conversion/goto_convert_side_effect.cpp | 2 +- src/goto-programs/string_abstraction.cpp | 2 +- src/solvers/flattening/boolbv_extractbit.cpp | 2 +- src/solvers/flattening/boolbv_index.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp b/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp index 7e67c747fe2..3282bd6dbfc 100644 --- a/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp +++ b/src/ansi-c/goto-conversion/goto_convert_side_effect.cpp @@ -337,7 +337,7 @@ goto_convertt::clean_expr_resultt goto_convertt::remove_post( if(result_is_used) { - exprt tmp = std::move(op); + exprt tmp = op; std::string suffix = "post"; if(auto sym_expr = expr_try_dynamic_cast(tmp)) { diff --git a/src/goto-programs/string_abstraction.cpp b/src/goto-programs/string_abstraction.cpp index 814c1279d5d..09943654912 100644 --- a/src/goto-programs/string_abstraction.cpp +++ b/src/goto-programs/string_abstraction.cpp @@ -750,7 +750,7 @@ const typet &string_abstractiont::build_abstraction_type_rec(const typet &type, } if(!new_comp.empty()) { - struct_union_typet abstracted_type = std::move(struct_union_type); + struct_union_typet abstracted_type = struct_union_type; abstracted_type.components().swap(new_comp); const symbolt &existing_tag_symbol = diff --git a/src/solvers/flattening/boolbv_extractbit.cpp b/src/solvers/flattening/boolbv_extractbit.cpp index 7535b66a9b8..29dcb5c62cc 100644 --- a/src/solvers/flattening/boolbv_extractbit.cpp +++ b/src/solvers/flattening/boolbv_extractbit.cpp @@ -51,7 +51,7 @@ literalt boolbvt::convert_extractbit(const extractbit_exprt &expr) std::size_t index_width = std::max(address_bits(src_bv_width), index_bv_width); - unsignedbv_typet index_type(std::move(index_width)); + unsignedbv_typet index_type(index_width); const auto index_casted = typecast_exprt::conditional_cast(index, index_type); diff --git a/src/solvers/flattening/boolbv_index.cpp b/src/solvers/flattening/boolbv_index.cpp index 56a5907cdeb..2e400a57345 100644 --- a/src/solvers/flattening/boolbv_index.cpp +++ b/src/solvers/flattening/boolbv_index.cpp @@ -354,7 +354,7 @@ bvt boolbvt::convert_index( index * (*subtype_bytes_opt), byte_extract_expr.offset().type())}, ns); - byte_extract_exprt be = std::move(byte_extract_expr); + byte_extract_exprt be = byte_extract_expr; be.offset() = new_offset; be.type() = array_type.element_type(); From 3790742237f4a8740ac3a39b73264ba6032966dd Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 27 May 2026 14:36:55 +0000 Subject: [PATCH 4/7] scripts: fix clang-format-15 issues in check_irep_moves.cpp The check-clang-format CI job applies git-clang-format-15 to lines introduced by the PR. The new file scripts/check_irep_moves.cpp had two locally-clang-formatted lines that didn't match what clang-format-15 produces (header sort, declaration wrapping); apply git-clang-format-15's output verbatim so CI passes. Touches only the new file; no behavioural change. Co-authored-by: Kiro --- scripts/check_irep_moves.cpp | 84 +++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/scripts/check_irep_moves.cpp b/scripts/check_irep_moves.cpp index f10953c5074..581d6ac0f1e 100644 --- a/scripts/check_irep_moves.cpp +++ b/scripts/check_irep_moves.cpp @@ -9,12 +9,12 @@ // Usage: // ./check-irep-moves -p build/compile_commands.json src/goto-symex/*.cpp +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "llvm/Support/CommandLine.h" using namespace clang; @@ -22,8 +22,8 @@ using namespace clang::ast_matchers; using namespace clang::tooling; static llvm::cl::OptionCategory Cat("check-irep-moves"); -static llvm::cl::opt Debug("debug", llvm::cl::desc("Debug output"), - llvm::cl::cat(Cat)); +static llvm::cl::opt + Debug("debug", llvm::cl::desc("Debug output"), llvm::cl::cat(Cat)); static bool inheritsFromIrep(const CXXRecordDecl *RD, int depth = 0) { @@ -63,12 +63,15 @@ class PostCopyRefFinder : public RecursiveASTVisitor bool Found = false; PostCopyRefFinder(const VarDecl *T, SourceLocation A, const SourceManager &S) - : Target(T), After(A), SM(S) {} + : Target(T), After(A), SM(S) + { + } bool VisitDeclRefExpr(DeclRefExpr *DRE) { - if(DRE->getDecl() == Target && - SM.isBeforeInTranslationUnit(After, DRE->getBeginLoc())) + if( + DRE->getDecl() == Target && + SM.isBeforeInTranslationUnit(After, DRE->getBeginLoc())) { Found = true; return false; @@ -77,7 +80,6 @@ class PostCopyRefFinder : public RecursiveASTVisitor } }; - /// Check if a variable is ever modified (non-const method call, assignment, etc.) /// Conservative: assumes mutable if we can't prove const. class MutableUseChecker : public RecursiveASTVisitor @@ -86,7 +88,9 @@ class MutableUseChecker : public RecursiveASTVisitor const VarDecl *Target; bool HasMutableUse = false; - MutableUseChecker(const VarDecl *T) : Target(T) {} + MutableUseChecker(const VarDecl *T) : Target(T) + { + } bool VisitCXXMemberCallExpr(CXXMemberCallExpr *MCE) { @@ -120,7 +124,8 @@ class MutableUseChecker : public RecursiveASTVisitor { if(BO->isAssignmentOp()) { - if(const auto *DRE = dyn_cast(BO->getLHS()->IgnoreImplicit())) + if( + const auto *DRE = dyn_cast(BO->getLHS()->IgnoreImplicit())) { if(DRE->getDecl() == Target) { @@ -136,7 +141,9 @@ class MutableUseChecker : public RecursiveASTVisitor { if(UO->getOpcode() == UO_AddrOf) { - if(const auto *DRE = dyn_cast(UO->getSubExpr()->IgnoreImplicit())) + if( + const auto *DRE = + dyn_cast(UO->getSubExpr()->IgnoreImplicit())) { if(DRE->getDecl() == Target) { @@ -153,14 +160,18 @@ class MutableUseChecker : public RecursiveASTVisitor // Check if our variable is passed as a non-const reference argument if(const auto *Callee = CE->getDirectCallee()) { - for(unsigned i = 0; i < CE->getNumArgs() && i < Callee->getNumParams(); ++i) + for(unsigned i = 0; i < CE->getNumArgs() && i < Callee->getNumParams(); + ++i) { - if(const auto *DRE = dyn_cast(CE->getArg(i)->IgnoreImplicit())) + if( + const auto *DRE = + dyn_cast(CE->getArg(i)->IgnoreImplicit())) { if(DRE->getDecl() == Target) { QualType PT = Callee->getParamDecl(i)->getType(); - if(PT->isReferenceType() && !PT->getPointeeType().isConstQualified()) + if( + PT->isReferenceType() && !PT->getPointeeType().isConstQualified()) { HasMutableUse = true; return false; @@ -174,7 +185,9 @@ class MutableUseChecker : public RecursiveASTVisitor // Can't resolve callee — check if any arg is our variable for(unsigned i = 0; i < CE->getNumArgs(); ++i) { - if(const auto *DRE = dyn_cast(CE->getArg(i)->IgnoreImplicit())) + if( + const auto *DRE = + dyn_cast(CE->getArg(i)->IgnoreImplicit())) { if(DRE->getDecl() == Target) { @@ -210,8 +223,8 @@ class Callback : public MatchFinder::MatchCallback { llvm::errs() << " " << SM.getSpellingLineNumber(VD->getLocation()) << ": " << VD->getNameAsString() - << " type=" << RD->getNameAsString() - << " irep=" << isIrep << "\n"; + << " type=" << RD->getNameAsString() << " irep=" << isIrep + << "\n"; } if(!isIrep) @@ -259,19 +272,17 @@ class Callback : public MatchFinder::MatchCallback if(Finder.Found) return; - llvm::errs() - << SM.getFilename(VD->getLocation()) << ":" - << SM.getSpellingLineNumber(VD->getLocation()) - << ": warning: '" << VD->getNameAsString() - << "' copies '" << SrcVD->getNameAsString() - << "' (type " << VD->getType().getAsString() - << ") which is not used afterwards; " - << "use std::move [cprover-unnecessary-irep-copy]\n"; + llvm::errs() << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() << "' copies '" + << SrcVD->getNameAsString() << "' (type " + << VD->getType().getAsString() + << ") which is not used afterwards; " + << "use std::move [cprover-unnecessary-irep-copy]\n"; } - void checkUnmodifiedCopy( - const VarDecl *VD, - const MatchFinder::MatchResult &Result) + void + checkUnmodifiedCopy(const VarDecl *VD, const MatchFinder::MatchResult &Result) { const auto &SM = *Result.SourceManager; if(!SM.isInMainFile(VD->getLocation())) @@ -323,13 +334,12 @@ class Callback : public MatchFinder::MatchCallback Checker.TraverseStmt(const_cast(FD->getBody())); if(!Checker.HasMutableUse) { - llvm::errs() - << SM.getFilename(VD->getLocation()) << ":" - << SM.getSpellingLineNumber(VD->getLocation()) - << ": warning: '" << VD->getNameAsString() - << "' (type " << VD->getType().getAsString() - << ") is copy-initialized but never modified; " - << "use const reference [cprover-unmodified-irep-copy]\n"; + llvm::errs() << SM.getFilename(VD->getLocation()) << ":" + << SM.getSpellingLineNumber(VD->getLocation()) + << ": warning: '" << VD->getNameAsString() << "' (type " + << VD->getType().getAsString() + << ") is copy-initialized but never modified; " + << "use const reference [cprover-unmodified-irep-copy]\n"; } } }; @@ -337,7 +347,11 @@ class Callback : public MatchFinder::MatchCallback int main(int argc, const char **argv) { auto EP = CommonOptionsParser::create(argc, argv, Cat); - if(!EP) { llvm::errs() << EP.takeError(); return 1; } + if(!EP) + { + llvm::errs() << EP.takeError(); + return 1; + } ClangTool Tool(EP->getCompilations(), EP->getSourcePathList()); Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster( "-Wno-unknown-warning-option", ArgumentInsertPosition::BEGIN)); From db0de87becbd4ad0b9f6154781b5f59efde7efb0 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 27 May 2026 14:37:20 +0000 Subject: [PATCH 5/7] cpplint: exclude scripts/check_irep_moves.cpp The check-irep-moves clang-tooling driver intentionally lives outside the CBMC module hierarchy and uses upstream clang/llvm conventions (using-directives, llvm/clang headers, classes that don't end in 't'). Mirror the existing miniz exclusion to keep the file out of the cpplint scope. Co-authored-by: Kiro --- scripts/run_diff.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_diff.sh b/scripts/run_diff.sh index 3f41b836994..1e565508769 100755 --- a/scripts/run_diff.sh +++ b/scripts/run_diff.sh @@ -37,7 +37,7 @@ then echo "Ensure cpplint.py is inside the $script_folder directory then run again" exit 1 else - cmd='${script_folder}/cpplint.py --exclude=jbmc/src/miniz/* --filter=-whitespace/operators,-readability/identifier_spacing $file 2>&1 >/dev/null' + cmd='${script_folder}/cpplint.py --exclude=jbmc/src/miniz/* --exclude=scripts/check_irep_moves.cpp --filter=-whitespace/operators,-readability/identifier_spacing $file 2>&1 >/dev/null' fi else echo "Mode $mode not recognized" From 7624f9486091da7a6a444cf0a28400af19d67af0 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 27 May 2026 14:37:42 +0000 Subject: [PATCH 6/7] CI: install libclang-cpp18-dev for the check-irep-copies job The check-irep-copies job links the tool against -lclang-cpp, which on Ubuntu 24.04 is provided by libclang-cpp18-dev (not by libclang-18-dev, which only ships the C library libclang.so). Without this the link step fails with /usr/bin/ld: cannot find -lclang-cpp: No such file or directory Co-authored-by: Kiro --- .github/workflows/syntax-checks.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/syntax-checks.yaml b/.github/workflows/syntax-checks.yaml index 521a34d5889..c0d00ac778a 100644 --- a/.github/workflows/syntax-checks.yaml +++ b/.github/workflows/syntax-checks.yaml @@ -73,7 +73,8 @@ jobs: run: | sudo apt-get update sudo apt-get install --no-install-recommends -yq \ - cmake g++ flex bison clang-18 llvm-18-dev libclang-18-dev + cmake g++ flex bison clang-18 llvm-18-dev libclang-18-dev \ + libclang-cpp18-dev - name: Configure run: cmake -S . -Bbuild -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - name: Check for unnecessary irept copies From 7d3ebaf2ea5254e3dda7a1df4bb17d143d0778d6 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 27 May 2026 15:09:31 +0000 Subject: [PATCH 7/7] check_irep_moves: filter known false-positive categories Running the checker on develop's hot-paths produced seven findings: the four reviewer-flagged \`std::move\`-of-const-source false positives, plus three more (one converting constructor, one MemberExpr- sourced unmodified copy, one loop-reused source) that are similar in character. All seven are spurious -- they would either silently copy-construct anyway, or break correctness if a developer trusted the suggestion. Tighten the heuristic so the checker stops emitting them: * In the unnecessary-copy path: - require the chosen constructor to be a copy constructor (rejects converting one-arg ctors like \`unsignedbv_typet(std::size_t)\`); - look through references when checking \`isConstQualified\` on the source's type, so \`const auto &x = ...\` is recognised as a const source; - skip when the copy is inside a loop whose enclosing scope does not contain the source's declaration -- the source will be copied (and would be moved) once per iteration. * In the unmodified-copy path: - skip MemberExpr sources, because the existing MutableUseChecker only tracks mutations of the destination and so cannot detect a \`this->member = ...\` between the copy and a later use of the destination; - skip reference-typed sources for the same reason: a \`T &alias\` is often introduced precisely to be reseated/ mutated, and \`const T &\` aliases would observe the post-mutation value rather than the snapshot the caller wants. After these changes the checker reports zero findings on the hot paths covered by run_irep_copy_check.sh, including the four sites the previous commit had to revert from \`std::move\` back to plain copies. Co-authored-by: Kiro --- scripts/check_irep_moves.cpp | 73 +++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/scripts/check_irep_moves.cpp b/scripts/check_irep_moves.cpp index 581d6ac0f1e..65b1f62c3f9 100644 --- a/scripts/check_irep_moves.cpp +++ b/scripts/check_irep_moves.cpp @@ -247,6 +247,12 @@ class Callback : public MatchFinder::MatchCallback auto *Ctor = CE->getConstructor(); if(Ctor && Ctor->isMoveConstructor()) return; + // Restrict to a true copy: skip converting constructors (e.g. + // `unsignedbv_typet t(some_size_t)`), where suggesting std::move + // is meaningless because no string-of-irept storage is being + // duplicated. + if(Ctor && !Ctor->isCopyConstructor()) + return; Init = CE->getArg(0)->IgnoreImplicit(); } else @@ -259,8 +265,20 @@ class Callback : public MatchFinder::MatchCallback const auto *SrcVD = dyn_cast(SrcRef->getDecl()); if(!SrcVD || isa(SrcVD)) return; - if(SrcVD->getType().isConstQualified()) - return; + { + // Skip const sources: `std::move` of a const value silently + // copy-constructs (the rvalue has type `const T&&` which binds + // to the copy constructor) and would in any case be unsafe for + // a `const auto &` alias into someone else's data. The check + // needs to look through references, since for `const auto &x` + // the variable's top-level type is a reference and is not + // itself const-qualified. + QualType SrcQT = SrcVD->getType(); + if(SrcQT->isReferenceType()) + SrcQT = SrcQT.getNonReferenceType(); + if(SrcQT.isConstQualified()) + return; + } // Check source not used after copy const auto *FD = dyn_cast_or_null(VD->getDeclContext()); @@ -272,6 +290,37 @@ class Callback : public MatchFinder::MatchCallback if(Finder.Found) return; + // Skip loop-reused sources: if `VD` is inside a loop body and + // `SrcVD` is declared outside that loop, then the loop will copy + // from `SrcVD` once per iteration; converting that copy to a move + // would leave a moved-from `SrcVD` for every subsequent iteration. + { + ASTContext &Ctx = *Result.Context; + const Stmt *EnclosingLoop = nullptr; + DynTypedNode Cur = DynTypedNode::create(*VD); + while(EnclosingLoop == nullptr) + { + const auto Parents = Ctx.getParents(Cur); + if(Parents.empty()) + break; + Cur = Parents[0]; + if(const auto *S = Cur.get()) + { + if( + isa(S) || isa(S) || isa(S) || + isa(S)) + { + EnclosingLoop = S; + break; + } + } + } + if( + EnclosingLoop && SM.isBeforeInTranslationUnit( + SrcVD->getBeginLoc(), EnclosingLoop->getBeginLoc())) + return; + } + llvm::errs() << SM.getFilename(VD->getLocation()) << ":" << SM.getSpellingLineNumber(VD->getLocation()) << ": warning: '" << VD->getNameAsString() << "' copies '" @@ -317,6 +366,26 @@ class Callback : public MatchFinder::MatchCallback // If source is NOT a DeclRefExpr or MemberExpr, it's likely a temporary if(!isa(Src) && !isa(Src)) return; + // Skip MemberExpr sources: the existing MutableUseChecker only + // tracks mutations of the destination VarDecl, so it cannot detect + // a `this->member = ...` between the copy and a later use of VD. + // Suggesting `const auto &` would alias to the post-mutation value + // -- not the snapshot the user intended. + if(isa(Src)) + return; + // Skip reference-typed sources: a `T &alias` is often introduced + // precisely because the referee will be mutated, and a `const T &` + // copy would observe the post-mutation value rather than the + // snapshot the caller wants. We can't tell without flow analysis + // whether that's the case here, so play it safe. + if(const auto *DRE = dyn_cast(Src)) + { + if(const auto *SrcVD = dyn_cast(DRE->getDecl())) + { + if(SrcVD->getType()->isReferenceType()) + return; + } + } // Source type must match variable type (otherwise it's a conversion) if(Src->getType().getCanonicalType() != VD->getType().getCanonicalType()) return;