From 84e07c26ee56b758ed6c616ccbd16de26e1d9c79 Mon Sep 17 00:00:00 2001 From: Jutho Haegeman Date: Thu, 28 May 2026 12:05:36 +0200 Subject: [PATCH] taylorexponentiate part 1 --- src/common/balancing.jl | 125 +++++++++++++++++++++ src/implementations/exponential.jl | 172 +++++++++++++++++++++++++++++ src/interface/exponential.jl | 33 ++++++ src/matrixfunctions.jl | 1 - 4 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 src/common/balancing.jl create mode 100644 src/implementations/exponential.jl create mode 100644 src/interface/exponential.jl delete mode 100644 src/matrixfunctions.jl diff --git a/src/common/balancing.jl b/src/common/balancing.jl new file mode 100644 index 000000000..e2ec37ba9 --- /dev/null +++ b/src/common/balancing.jl @@ -0,0 +1,125 @@ +""" + balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T + +A pure Julia implementation of the GEBAL algorithm. +Balances a matrix to improve eigenvalue accuracy. +""" +function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T + n = LinearAlgebra.checksquare(A) + + scale = ones(real(T), n) + low = 1 + high = n + converged = false + + # --- Step 1: Permutation (Search for isolated eigenvalues) --- + # (Simplified for brevity; most performance gain comes from scaling) + # In a full GEBAL, we would swap rows/cols to move zeros to the corners. + + # --- Step 2: Scaling (The Ward Algorithm) --- + while !converged + converged = true + for i in low:high + # Calculate row and column norms (excluding diagonal) + row_norm = 0.0 + col_norm = 0.0 + for j in low:high + if i == j continue end + row_norm += abs(A[i, j]) + col_norm += abs(A[j, i]) + end + + # Avoid division by zero + if row_norm == 0 || col_norm == 0 + continue + end + + # Iterative scaling to bring row and col norms closer + g = row_norm / radix + f = 1.0 + s = col_norm + row_norm + + # While column norm is significantly smaller than row norm + while col_norm < g + f *= radix + col_norm *= radix^2 + end + + # While column norm is significantly larger than row norm + g = row_norm * radix + while col_norm >= g + f /= radix + col_norm /= radix^2 + end + + # Apply scaling if it's significant + if (col_norm + row_norm) / f < 0.95 * s + converged = false + g = 1.0 / f + scale[i] *= f + # Apply to rows + for j in 1:n + A[i, j] *= g + end + # Apply to columns + for j in 1:n + A[j, i] *= f + end + end + end + end + + return A, scale +end + + +function permute_matrix!(A::AbstractMatrix{T}) where T + n = size(A, 1) + low = 1 + high = n + + # Simple implementation of the permutation search + # We look for rows/cols that are essentially isolated + + # Search from the bottom up (high) and top down (low) + changed = true + while changed + changed = false + + # Look for a column with only one non-zero element + for j in high:-1:low + col = A[low:high, j] + if count(!iszero, col) == 1 + # Permute this column to the 'high' position + swap_cols!(A, j, high) + swap_rows!(A, j, high) + high -= 1 + changed = true + end + end + + # Look for a row with only one non-zero element + for i in low:high + row = A[i, low:high] + if count(!iszero, row) == 1 + # Permute this row to the 'low' position + swap_cols!(A, i, low) + swap_rows!(A, i, low) + low += 1 + changed = true + end + end + end + return low, high +end + +function swap_rows!(A, i, j) + for k in axes(A, 2) + A[i, k], A[j, k] = A[j, k], A[i, k] + end +end +function swap_cols!(A, i, j) + for k in axes(A, 1) + A[k, i], A[k, j] = A[k, j], A[k, i] + end +end \ No newline at end of file diff --git a/src/implementations/exponential.jl b/src/implementations/exponential.jl new file mode 100644 index 000000000..7e1adca96 --- /dev/null +++ b/src/implementations/exponential.jl @@ -0,0 +1,172 @@ +# Inputs +# ------ +function copy_input(::typeof(exponential), A::AbstractMatrix) + return copy!(similar(A, float(eltype(A))), A) +end + +copy_input(::typeof(exponential), A::Diagonal) = copy(A) + +function check_input(::typeof(exponential!), A::AbstractMatrix, expA::AbstractMatrix, alg::AbstractAlgorithm) + m = LinearAlgebra.checksquare(A) + @check_size(expA, (m, m)) + return @check_scalar(expA, A) +end + +function check_input(::typeof(exponential!), A::AbstractMatrix, expA::AbstractMatrix, ::DiagonalAlgorithm) + m = LinearAlgebra.checksquare(A) + isdiag(A) || throw(DimensionMismatch("diagonal input matrix expected")) + @assert expA isa Diagonal + @check_size(expA, (m, m)) + @check_scalar(expA, A) + return nothing +end + +# Outputs +# ------- +initialize_output(::typeof(exponential!), A::AbstractMatrix, ::AbstractAlgorithm) = A + +# Implementation +# -------------- +function exponential!(A, expA, alg::MatrixFunctionViaTaylor) + check_input(exponential!, A, expA, alg) + return exponential_via_taylor!(A, expA) +end + + +module TaylorExponential + +""" + exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat + +An implementation of the Fasi & Higham (2018) Taylor-based scaling +and squaring for arbitrary precision. +""" +function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where T + n = size(A, 1) + ϵ = eps(T) + Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed + for k = 2:length(Apowers) + Apowers[k] = Apowers[k-1] * A + end + ρA = opnorm(Apowers[end], 1)^(1/length(Apowers)) # estimate of the spectral radius using Gelfand's formula + + # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ + m, s = optimal_taylor_order(ρA, ϵ) + + # Scale A down by 2^s + A ./= T(2)^s + + # Evaluate Taylor via Paterson-Stockmeyer approach + X = evaluate_taylor_ps(As, m) + + # Squaring to undo the scaling + for _ in 1:s + X = X * X + end + + return X +end + +""" +Evaluates Taylor series using Paterson-Stockmeyer logic. +""" +function evaluate_taylor_ps(A, m) + T = eltype(A) + k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer + + # Precompute powers A^2, ..., A^k + powers = Vector{typeof(A)}(undef, k) + powers[1] = A + for i in 2:k + powers[i] = powers[i-1] * A + end + + # Horner-like evaluation of the outer polynomial + # e^A ≈ ∑ (A^k)^j * P_j(A) + res = zeros(T, size(A)) + num_chunks = Int(ceil((m+1)/k)) + + for j in (num_chunks-1):-1:0 + # Evaluate sub-polynomial P_j(A) of degree k-1 + poly_chunk = zeros(T, size(A)) + for i in 0:k-1 + idx = j*k + i + if idx > m; continue; end + coeff = 1 / factorial(big(idx)) + if i == 0 + poly_chunk += T(coeff) * I + else + poly_chunk += T(coeff) * powers[i] + end + end + + if j == num_chunks - 1 + res = poly_chunk + else + res = res * powers[k] + poly_chunk + end + end + return res +end + +function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial + costs = Dict{Int, Int}() + for m in 1:max_m + # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1 + best_c = m # Default naive cost + for k in 1:m + c = (k - 1) + div(m-1, k) # == (k - 1) + ceil(Int, m/k) - 1 + if c < best_c + best_c = c + end + end + costs[m] = best_c + end + return costs +end + +# To filter only for the "efficient" m (where degree increases for the same cost) +function get_efficient_m(max_m::Int) + costs = get_ps_costs(max_m) + efficient = Dict{Int, Int}() + next_cost = costs[1] + for m in 1:max_m + cost = next_cost + next_cost = m < max_m ? costs[m+1] : cost + 1 + if cost < next_cost + efficient[m] = cost + end + end + return efficient +end + +PS_COSTS = get_efficient_m(100) + +""" +Optimizes m and s to minimize cost ≈ m_mults + s. +""" +function optimal_taylor_order(ρA, ϵ) + # In a full Fasi-Higham implementation, this would use a small + # search loop or a cost-model lookup. + # Here is a simplified version for BigFloat: + best_m, best_s = 0, 0 + min_cost = typemax(Int) + + # Search over efficient Paterson-Stockmeyer degrees + for (m, c) in PS_COSTS + # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ + # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ) + # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1) + term = (log2(ρA) - (log2(factorial(big(m+1))) / (m+1)) - (log2(ϵ) / (m+1))) + s = max(0, ceil(Int, term)) + + cost = c + s + if cost < min_cost || (cost == min_cost && m < best_m) + min_cost = cost + best_m, best_s = m, s + end + end + return best_m, best_s +end + +end \ No newline at end of file diff --git a/src/interface/exponential.jl b/src/interface/exponential.jl new file mode 100644 index 000000000..3b6145d41 --- /dev/null +++ b/src/interface/exponential.jl @@ -0,0 +1,33 @@ +# Exponential functions +# -------------- + +""" + exponential(A; kwargs...) -> expA + exponential(A, alg::AbstractAlgorithm) -> expA + exponential!(A, [expA]; kwargs...) -> expA + exponential!(A, [expA], alg::AbstractAlgorithm) -> expA + +Compute the exponential of the square matrix `A`, + +!!! note + The bang method `exponential!` optionally accepts the output structure and + possibly destroys the input matrix `A`. Always use the return value of the function + as it may not always be possible to use the provided `expA` as output. +""" +@functiondef exponential + +# Algorithm selection +# ------------------- +default_exponential_algorithm(A; kwargs...) = default_exponential_algorithm(typeof(A); kwargs...) +function default_exponential_algorithm(T::Type; kwargs...) + return MatrixFunctionViaTaylor(; kwargs...) +end +function default_exponential_algorithm(::Type{T}; kwargs...) where {T <: Diagonal} + return DiagonalAlgorithm(; kwargs...) +end + +for f in (:exponential!,) + @eval function default_algorithm(::typeof($f), ::Type{A}; kwargs...) where {A} + return default_exponential_algorithm(A; kwargs...) + end +end \ No newline at end of file diff --git a/src/matrixfunctions.jl b/src/matrixfunctions.jl deleted file mode 100644 index 8b1378917..000000000 --- a/src/matrixfunctions.jl +++ /dev/null @@ -1 +0,0 @@ -