From 84e07c26ee56b758ed6c616ccbd16de26e1d9c79 Mon Sep 17 00:00:00 2001
From: Jutho Haegeman <jutho.haegeman@ugent.be>
Date: Thu, 28 May 2026 12:05:36 +0200
Subject: [PATCH] taylorexponentiate part 1

---
 src/common/balancing.jl            | 125 +++++++++++++++++++++
 src/implementations/exponential.jl | 172 +++++++++++++++++++++++++++++
 src/interface/exponential.jl       |  33 ++++++
 src/matrixfunctions.jl             |   1 -
 4 files changed, 330 insertions(+), 1 deletion(-)
 create mode 100644 src/common/balancing.jl
 create mode 100644 src/implementations/exponential.jl
 create mode 100644 src/interface/exponential.jl
 delete mode 100644 src/matrixfunctions.jl

diff --git a/src/common/balancing.jl b/src/common/balancing.jl
new file mode 100644
index 000000000..e2ec37ba9
--- /dev/null
+++ b/src/common/balancing.jl
@@ -0,0 +1,125 @@
+"""
+    balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
+
+A pure Julia implementation of the GEBAL algorithm. 
+Balances a matrix to improve eigenvalue accuracy.
+"""
+function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
+    n = LinearAlgebra.checksquare(A)
+    
+    scale = ones(real(T), n)
+    low = 1
+    high = n
+    converged = false
+
+    # --- Step 1: Permutation (Search for isolated eigenvalues) ---
+    # (Simplified for brevity; most performance gain comes from scaling)
+    # In a full GEBAL, we would swap rows/cols to move zeros to the corners.
+
+    # --- Step 2: Scaling (The Ward Algorithm) ---
+    while !converged
+        converged = true
+        for i in low:high
+            # Calculate row and column norms (excluding diagonal)
+            row_norm = 0.0
+            col_norm = 0.0
+            for j in low:high
+                if i == j continue end
+                row_norm += abs(A[i, j])
+                col_norm += abs(A[j, i])
+            end
+
+            # Avoid division by zero
+            if row_norm == 0 || col_norm == 0
+                continue
+            end
+
+            # Iterative scaling to bring row and col norms closer
+            g = row_norm / radix
+            f = 1.0
+            s = col_norm + row_norm
+            
+            # While column norm is significantly smaller than row norm
+            while col_norm < g
+                f *= radix
+                col_norm *= radix^2
+            end
+            
+            # While column norm is significantly larger than row norm
+            g = row_norm * radix
+            while col_norm >= g
+                f /= radix
+                col_norm /= radix^2
+            end
+
+            # Apply scaling if it's significant
+            if (col_norm + row_norm) / f < 0.95 * s
+                converged = false
+                g = 1.0 / f
+                scale[i] *= f
+                # Apply to rows
+                for j in 1:n
+                    A[i, j] *= g
+                end
+                # Apply to columns
+                for j in 1:n
+                    A[j, i] *= f
+                end
+            end
+        end
+    end
+    
+    return A, scale
+end
+
+
+function permute_matrix!(A::AbstractMatrix{T}) where T
+    n = size(A, 1)
+    low = 1
+    high = n
+    
+    # Simple implementation of the permutation search
+    # We look for rows/cols that are essentially isolated
+    
+    # Search from the bottom up (high) and top down (low)
+    changed = true
+    while changed
+        changed = false
+        
+        # Look for a column with only one non-zero element
+        for j in high:-1:low
+            col = A[low:high, j]
+            if count(!iszero, col) == 1
+                # Permute this column to the 'high' position
+                swap_cols!(A, j, high)
+                swap_rows!(A, j, high)
+                high -= 1
+                changed = true
+            end
+        end
+        
+        # Look for a row with only one non-zero element
+        for i in low:high
+            row = A[i, low:high]
+            if count(!iszero, row) == 1
+                # Permute this row to the 'low' position
+                swap_cols!(A, i, low)
+                swap_rows!(A, i, low)
+                low += 1
+                changed = true
+            end
+        end
+    end
+    return low, high
+end
+
+function swap_rows!(A, i, j)
+    for k in axes(A, 2)
+        A[i, k], A[j, k] = A[j, k], A[i, k]
+    end
+end
+function swap_cols!(A, i, j)
+    for k in axes(A, 1)
+        A[k, i], A[k, j] = A[k, j], A[k, i]
+    end
+end
\ No newline at end of file
diff --git a/src/implementations/exponential.jl b/src/implementations/exponential.jl
new file mode 100644
index 000000000..7e1adca96
--- /dev/null
+++ b/src/implementations/exponential.jl
@@ -0,0 +1,172 @@
+# Inputs
+# ------
+function copy_input(::typeof(exponential), A::AbstractMatrix)
+    return copy!(similar(A, float(eltype(A))), A)
+end
+
+copy_input(::typeof(exponential), A::Diagonal) = copy(A)
+
+function check_input(::typeof(exponential!), A::AbstractMatrix, expA::AbstractMatrix, alg::AbstractAlgorithm)
+    m = LinearAlgebra.checksquare(A)
+    @check_size(expA, (m, m))
+    return @check_scalar(expA, A)
+end
+
+function check_input(::typeof(exponential!), A::AbstractMatrix, expA::AbstractMatrix, ::DiagonalAlgorithm)
+    m = LinearAlgebra.checksquare(A)
+    isdiag(A) || throw(DimensionMismatch("diagonal input matrix expected"))
+    @assert expA isa Diagonal
+    @check_size(expA, (m, m))
+    @check_scalar(expA, A)
+    return nothing
+end
+
+# Outputs
+# -------
+initialize_output(::typeof(exponential!), A::AbstractMatrix, ::AbstractAlgorithm) = A
+
+# Implementation
+# --------------
+function exponential!(A, expA, alg::MatrixFunctionViaTaylor)
+    check_input(exponential!, A, expA, alg)
+    return exponential_via_taylor!(A, expA)
+end
+
+
+module TaylorExponential
+
+"""
+    exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat
+
+An implementation of the Fasi & Higham (2018) Taylor-based scaling 
+and squaring for arbitrary precision.
+"""
+function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where T
+    n = size(A, 1)
+    ϵ = eps(T)
+    Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed
+    for k = 2:length(Apowers)
+        Apowers[k] = Apowers[k-1] * A
+    end
+    ρA = opnorm(Apowers[end], 1)^(1/length(Apowers)) # estimate of the spectral radius using Gelfand's formula
+
+    # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ
+    m, s = optimal_taylor_order(ρA, ϵ)
+    
+    # Scale A down by 2^s
+    A ./= T(2)^s
+    
+    # Evaluate Taylor via Paterson-Stockmeyer approach
+    X = evaluate_taylor_ps(As, m)
+    
+    # Squaring to undo the scaling
+    for _ in 1:s
+        X = X * X
+    end
+    
+    return X
+end
+
+"""
+Evaluates Taylor series using Paterson-Stockmeyer logic.
+"""
+function evaluate_taylor_ps(A, m)
+    T = eltype(A)
+    k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer
+    
+    # Precompute powers A^2, ..., A^k
+    powers = Vector{typeof(A)}(undef, k)
+    powers[1] = A
+    for i in 2:k
+        powers[i] = powers[i-1] * A
+    end
+    
+    # Horner-like evaluation of the outer polynomial
+    # e^A ≈ ∑ (A^k)^j * P_j(A)
+    res = zeros(T, size(A))
+    num_chunks = Int(ceil((m+1)/k))
+    
+    for j in (num_chunks-1):-1:0
+        # Evaluate sub-polynomial P_j(A) of degree k-1
+        poly_chunk = zeros(T, size(A))
+        for i in 0:k-1
+            idx = j*k + i
+            if idx > m; continue; end
+            coeff = 1 / factorial(big(idx))
+            if i == 0
+                poly_chunk += T(coeff) * I
+            else
+                poly_chunk += T(coeff) * powers[i]
+            end
+        end
+        
+        if j == num_chunks - 1
+            res = poly_chunk
+        else
+            res = res * powers[k] + poly_chunk
+        end
+    end
+    return res
+end
+
+function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial
+    costs = Dict{Int, Int}()
+    for m in 1:max_m
+        # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1
+        best_c = m # Default naive cost
+        for k in 1:m
+            c = (k - 1) + div(m-1, k) # == (k - 1) + ceil(Int, m/k) - 1
+            if c < best_c
+                best_c = c
+            end
+        end
+        costs[m] = best_c
+    end
+    return costs
+end
+
+# To filter only for the "efficient" m (where degree increases for the same cost)
+function get_efficient_m(max_m::Int)
+    costs = get_ps_costs(max_m)
+    efficient = Dict{Int, Int}()
+    next_cost = costs[1]
+    for m in 1:max_m
+        cost = next_cost
+        next_cost = m < max_m ? costs[m+1] : cost + 1
+        if cost < next_cost
+            efficient[m] = cost
+        end
+    end
+    return efficient
+end
+
+PS_COSTS = get_efficient_m(100)
+
+"""
+Optimizes m and s to minimize cost ≈ m_mults + s.
+"""
+function optimal_taylor_order(ρA, ϵ)
+    # In a full Fasi-Higham implementation, this would use a small 
+    # search loop or a cost-model lookup. 
+    # Here is a simplified version for BigFloat:
+    best_m, best_s = 0, 0
+    min_cost = typemax(Int)
+    
+    # Search over efficient Paterson-Stockmeyer degrees
+    for (m, c) in PS_COSTS
+        # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ
+        # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ)
+        # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1)
+        term = (log2(ρA) - (log2(factorial(big(m+1))) / (m+1)) - (log2(ϵ) / (m+1)))
+        s = max(0, ceil(Int, term))
+        
+        cost = c + s
+        if cost < min_cost || (cost == min_cost && m < best_m)
+            min_cost = cost
+            best_m, best_s = m, s
+        end
+    end
+    return best_m, best_s
+end
+
+end
\ No newline at end of file
diff --git a/src/interface/exponential.jl b/src/interface/exponential.jl
new file mode 100644
index 000000000..3b6145d41
--- /dev/null
+++ b/src/interface/exponential.jl
@@ -0,0 +1,33 @@
+# Exponential functions
+# --------------
+
+"""
+    exponential(A; kwargs...) -> expA
+    exponential(A, alg::AbstractAlgorithm) -> expA
+    exponential!(A, [expA]; kwargs...) -> expA
+    exponential!(A, [expA], alg::AbstractAlgorithm) -> expA
+
+Compute the exponential of the square matrix `A`,
+
+!!! note
+    The bang method `exponential!` optionally accepts the output structure and
+    possibly destroys the input matrix `A`. Always use the return value of the function
+    as it may not always be possible to use the provided `expA` as output.
+"""
+@functiondef exponential
+
+# Algorithm selection
+# -------------------
+default_exponential_algorithm(A; kwargs...) = default_exponential_algorithm(typeof(A); kwargs...)
+function default_exponential_algorithm(T::Type; kwargs...)
+    return MatrixFunctionViaTaylor(; kwargs...)
+end
+function default_exponential_algorithm(::Type{T}; kwargs...) where {T <: Diagonal}
+    return DiagonalAlgorithm(; kwargs...)
+end
+
+for f in (:exponential!,)
+    @eval function default_algorithm(::typeof($f), ::Type{A}; kwargs...) where {A}
+        return default_exponential_algorithm(A; kwargs...)
+    end
+end
\ No newline at end of file
diff --git a/src/matrixfunctions.jl b/src/matrixfunctions.jl
deleted file mode 100644
index 8b1378917..000000000
--- a/src/matrixfunctions.jl
+++ /dev/null
@@ -1 +0,0 @@
-