Skip to content

[WIP] Taylor Exponentiate#243

Open
Jutho wants to merge 1 commit into
mainfrom
jh/taylorexponentiate
Open

[WIP] Taylor Exponentiate#243
Jutho wants to merge 1 commit into
mainfrom
jh/taylorexponentiate

Conversation

@Jutho
Copy link
Copy Markdown
Member

@Jutho Jutho commented Jun 3, 2026

This is something I started a while ago, but I need to continue working on it. Basically, for higher precision number types, the best approach for computing the exponential is just a Taylor expansion, combined with scaling and squaring (i.e. exp(A) = (exp(A/2^s))^(2^s)). But which s in combination with which order of m of the Taylor expansion is a nontrivial question, that has been studied in the literature, and also involves clever ways of computing higher order polynomials, i.e. polynomials of A of degree m, thus requiring all powers A^k, k in 0:m, without actually having to do m matrix multiplications.

@github-actions
Copy link
Copy Markdown
Contributor

github-actions Bot commented Jun 3, 2026

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic main) to apply these changes.

Click here to view the suggested changes.
diff --git a/src/common/balancing.jl b/src/common/balancing.jl
index e2ec37b..74d0a69 100644
--- a/src/common/balancing.jl
+++ b/src/common/balancing.jl
@@ -4,9 +4,9 @@
 A pure Julia implementation of the GEBAL algorithm. 
 Balances a matrix to improve eigenvalue accuracy.
 """
-function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
+function balance!(A::AbstractMatrix{T}; radix = convert(T, 2)) where {T}
     n = LinearAlgebra.checksquare(A)
-    
+
     scale = ones(real(T), n)
     low = 1
     high = n
@@ -24,7 +24,9 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
             row_norm = 0.0
             col_norm = 0.0
             for j in low:high
-                if i == j continue end
+                if i == j
+                    continue
+                end
                 row_norm += abs(A[i, j])
                 col_norm += abs(A[j, i])
             end
@@ -38,13 +40,13 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
             g = row_norm / radix
             f = 1.0
             s = col_norm + row_norm
-            
+
             # While column norm is significantly smaller than row norm
             while col_norm < g
                 f *= radix
                 col_norm *= radix^2
             end
-            
+
             # While column norm is significantly larger than row norm
             g = row_norm * radix
             while col_norm >= g
@@ -68,24 +70,24 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
             end
         end
     end
-    
+
     return A, scale
 end
 
 
-function permute_matrix!(A::AbstractMatrix{T}) where T
+function permute_matrix!(A::AbstractMatrix{T}) where {T}
     n = size(A, 1)
     low = 1
     high = n
-    
+
     # Simple implementation of the permutation search
     # We look for rows/cols that are essentially isolated
-    
+
     # Search from the bottom up (high) and top down (low)
     changed = true
     while changed
         changed = false
-        
+
         # Look for a column with only one non-zero element
         for j in high:-1:low
             col = A[low:high, j]
@@ -97,7 +99,7 @@ function permute_matrix!(A::AbstractMatrix{T}) where T
                 changed = true
             end
         end
-        
+
         # Look for a row with only one non-zero element
         for i in low:high
             row = A[i, low:high]
@@ -117,9 +119,11 @@ function swap_rows!(A, i, j)
     for k in axes(A, 2)
         A[i, k], A[j, k] = A[j, k], A[i, k]
     end
+    return
 end
 function swap_cols!(A, i, j)
     for k in axes(A, 1)
         A[k, i], A[k, j] = A[k, j], A[k, i]
     end
-end
\ No newline at end of file
+    return
+end
diff --git a/src/implementations/exponential.jl b/src/implementations/exponential.jl
index 7e1adca..d3cddab 100644
--- a/src/implementations/exponential.jl
+++ b/src/implementations/exponential.jl
@@ -35,138 +35,140 @@ end
 
 module TaylorExponential
 
-"""
-    exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat
-
-An implementation of the Fasi & Higham (2018) Taylor-based scaling 
-and squaring for arbitrary precision.
-"""
-function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where T
-    n = size(A, 1)
-    ϵ = eps(T)
-    Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed
-    for k = 2:length(Apowers)
-        Apowers[k] = Apowers[k-1] * A
-    end
-    ρA = opnorm(Apowers[end], 1)^(1/length(Apowers)) # estimate of the spectral radius using Gelfand's formula
-
-    # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ
-    m, s = optimal_taylor_order(ρA, ϵ)
-    
-    # Scale A down by 2^s
-    A ./= T(2)^s
-    
-    # Evaluate Taylor via Paterson-Stockmeyer approach
-    X = evaluate_taylor_ps(As, m)
-    
-    # Squaring to undo the scaling
-    for _ in 1:s
-        X = X * X
-    end
-    
-    return X
-end
+    """
+        exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat
+
+    An implementation of the Fasi & Higham (2018) Taylor-based scaling 
+    and squaring for arbitrary precision.
+    """
+    function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where {T}
+        n = size(A, 1)
+        ϵ = eps(T)
+        Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed
+        for k in 2:length(Apowers)
+            Apowers[k] = Apowers[k - 1] * A
+        end
+        ρA = opnorm(Apowers[end], 1)^(1 / length(Apowers)) # estimate of the spectral radius using Gelfand's formula
+
+        # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ
+        m, s = optimal_taylor_order(ρA, ϵ)
+
+        # Scale A down by 2^s
+        A ./= T(2)^s
 
-"""
-Evaluates Taylor series using Paterson-Stockmeyer logic.
-"""
-function evaluate_taylor_ps(A, m)
-    T = eltype(A)
-    k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer
-    
-    # Precompute powers A^2, ..., A^k
-    powers = Vector{typeof(A)}(undef, k)
-    powers[1] = A
-    for i in 2:k
-        powers[i] = powers[i-1] * A
+        # Evaluate Taylor via Paterson-Stockmeyer approach
+        X = evaluate_taylor_ps(As, m)
+
+        # Squaring to undo the scaling
+        for _ in 1:s
+            X = X * X
+        end
+
+        return X
     end
-    
-    # Horner-like evaluation of the outer polynomial
-    # e^A ≈ ∑ (A^k)^j * P_j(A)
-    res = zeros(T, size(A))
-    num_chunks = Int(ceil((m+1)/k))
-    
-    for j in (num_chunks-1):-1:0
-        # Evaluate sub-polynomial P_j(A) of degree k-1
-        poly_chunk = zeros(T, size(A))
-        for i in 0:k-1
-            idx = j*k + i
-            if idx > m; continue; end
-            coeff = 1 / factorial(big(idx))
-            if i == 0
-                poly_chunk += T(coeff) * I
+
+    """
+    Evaluates Taylor series using Paterson-Stockmeyer logic.
+    """
+    function evaluate_taylor_ps(A, m)
+        T = eltype(A)
+        k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer
+
+        # Precompute powers A^2, ..., A^k
+        powers = Vector{typeof(A)}(undef, k)
+        powers[1] = A
+        for i in 2:k
+            powers[i] = powers[i - 1] * A
+        end
+
+        # Horner-like evaluation of the outer polynomial
+        # e^A ≈ ∑ (A^k)^j * P_j(A)
+        res = zeros(T, size(A))
+        num_chunks = Int(ceil((m + 1) / k))
+
+        for j in (num_chunks - 1):-1:0
+            # Evaluate sub-polynomial P_j(A) of degree k-1
+            poly_chunk = zeros(T, size(A))
+            for i in 0:(k - 1)
+                idx = j * k + i
+                if idx > m
+                    continue
+                end
+                coeff = 1 / factorial(big(idx))
+                if i == 0
+                    poly_chunk += T(coeff) * I
+                else
+                    poly_chunk += T(coeff) * powers[i]
+                end
+            end
+
+            if j == num_chunks - 1
+                res = poly_chunk
             else
-                poly_chunk += T(coeff) * powers[i]
+                res = res * powers[k] + poly_chunk
             end
         end
-        
-        if j == num_chunks - 1
-            res = poly_chunk
-        else
-            res = res * powers[k] + poly_chunk
-        end
+        return res
     end
-    return res
-end
 
-function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial
-    costs = Dict{Int, Int}()
-    for m in 1:max_m
-        # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1
-        best_c = m # Default naive cost
-        for k in 1:m
-            c = (k - 1) + div(m-1, k) # == (k - 1) + ceil(Int, m/k) - 1
-            if c < best_c
-                best_c = c
+    function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial
+        costs = Dict{Int, Int}()
+        for m in 1:max_m
+            # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1
+            best_c = m # Default naive cost
+            for k in 1:m
+                c = (k - 1) + div(m - 1, k) # == (k - 1) + ceil(Int, m/k) - 1
+                if c < best_c
+                    best_c = c
+                end
             end
+            costs[m] = best_c
         end
-        costs[m] = best_c
+        return costs
     end
-    return costs
-end
 
-# To filter only for the "efficient" m (where degree increases for the same cost)
-function get_efficient_m(max_m::Int)
-    costs = get_ps_costs(max_m)
-    efficient = Dict{Int, Int}()
-    next_cost = costs[1]
-    for m in 1:max_m
-        cost = next_cost
-        next_cost = m < max_m ? costs[m+1] : cost + 1
-        if cost < next_cost
-            efficient[m] = cost
+    # To filter only for the "efficient" m (where degree increases for the same cost)
+    function get_efficient_m(max_m::Int)
+        costs = get_ps_costs(max_m)
+        efficient = Dict{Int, Int}()
+        next_cost = costs[1]
+        for m in 1:max_m
+            cost = next_cost
+            next_cost = m < max_m ? costs[m + 1] : cost + 1
+            if cost < next_cost
+                efficient[m] = cost
+            end
         end
+        return efficient
     end
-    return efficient
-end
 
-PS_COSTS = get_efficient_m(100)
-
-"""
-Optimizes m and s to minimize cost ≈ m_mults + s.
-"""
-function optimal_taylor_order(ρA, ϵ)
-    # In a full Fasi-Higham implementation, this would use a small 
-    # search loop or a cost-model lookup. 
-    # Here is a simplified version for BigFloat:
-    best_m, best_s = 0, 0
-    min_cost = typemax(Int)
-    
-    # Search over efficient Paterson-Stockmeyer degrees
-    for (m, c) in PS_COSTS
-        # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ
-        # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ)
-        # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1)
-        term = (log2(ρA) - (log2(factorial(big(m+1))) / (m+1)) - (log2(ϵ) / (m+1)))
-        s = max(0, ceil(Int, term))
-        
-        cost = c + s
-        if cost < min_cost || (cost == min_cost && m < best_m)
-            min_cost = cost
-            best_m, best_s = m, s
+    PS_COSTS = get_efficient_m(100)
+
+    """
+    Optimizes m and s to minimize cost ≈ m_mults + s.
+    """
+    function optimal_taylor_order(ρA, ϵ)
+        # In a full Fasi-Higham implementation, this would use a small
+        # search loop or a cost-model lookup.
+        # Here is a simplified version for BigFloat:
+        best_m, best_s = 0, 0
+        min_cost = typemax(Int)
+
+        # Search over efficient Paterson-Stockmeyer degrees
+        for (m, c) in PS_COSTS
+            # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ
+            # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ)
+            # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1)
+            term = (log2(ρA) - (log2(factorial(big(m + 1))) / (m + 1)) - (log2(ϵ) / (m + 1)))
+            s = max(0, ceil(Int, term))
+
+            cost = c + s
+            if cost < min_cost || (cost == min_cost && m < best_m)
+                min_cost = cost
+                best_m, best_s = m, s
+            end
         end
+        return best_m, best_s
     end
-    return best_m, best_s
-end
 
-end
\ No newline at end of file
+end
diff --git a/src/interface/exponential.jl b/src/interface/exponential.jl
index 3b6145d..7a77ff8 100644
--- a/src/interface/exponential.jl
+++ b/src/interface/exponential.jl
@@ -30,4 +30,4 @@ for f in (:exponential!,)
     @eval function default_algorithm(::typeof($f), ::Type{A}; kwargs...) where {A}
         return default_exponential_algorithm(A; kwargs...)
     end
-end
\ No newline at end of file
+end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant