[WIP] Taylor Exponentiate#243
Open
Jutho wants to merge 1 commit into
Open
Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/src/common/balancing.jl b/src/common/balancing.jl
index e2ec37b..74d0a69 100644
--- a/src/common/balancing.jl
+++ b/src/common/balancing.jl
@@ -4,9 +4,9 @@
A pure Julia implementation of the GEBAL algorithm.
Balances a matrix to improve eigenvalue accuracy.
"""
-function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
+function balance!(A::AbstractMatrix{T}; radix = convert(T, 2)) where {T}
n = LinearAlgebra.checksquare(A)
-
+
scale = ones(real(T), n)
low = 1
high = n
@@ -24,7 +24,9 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
row_norm = 0.0
col_norm = 0.0
for j in low:high
- if i == j continue end
+ if i == j
+ continue
+ end
row_norm += abs(A[i, j])
col_norm += abs(A[j, i])
end
@@ -38,13 +40,13 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
g = row_norm / radix
f = 1.0
s = col_norm + row_norm
-
+
# While column norm is significantly smaller than row norm
while col_norm < g
f *= radix
col_norm *= radix^2
end
-
+
# While column norm is significantly larger than row norm
g = row_norm * radix
while col_norm >= g
@@ -68,24 +70,24 @@ function balance!(A::AbstractMatrix{T}; radix=convert(T, 2)) where T
end
end
end
-
+
return A, scale
end
-function permute_matrix!(A::AbstractMatrix{T}) where T
+function permute_matrix!(A::AbstractMatrix{T}) where {T}
n = size(A, 1)
low = 1
high = n
-
+
# Simple implementation of the permutation search
# We look for rows/cols that are essentially isolated
-
+
# Search from the bottom up (high) and top down (low)
changed = true
while changed
changed = false
-
+
# Look for a column with only one non-zero element
for j in high:-1:low
col = A[low:high, j]
@@ -97,7 +99,7 @@ function permute_matrix!(A::AbstractMatrix{T}) where T
changed = true
end
end
-
+
# Look for a row with only one non-zero element
for i in low:high
row = A[i, low:high]
@@ -117,9 +119,11 @@ function swap_rows!(A, i, j)
for k in axes(A, 2)
A[i, k], A[j, k] = A[j, k], A[i, k]
end
+ return
end
function swap_cols!(A, i, j)
for k in axes(A, 1)
A[k, i], A[k, j] = A[k, j], A[k, i]
end
-end
\ No newline at end of file
+ return
+end
diff --git a/src/implementations/exponential.jl b/src/implementations/exponential.jl
index 7e1adca..d3cddab 100644
--- a/src/implementations/exponential.jl
+++ b/src/implementations/exponential.jl
@@ -35,138 +35,140 @@ end
module TaylorExponential
-"""
- exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat
-
-An implementation of the Fasi & Higham (2018) Taylor-based scaling
-and squaring for arbitrary precision.
-"""
-function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where T
- n = size(A, 1)
- ϵ = eps(T)
- Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed
- for k = 2:length(Apowers)
- Apowers[k] = Apowers[k-1] * A
- end
- ρA = opnorm(Apowers[end], 1)^(1/length(Apowers)) # estimate of the spectral radius using Gelfand's formula
-
- # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ
- m, s = optimal_taylor_order(ρA, ϵ)
-
- # Scale A down by 2^s
- A ./= T(2)^s
-
- # Evaluate Taylor via Paterson-Stockmeyer approach
- X = evaluate_taylor_ps(As, m)
-
- # Squaring to undo the scaling
- for _ in 1:s
- X = X * X
- end
-
- return X
-end
+ """
+ exponential_via_taylor!(A::Matrix{T}) where T <: AbstractFloat
+
+ An implementation of the Fasi & Higham (2018) Taylor-based scaling
+ and squaring for arbitrary precision.
+ """
+ function exponential_via_taylor!(A::AbstractMatrix{T}, expA::AbstractMatrix{T}) where {T}
+ n = size(A, 1)
+ ϵ = eps(T)
+ Apowers = fill(A, 8) # Preallocate for powers up to A^8, will be resized if needed
+ for k in 2:length(Apowers)
+ Apowers[k] = Apowers[k - 1] * A
+ end
+ ρA = opnorm(Apowers[end], 1)^(1 / length(Apowers)) # estimate of the spectral radius using Gelfand's formula
+
+ # Find m and s such that (ρA/2^s)^(m+1) / (m+1)! < ϵ
+ m, s = optimal_taylor_order(ρA, ϵ)
+
+ # Scale A down by 2^s
+ A ./= T(2)^s
-"""
-Evaluates Taylor series using Paterson-Stockmeyer logic.
-"""
-function evaluate_taylor_ps(A, m)
- T = eltype(A)
- k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer
-
- # Precompute powers A^2, ..., A^k
- powers = Vector{typeof(A)}(undef, k)
- powers[1] = A
- for i in 2:k
- powers[i] = powers[i-1] * A
+ # Evaluate Taylor via Paterson-Stockmeyer approach
+ X = evaluate_taylor_ps(As, m)
+
+ # Squaring to undo the scaling
+ for _ in 1:s
+ X = X * X
+ end
+
+ return X
end
-
- # Horner-like evaluation of the outer polynomial
- # e^A ≈ ∑ (A^k)^j * P_j(A)
- res = zeros(T, size(A))
- num_chunks = Int(ceil((m+1)/k))
-
- for j in (num_chunks-1):-1:0
- # Evaluate sub-polynomial P_j(A) of degree k-1
- poly_chunk = zeros(T, size(A))
- for i in 0:k-1
- idx = j*k + i
- if idx > m; continue; end
- coeff = 1 / factorial(big(idx))
- if i == 0
- poly_chunk += T(coeff) * I
+
+ """
+ Evaluates Taylor series using Paterson-Stockmeyer logic.
+ """
+ function evaluate_taylor_ps(A, m)
+ T = eltype(A)
+ k = Int(floor(sqrt(m))) # Chunk size for Paterson-Stockmeyer
+
+ # Precompute powers A^2, ..., A^k
+ powers = Vector{typeof(A)}(undef, k)
+ powers[1] = A
+ for i in 2:k
+ powers[i] = powers[i - 1] * A
+ end
+
+ # Horner-like evaluation of the outer polynomial
+ # e^A ≈ ∑ (A^k)^j * P_j(A)
+ res = zeros(T, size(A))
+ num_chunks = Int(ceil((m + 1) / k))
+
+ for j in (num_chunks - 1):-1:0
+ # Evaluate sub-polynomial P_j(A) of degree k-1
+ poly_chunk = zeros(T, size(A))
+ for i in 0:(k - 1)
+ idx = j * k + i
+ if idx > m
+ continue
+ end
+ coeff = 1 / factorial(big(idx))
+ if i == 0
+ poly_chunk += T(coeff) * I
+ else
+ poly_chunk += T(coeff) * powers[i]
+ end
+ end
+
+ if j == num_chunks - 1
+ res = poly_chunk
else
- poly_chunk += T(coeff) * powers[i]
+ res = res * powers[k] + poly_chunk
end
end
-
- if j == num_chunks - 1
- res = poly_chunk
- else
- res = res * powers[k] + poly_chunk
- end
+ return res
end
- return res
-end
-function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial
- costs = Dict{Int, Int}()
- for m in 1:max_m
- # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1
- best_c = m # Default naive cost
- for k in 1:m
- c = (k - 1) + div(m-1, k) # == (k - 1) + ceil(Int, m/k) - 1
- if c < best_c
- best_c = c
+ function get_ps_costs(max_m) # number of matrix multiplications for an order m polynomial
+ costs = Dict{Int, Int}()
+ for m in 1:max_m
+ # Find k in 1:m that minimizes (k-1) + ceil(m/k) - 1
+ best_c = m # Default naive cost
+ for k in 1:m
+ c = (k - 1) + div(m - 1, k) # == (k - 1) + ceil(Int, m/k) - 1
+ if c < best_c
+ best_c = c
+ end
end
+ costs[m] = best_c
end
- costs[m] = best_c
+ return costs
end
- return costs
-end
-# To filter only for the "efficient" m (where degree increases for the same cost)
-function get_efficient_m(max_m::Int)
- costs = get_ps_costs(max_m)
- efficient = Dict{Int, Int}()
- next_cost = costs[1]
- for m in 1:max_m
- cost = next_cost
- next_cost = m < max_m ? costs[m+1] : cost + 1
- if cost < next_cost
- efficient[m] = cost
+ # To filter only for the "efficient" m (where degree increases for the same cost)
+ function get_efficient_m(max_m::Int)
+ costs = get_ps_costs(max_m)
+ efficient = Dict{Int, Int}()
+ next_cost = costs[1]
+ for m in 1:max_m
+ cost = next_cost
+ next_cost = m < max_m ? costs[m + 1] : cost + 1
+ if cost < next_cost
+ efficient[m] = cost
+ end
end
+ return efficient
end
- return efficient
-end
-PS_COSTS = get_efficient_m(100)
-
-"""
-Optimizes m and s to minimize cost ≈ m_mults + s.
-"""
-function optimal_taylor_order(ρA, ϵ)
- # In a full Fasi-Higham implementation, this would use a small
- # search loop or a cost-model lookup.
- # Here is a simplified version for BigFloat:
- best_m, best_s = 0, 0
- min_cost = typemax(Int)
-
- # Search over efficient Paterson-Stockmeyer degrees
- for (m, c) in PS_COSTS
- # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ
- # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ)
- # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1)
- term = (log2(ρA) - (log2(factorial(big(m+1))) / (m+1)) - (log2(ϵ) / (m+1)))
- s = max(0, ceil(Int, term))
-
- cost = c + s
- if cost < min_cost || (cost == min_cost && m < best_m)
- min_cost = cost
- best_m, best_s = m, s
+ PS_COSTS = get_efficient_m(100)
+
+ """
+ Optimizes m and s to minimize cost ≈ m_mults + s.
+ """
+ function optimal_taylor_order(ρA, ϵ)
+ # In a full Fasi-Higham implementation, this would use a small
+ # search loop or a cost-model lookup.
+ # Here is a simplified version for BigFloat:
+ best_m, best_s = 0, 0
+ min_cost = typemax(Int)
+
+ # Search over efficient Paterson-Stockmeyer degrees
+ for (m, c) in PS_COSTS
+ # Calculate s needed for this m: (normA/2^s)^(m+1) / (m+1)! < ϵ
+ # log2(normA/2^s) * (m+1) - log2((m+1)!) < log2(ϵ)
+ # log2(normA) - s - [log2((m+1)!) / (m+1)] < log2(ϵ) / (m+1)
+ term = (log2(ρA) - (log2(factorial(big(m + 1))) / (m + 1)) - (log2(ϵ) / (m + 1)))
+ s = max(0, ceil(Int, term))
+
+ cost = c + s
+ if cost < min_cost || (cost == min_cost && m < best_m)
+ min_cost = cost
+ best_m, best_s = m, s
+ end
end
+ return best_m, best_s
end
- return best_m, best_s
-end
-end
\ No newline at end of file
+end
diff --git a/src/interface/exponential.jl b/src/interface/exponential.jl
index 3b6145d..7a77ff8 100644
--- a/src/interface/exponential.jl
+++ b/src/interface/exponential.jl
@@ -30,4 +30,4 @@ for f in (:exponential!,)
@eval function default_algorithm(::typeof($f), ::Type{A}; kwargs...) where {A}
return default_exponential_algorithm(A; kwargs...)
end
-end
\ No newline at end of file
+end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
This is something I started a while ago, but I need to continue working on it. Basically, for higher precision number types, the best approach for computing the exponential is just a Taylor expansion, combined with scaling and squaring (i.e. exp(A) = (exp(A/2^s))^(2^s)). But which
sin combination with which order ofmof the Taylor expansion is a nontrivial question, that has been studied in the literature, and also involves clever ways of computing higher order polynomials, i.e. polynomials ofAof degreem, thus requiring all powersA^k,k in 0:m, without actually having to dommatrix multiplications.