Slightly more performance (#94)

* update if statement, unicode symbols * remove meaningless assignment * remove redifinition of Base functions * fix type unstability, update symbols to unicode greek * update symbols to unicode greek * update symbols using Unicode greeks * move some bessel-roots related functions to besselroots.jl, and remove duplicates * remove unnecessary min function * add StaticArrays.jl for faster besselroots * add (a)inline for faster gausslegendre * add constants.jl to store all constants * fix compat table for StaticArrays.jl * bump version to v0.4.6
JuliaApproximation · Jan 17, 2021 · 5506f5f · 5506f5f · hyrodium · Jan 17, 2021
1 parent 0b3be86
commit 5506f5f
Show file tree

Hide file tree

Showing 9 changed files with 322 additions and 254 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,13 +1,15 @@
 name = "FastGaussQuadrature"
 uuid = "442a2c76-b920-505d-bb47-c5924d526838"
-version = "0.4.5"
+version = "0.4.6"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [compat]
 SpecialFunctions = "0.7, 0.8, 0.9, 0.10, 1.0"
+StaticArrays = "1.0"
 julia = "1"
 
 [extras]

diff --git a/src/FastGaussQuadrature.jl b/src/FastGaussQuadrature.jl
@@ -1,12 +1,8 @@
 module FastGaussQuadrature
 
-using SpecialFunctions, LinearAlgebra
-cumprod(A::AbstractArray) = Base.cumprod(A, dims=1)
-cumprod(A::AbstractArray, d::Int) = Base.cumprod(A, dims=d)
-sum(A::AbstractArray, n::Int) = Base.sum(A, dims=n)
-sum(A) = Base.sum(A)
-flipdim(A, d) = reverse(A, dims=d)
-
+using LinearAlgebra
+using SpecialFunctions
+using StaticArrays
 
 export gausslegendre
 export gausschebyshev
@@ -19,6 +15,7 @@ export besselroots
 
 import SpecialFunctions: besselj, airyai, airyaiprime
 
+include("constants.jl")
 include("gausslegendre.jl")
 include("gausschebyshev.jl")
 include("gausslaguerre.jl")

diff --git a/src/besselroots.jl b/src/besselroots.jl
@@ -42,22 +42,22 @@ function besselroots(ν::Float64, n::Integer)
     end
 
     x = zeros(n)
-    if n > 0 && ν == 0
+    if ν == 0
         for k in 1:min(n,20)
             x[k] = J0_roots[k]
         end
         for k in min(n,20)+1:n
             x[k] = McMahon(ν, k)
         end
-    elseif n > 0 && ν ≥ -1 && ν ≤ 5
-        correctFirstFew = Piessens( ν )
+    elseif -1 ≤ ν ≤ 5
+        correctFirstFew = Piessens(ν)
         for k in 1:min(n,6)
             x[k] = correctFirstFew[k]
         end
         for k in min(n,6)+1:n
             x[k] = McMahon(ν, k)
         end
-    elseif ν > 5
+    elseif 5 < ν
         for k in 1:n
             x[k] = McMahon(ν, k)
         end
@@ -74,91 +74,148 @@ function McMahon(ν::Float64, k::Integer)
     # McMahon's expansion. This expansion gives very accurate approximation
     # for the sth zero (s ≥ 7) in the whole region ν ≥ -1, and moderate
     # approximation in other cases.
-    mu = 4ν^2
+    μ = 4ν^2
     a1 = 1 / 8
-    a3 = (7mu-31) / 384
-    a5 = 4*(3779+mu*(-982+83mu)) / 61440 # Evaluate via Horner's method.
-    a7 = 6*(-6277237+mu*(1585743+mu*(-153855+6949mu))) / 20643840
-    a9 = 144*(2092163573+mu*(-512062548+mu*(48010494+mu*(-2479316+70197mu)))) / 11890851840
-    a11 = 720 *(-8249725736393+mu*(1982611456181+mu*(-179289628602+mu*(8903961290 +
-          mu*(-287149133 + 5592657mu))))) / 10463949619200
-    a13 = 576 *(423748443625564327 + mu*(-100847472093088506 + mu*(8929489333108377 +
-        mu*(-426353946885548+mu*(13172003634537+mu*(-291245357370 + 4148944183mu)))))) / 13059009124761600
+    a3 = (7μ-31) / 384
+    a5 = 4*(3779+μ*(-982+83μ)) / 61440 # Evaluate via Horner's method.
+    a7 = 6*(-6277237+μ*(1585743+μ*(-153855+6949μ))) / 20643840
+    a9 = 144*(2092163573+μ*(-512062548+μ*(48010494+μ*(-2479316+70197μ)))) / 11890851840
+    a11 = 720 *(-8249725736393+μ*(1982611456181+μ*(-179289628602+μ*(8903961290 +
+          μ*(-287149133 + 5592657μ))))) / 10463949619200
+    a13 = 576 *(423748443625564327 + μ*(-100847472093088506 + μ*(8929489333108377 +
+        μ*(-426353946885548+μ*(13172003634537+μ*(-291245357370 + 4148944183μ)))))) / 13059009124761600
     b = 0.25 * (2ν+4k-1)*π
     # Evaluate using Horner's scheme:
-    x = b - (mu-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
+    x = b - (μ-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
     return x
 end
 
-# Roots of Bessel funcion ``J_0`` in Float64.
-# https://mathworld.wolfram.com/BesselFunctionZeros.html
-const J0_roots =
-    [   2.4048255576957728
-        5.5200781102863106
-        8.6537279129110122
-        11.791534439014281
-        14.930917708487785
-        18.071063967910922
-        21.211636629879258
-        24.352471530749302
-        27.493479132040254
-        30.634606468431975
-        33.775820213573568
-        36.917098353664044
-        40.058425764628239
-        43.199791713176730
-        46.341188371661814
-        49.482609897397817
-        52.624051841114996
-        55.765510755019979
-        58.906983926080942
-        62.048469190227170  ]
-
-
-const Piessens_C = [
-       2.883975316228  8.263194332307 11.493871452173 14.689036505931 17.866882871378 21.034784308088
-       0.767665211539  4.209200330779  4.317988625384  4.387437455306  4.435717974422  4.471319438161
-      -0.086538804759 -0.164644722483 -0.130667664397 -0.109469595763 -0.094492317231 -0.083234240394
-       0.020433979038  0.039764618826  0.023009510531  0.015359574754  0.011070071951  0.008388073020
-      -0.006103761347 -0.011799527177 -0.004987164201 -0.002655024938 -0.001598668225 -0.001042443435
-       0.002046841322  0.003893555229  0.001204453026  0.000511852711  0.000257620149  0.000144611721
-      -0.000734476579 -0.001369989689 -0.000310786051 -0.000105522473 -0.000044416219 -0.000021469973
-       0.000275336751  0.000503054700  0.000083834770  0.000022761626  0.000008016197  0.000003337753
-      -0.000106375704 -0.000190381770 -0.000023343325 -0.000005071979 -0.000001495224 -0.000000536428
-       0.000042003336  0.000073681222  0.000006655551  0.000001158094  0.000000285903  0.000000088402
-      -0.000016858623 -0.000029010830 -0.000001932603 -0.000000269480 -0.000000055734 -0.000000014856
-       0.000006852440  0.000011579131  0.000000569367  0.000000063657  0.000000011033  0.000000002536
-      -0.000002813300 -0.000004672877 -0.000000169722 -0.000000015222 -0.000000002212 -0.000000000438
-       0.000001164419  0.000001903082  0.000000051084  0.000000003677  0.000000000448  0.000000000077
-      -0.000000485189 -0.000000781030 -0.000000015501 -0.000000000896 -0.000000000092 -0.000000000014
-       0.000000203309  0.000000322648  0.000000004736  0.000000000220  0.000000000019  0.000000000002
-      -0.000000085602 -0.000000134047 -0.000000001456 -0.000000000054 -0.000000000004               0
-       0.000000036192  0.000000055969  0.000000000450  0.000000000013               0               0
-      -0.000000015357 -0.000000023472 -0.000000000140 -0.000000000003               0               0
-       0.000000006537  0.000000009882  0.000000000043  0.000000000001               0               0
-      -0.000000002791 -0.000000004175 -0.000000000014               0               0               0
-       0.000000001194  0.000000001770  0.000000000004               0               0               0
-      -0.000000000512 -0.000000000752               0               0               0               0
-       0.000000000220  0.000000000321               0               0               0               0
-      -0.000000000095 -0.000000000137               0               0               0               0
-       0.000000000041  0.000000000059               0               0               0               0
-      -0.000000000018 -0.000000000025               0               0               0               0
-       0.000000000008  0.000000000011               0               0               0               0
-      -0.000000000003 -0.000000000005               0               0               0               0
-       0.000000000001  0.000000000002               0               0               0               0]
+function _piessens_chebyshev30(ν::Float64)
+    # Piessens's Chebyshev series approximations (1984). Calculates the 6 first
+    # zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
+    pt = (ν-2)/3
 
+    T1 = 1.0
+    T2 = pt
+    T3 = 2pt*T2 - T1
+    T4 = 2pt*T3 - T2
+    T5 = 2pt*T4 - T3
+    T6 = 2pt*T5 - T4
+    T7 = 2pt*T6 - T5
+    T8 = 2pt*T7 - T6
+    T9 = 2pt*T8 - T7
+    T10 = 2pt*T9 - T8
+    T11 = 2pt*T10 - T9
+    T12 = 2pt*T11 - T10
+    T13 = 2pt*T12 - T11
+    T14 = 2pt*T13 - T12
+    T15 = 2pt*T14 - T13
+    T16 = 2pt*T15 - T14
+    T17 = 2pt*T16 - T15
+    T18 = 2pt*T17 - T16
+    T19 = 2pt*T18 - T17
+    T20 = 2pt*T19 - T18
+    T21 = 2pt*T20 - T19
+    T22 = 2pt*T21 - T20
+    T23 = 2pt*T22 - T21
+    T24 = 2pt*T23 - T22
+    T25 = 2pt*T24 - T23
+    T26 = 2pt*T25 - T24
+    T27 = 2pt*T26 - T25
+    T28 = 2pt*T27 - T26
+    T29 = 2pt*T28 - T27
+    T30 = 2pt*T29 - T28
+
+    T = SVector(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25,T26,T27,T28,T29,T30)
+    return T
+end
 
 function Piessens(ν::Float64)
     # Piessens's Chebyshev series approximations (1984). Calculates the 6 first
     # zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
     C = Piessens_C
-    T = Array{Float64}(undef,size(C,1))
-    pt = (ν-2)/3
-    T[1], T[2] = 1., pt
-    for k = 2:size(C,1)-1
-        T[k+1] = 2*pt*T[k] - T[k-1]
-    end
+    T = _piessens_chebyshev30(ν)
     y = C'*T
-    y[1] *= sqrt(ν+1)  # Scale the first root.
-    return y
+    _y = collect(y)
+    _y[1] *= sqrt(ν+1)  # Scale the first root.
+    return _y
+end
+
+
+function besselZeroRoots(m)
+    # BESSEL0ROOTS ROOTS OF BESSELJ(0,x). USE ASYMPTOTICS.
+    # Use McMahon's expansion for the remainder (NIST, 10.21.19):
+    jk = Array{Float64}(undef, m)
+    p = (1071187749376 / 315, 0.0, -401743168 / 105, 0.0, 120928 / 15,
+         0.0, -124 / 3, 0.0, 1.0, 0.0)
+    # First 20 are precomputed:
+    @inbounds for jj = 1:min(m, 20)
+        jk[jj] = J0_roots[jj]
+    end
+    @inbounds for jj = 21:min(m, 47)
+        ak = π * (jj - .25)
+        ak82 = (.125 / ak)^2
+        jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5], p[3])
+    end
+    @inbounds for jj = 48:min(m, 344)
+        ak = π * (jj - .25)
+        ak82 = (.125 / ak)^2
+        jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5])
+    end
+    @inbounds for jj = 345:min(m,13191)
+        ak = π * (jj - .25)
+        ak82 = (.125 / ak)^2
+        jk[jj] = ak + .125 / ak * muladd(ak82, p[7], 1.0)
+    end
+    @inbounds for jj = 13192:m
+        ak = π * (jj - .25)
+        jk[jj] = ak + .125 / ak
+    end
+    return jk
+end
+
+function besselJ1(m)
+    # BESSELJ1 EVALUATE BESSELJ(1,x)^2 AT ROOTS OF BESSELJ(0,x).
+    # USE ASYMPTOTICS. Use Taylor series of (NIST, 10.17.3) and McMahon's
+    # expansion (NIST, 10.21.19):
+    Jk2 = Array{Float64}(undef, m)
+    c = (-171497088497 / 15206400, 461797 / 1152, -172913 / 8064,
+         151 / 80, -7 / 24, 0.0, 2.0)
+    # First 10 are precomputed:
+    @inbounds for jj = 1:min(m, 10)
+        Jk2[jj] = besselJ1_10[jj]
+    end
+    @inbounds for jj = 11:min(m, 15)
+        ak = π * (jj - .25)
+        ak2 = (1 / ak)^2
+        Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3],
+                                                  c[2], c[1]), ak2^2, c[7])
+    end
+    @inbounds for jj = 16:min(m, 21)
+        ak = π * (jj - .25)
+        ak2 = (1 / ak)^2
+        Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], c[2]),
+                                        ak2^2, c[7])
+    end
+    @inbounds for jj = 22:min(m,55)
+        ak = π * (jj - .25)
+        ak2 = (1 / ak)^2
+        Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3]),
+                                        ak2^2, c[7])
+    end
+    @inbounds for jj = 56:min(m,279)
+        ak = π * (jj - .25)
+        ak2 = (1 / ak)^2
+        Jk2[jj] = 1 / (π * ak) * muladd(muladd(ak2, c[4], c[5]), ak2^2, c[7])
+    end
+    @inbounds for jj = 280:min(m,2279)
+        ak = π * (jj - .25)
+        ak2 = (1 / ak)^2
+        Jk2[jj] = 1 / (π * ak) * muladd(ak2^2, c[5], c[7])
+    end
+    @inbounds for jj = 2280:m
+        ak = π * (jj - .25)
+        Jk2[jj] = 1 / (π * ak) * c[7]
+    end
+    return Jk2
 end