Skip to content

Commit

Permalink
Slightly more performance (#94)
Browse files Browse the repository at this point in the history
* update if statement, unicode symbols

* remove meaningless assignment

* remove redifinition of Base functions

* fix type unstability, update symbols to unicode greek

* update symbols to unicode greek

* update symbols using Unicode greeks

* move some bessel-roots related functions to besselroots.jl, and remove duplicates

* remove unnecessary min function

* add StaticArrays.jl for faster besselroots

* add (a)inline for faster gausslegendre

* add constants.jl to store all constants

* fix compat table for StaticArrays.jl

* bump version to v0.4.6
  • Loading branch information
hyrodium committed Jan 17, 2021
1 parent 0b3be86 commit 5506f5f
Show file tree
Hide file tree
Showing 9 changed files with 322 additions and 254 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
name = "FastGaussQuadrature"
uuid = "442a2c76-b920-505d-bb47-c5924d526838"
version = "0.4.5"
version = "0.4.6"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
SpecialFunctions = "0.7, 0.8, 0.9, 0.10, 1.0"
StaticArrays = "1.0"
julia = "1"

[extras]
Expand Down
11 changes: 4 additions & 7 deletions src/FastGaussQuadrature.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
module FastGaussQuadrature

using SpecialFunctions, LinearAlgebra
cumprod(A::AbstractArray) = Base.cumprod(A, dims=1)
cumprod(A::AbstractArray, d::Int) = Base.cumprod(A, dims=d)
sum(A::AbstractArray, n::Int) = Base.sum(A, dims=n)
sum(A) = Base.sum(A)
flipdim(A, d) = reverse(A, dims=d)

using LinearAlgebra
using SpecialFunctions
using StaticArrays

export gausslegendre
export gausschebyshev
Expand All @@ -19,6 +15,7 @@ export besselroots

import SpecialFunctions: besselj, airyai, airyaiprime

include("constants.jl")
include("gausslegendre.jl")
include("gausschebyshev.jl")
include("gausslaguerre.jl")
Expand Down
213 changes: 135 additions & 78 deletions src/besselroots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,22 @@ function besselroots(ν::Float64, n::Integer)
end

x = zeros(n)
if n > 0 && ν == 0
if ν == 0
for k in 1:min(n,20)
x[k] = J0_roots[k]
end
for k in min(n,20)+1:n
x[k] = McMahon(ν, k)
end
elseif n > 0 && ν -1 && ν 5
correctFirstFew = Piessens( ν )
elseif -1 ν 5
correctFirstFew = Piessens(ν)
for k in 1:min(n,6)
x[k] = correctFirstFew[k]
end
for k in min(n,6)+1:n
x[k] = McMahon(ν, k)
end
elseif ν > 5
elseif 5 < ν
for k in 1:n
x[k] = McMahon(ν, k)
end
Expand All @@ -74,91 +74,148 @@ function McMahon(ν::Float64, k::Integer)
# McMahon's expansion. This expansion gives very accurate approximation
# for the sth zero (s ≥ 7) in the whole region ν ≥ -1, and moderate
# approximation in other cases.
mu = 4ν^2
μ = 4ν^2
a1 = 1 / 8
a3 = (7mu-31) / 384
a5 = 4*(3779+mu*(-982+83mu)) / 61440 # Evaluate via Horner's method.
a7 = 6*(-6277237+mu*(1585743+mu*(-153855+6949mu))) / 20643840
a9 = 144*(2092163573+mu*(-512062548+mu*(48010494+mu*(-2479316+70197mu)))) / 11890851840
a11 = 720 *(-8249725736393+mu*(1982611456181+mu*(-179289628602+mu*(8903961290 +
mu*(-287149133 + 5592657mu))))) / 10463949619200
a13 = 576 *(423748443625564327 + mu*(-100847472093088506 + mu*(8929489333108377 +
mu*(-426353946885548+mu*(13172003634537+mu*(-291245357370 + 4148944183mu)))))) / 13059009124761600
a3 = (7μ-31) / 384
a5 = 4*(3779+μ*(-982+83μ)) / 61440 # Evaluate via Horner's method.
a7 = 6*(-6277237+μ*(1585743+μ*(-153855+6949μ))) / 20643840
a9 = 144*(2092163573+μ*(-512062548+μ*(48010494+μ*(-2479316+70197μ)))) / 11890851840
a11 = 720 *(-8249725736393+μ*(1982611456181+μ*(-179289628602+μ*(8903961290 +
μ*(-287149133 + 5592657μ))))) / 10463949619200
a13 = 576 *(423748443625564327 + μ*(-100847472093088506 + μ*(8929489333108377 +
μ*(-426353946885548+μ*(13172003634537+μ*(-291245357370 + 4148944183μ)))))) / 13059009124761600
b = 0.25 * (2ν+4k-1)*π
# Evaluate using Horner's scheme:
x = b - (mu-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
x = b - (μ-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b)
return x
end

# Roots of Bessel funcion ``J_0`` in Float64.
# https://mathworld.wolfram.com/BesselFunctionZeros.html
const J0_roots =
[ 2.4048255576957728
5.5200781102863106
8.6537279129110122
11.791534439014281
14.930917708487785
18.071063967910922
21.211636629879258
24.352471530749302
27.493479132040254
30.634606468431975
33.775820213573568
36.917098353664044
40.058425764628239
43.199791713176730
46.341188371661814
49.482609897397817
52.624051841114996
55.765510755019979
58.906983926080942
62.048469190227170 ]


const Piessens_C = [
2.883975316228 8.263194332307 11.493871452173 14.689036505931 17.866882871378 21.034784308088
0.767665211539 4.209200330779 4.317988625384 4.387437455306 4.435717974422 4.471319438161
-0.086538804759 -0.164644722483 -0.130667664397 -0.109469595763 -0.094492317231 -0.083234240394
0.020433979038 0.039764618826 0.023009510531 0.015359574754 0.011070071951 0.008388073020
-0.006103761347 -0.011799527177 -0.004987164201 -0.002655024938 -0.001598668225 -0.001042443435
0.002046841322 0.003893555229 0.001204453026 0.000511852711 0.000257620149 0.000144611721
-0.000734476579 -0.001369989689 -0.000310786051 -0.000105522473 -0.000044416219 -0.000021469973
0.000275336751 0.000503054700 0.000083834770 0.000022761626 0.000008016197 0.000003337753
-0.000106375704 -0.000190381770 -0.000023343325 -0.000005071979 -0.000001495224 -0.000000536428
0.000042003336 0.000073681222 0.000006655551 0.000001158094 0.000000285903 0.000000088402
-0.000016858623 -0.000029010830 -0.000001932603 -0.000000269480 -0.000000055734 -0.000000014856
0.000006852440 0.000011579131 0.000000569367 0.000000063657 0.000000011033 0.000000002536
-0.000002813300 -0.000004672877 -0.000000169722 -0.000000015222 -0.000000002212 -0.000000000438
0.000001164419 0.000001903082 0.000000051084 0.000000003677 0.000000000448 0.000000000077
-0.000000485189 -0.000000781030 -0.000000015501 -0.000000000896 -0.000000000092 -0.000000000014
0.000000203309 0.000000322648 0.000000004736 0.000000000220 0.000000000019 0.000000000002
-0.000000085602 -0.000000134047 -0.000000001456 -0.000000000054 -0.000000000004 0
0.000000036192 0.000000055969 0.000000000450 0.000000000013 0 0
-0.000000015357 -0.000000023472 -0.000000000140 -0.000000000003 0 0
0.000000006537 0.000000009882 0.000000000043 0.000000000001 0 0
-0.000000002791 -0.000000004175 -0.000000000014 0 0 0
0.000000001194 0.000000001770 0.000000000004 0 0 0
-0.000000000512 -0.000000000752 0 0 0 0
0.000000000220 0.000000000321 0 0 0 0
-0.000000000095 -0.000000000137 0 0 0 0
0.000000000041 0.000000000059 0 0 0 0
-0.000000000018 -0.000000000025 0 0 0 0
0.000000000008 0.000000000011 0 0 0 0
-0.000000000003 -0.000000000005 0 0 0 0
0.000000000001 0.000000000002 0 0 0 0]
function _piessens_chebyshev30::Float64)
# Piessens's Chebyshev series approximations (1984). Calculates the 6 first
# zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
pt =-2)/3

T1 = 1.0
T2 = pt
T3 = 2pt*T2 - T1
T4 = 2pt*T3 - T2
T5 = 2pt*T4 - T3
T6 = 2pt*T5 - T4
T7 = 2pt*T6 - T5
T8 = 2pt*T7 - T6
T9 = 2pt*T8 - T7
T10 = 2pt*T9 - T8
T11 = 2pt*T10 - T9
T12 = 2pt*T11 - T10
T13 = 2pt*T12 - T11
T14 = 2pt*T13 - T12
T15 = 2pt*T14 - T13
T16 = 2pt*T15 - T14
T17 = 2pt*T16 - T15
T18 = 2pt*T17 - T16
T19 = 2pt*T18 - T17
T20 = 2pt*T19 - T18
T21 = 2pt*T20 - T19
T22 = 2pt*T21 - T20
T23 = 2pt*T22 - T21
T24 = 2pt*T23 - T22
T25 = 2pt*T24 - T23
T26 = 2pt*T25 - T24
T27 = 2pt*T26 - T25
T28 = 2pt*T27 - T26
T29 = 2pt*T28 - T27
T30 = 2pt*T29 - T28

T = SVector(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25,T26,T27,T28,T29,T30)
return T
end

function Piessens::Float64)
# Piessens's Chebyshev series approximations (1984). Calculates the 6 first
# zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5:
C = Piessens_C
T = Array{Float64}(undef,size(C,1))
pt =-2)/3
T[1], T[2] = 1., pt
for k = 2:size(C,1)-1
T[k+1] = 2*pt*T[k] - T[k-1]
end
T = _piessens_chebyshev30(ν)
y = C'*T
y[1] *= sqrt+1) # Scale the first root.
return y
_y = collect(y)
_y[1] *= sqrt+1) # Scale the first root.
return _y
end


function besselZeroRoots(m)
# BESSEL0ROOTS ROOTS OF BESSELJ(0,x). USE ASYMPTOTICS.
# Use McMahon's expansion for the remainder (NIST, 10.21.19):
jk = Array{Float64}(undef, m)
p = (1071187749376 / 315, 0.0, -401743168 / 105, 0.0, 120928 / 15,
0.0, -124 / 3, 0.0, 1.0, 0.0)
# First 20 are precomputed:
@inbounds for jj = 1:min(m, 20)
jk[jj] = J0_roots[jj]
end
@inbounds for jj = 21:min(m, 47)
ak = π * (jj - .25)
ak82 = (.125 / ak)^2
jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5], p[3])
end
@inbounds for jj = 48:min(m, 344)
ak = π * (jj - .25)
ak82 = (.125 / ak)^2
jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5])
end
@inbounds for jj = 345:min(m,13191)
ak = π * (jj - .25)
ak82 = (.125 / ak)^2
jk[jj] = ak + .125 / ak * muladd(ak82, p[7], 1.0)
end
@inbounds for jj = 13192:m
ak = π * (jj - .25)
jk[jj] = ak + .125 / ak
end
return jk
end

function besselJ1(m)
# BESSELJ1 EVALUATE BESSELJ(1,x)^2 AT ROOTS OF BESSELJ(0,x).
# USE ASYMPTOTICS. Use Taylor series of (NIST, 10.17.3) and McMahon's
# expansion (NIST, 10.21.19):
Jk2 = Array{Float64}(undef, m)
c = (-171497088497 / 15206400, 461797 / 1152, -172913 / 8064,
151 / 80, -7 / 24, 0.0, 2.0)
# First 10 are precomputed:
@inbounds for jj = 1:min(m, 10)
Jk2[jj] = besselJ1_10[jj]
end
@inbounds for jj = 11:min(m, 15)
ak = π * (jj - .25)
ak2 = (1 / ak)^2
Jk2[jj] = 1 /* ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3],
c[2], c[1]), ak2^2, c[7])
end
@inbounds for jj = 16:min(m, 21)
ak = π * (jj - .25)
ak2 = (1 / ak)^2
Jk2[jj] = 1 /* ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], c[2]),
ak2^2, c[7])
end
@inbounds for jj = 22:min(m,55)
ak = π * (jj - .25)
ak2 = (1 / ak)^2
Jk2[jj] = 1 /* ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3]),
ak2^2, c[7])
end
@inbounds for jj = 56:min(m,279)
ak = π * (jj - .25)
ak2 = (1 / ak)^2
Jk2[jj] = 1 /* ak) * muladd(muladd(ak2, c[4], c[5]), ak2^2, c[7])
end
@inbounds for jj = 280:min(m,2279)
ak = π * (jj - .25)
ak2 = (1 / ak)^2
Jk2[jj] = 1 /* ak) * muladd(ak2^2, c[5], c[7])
end
@inbounds for jj = 2280:m
ak = π * (jj - .25)
Jk2[jj] = 1 /* ak) * c[7]
end
return Jk2
end
Loading

2 comments on commit 5506f5f

@hyrodium
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/28102

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.6 -m "<description of version>" 5506f5fde7c7f3098c1007140af6eb8105c0f5ec
git push origin v0.4.6

Please sign in to comment.