diff --git a/src/cpu.jl b/src/cpu.jl
index 3302b7d..94be9fa 100644
--- a/src/cpu.jl
+++ b/src/cpu.jl
@@ -1,21 +1,35 @@
 using LoopVectorization
 
-# FIXME match interface of gpu side
-function dedisp(source, freqs, dms, δt)
+# Wrap `x` into the 1-based range `1:y`; for integers this matches `mod1(x, y)`.
+circmod(x,y) = mod(x-1,y) + 1
+
+"""
+    dedisp(source::AbstractMatrix{T}, plan) where {T<:Real}
+
+For every column `i` of `plan`, average `source` over its channels (columns) after
+shifting channel `j` by `plan[j, i]` samples, wrapping circularly within `1:n_samp`.
+
+# Arguments
+- `source`: `n_samp × n_chan` matrix of samples.
+- `plan`: matrix of sample offsets indexed as `plan[channel, i]`; its second dimension
+  sets the number of output columns (`n_dm`).
+
+# Returns
+A new `n_samp × n_dm` matrix with element type `T`.
+"""
+function dedisp(source::AbstractMatrix{T}, plan) where {T <: Real}
     n_samp, n_chan = size(source)
-    n_dm = length(dms)
-    f_max = maximum(freqs)
-    output = zeros(Float32, n_samp, n_dm)
+    _, n_dm = size(plan)
+    output = zeros(T, n_samp, n_dm)
+    # Each output sample accumulates n_chan terms, so the average divides by n_chan
+    # (not n_samp); the GPU path normalizes by n_chan as well.
     @tturbo for i in 1:n_dm
-        for j in 1:n_chan
-            for k in 1:n_samp
-                dm = dms[i]
-                f = freqs[j]
-                dt = Δt(f, f_max, dm, δt, n_samp)
-                source_idx = circmod(dt + k - 1, n_samp)
-                output[k, i] += source[source_idx, j]
+        for k in 1:n_samp
+            for j in 1:n_chan
+                shifted_samp_idx = circmod(k + plan[j,i],n_samp)
+                output[k, i] += source[shifted_samp_idx, j] / n_chan
             end
         end
     end
     return output
-end
\ No newline at end of file
+end
diff --git a/src/gpu.jl b/src/gpu.jl
index aa449d8..8c5034a 100644
--- a/src/gpu.jl
+++ b/src/gpu.jl
@@ -65,7 +65,9 @@ function dedisp!(output::CuArray{A,2}, source, plan::CuArray{C,2}) where {A,C}
 
     # Run kernel
     kernel(output, source, plan, μ; threads=threads, blocks=blocks, shmem=sizeof(UInt32) * n_chan)
-    return output
+
+    # Normalize
+    return output ./ n_chan
 end
 
 # Source is n_samp * n_chan