tweaks for pipeline

RvSpectML · Jan 7, 2022 · 371462a · 371462a
1 parent 737bd95
commit 371462a
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 29 deletions.
diff --git a/Project.toml b/Project.toml
@@ -29,6 +29,7 @@ MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
 NeidArchive = "b2c48323-12ae-41a0-acdb-062d24b276bc"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 PkgVersion = "eebad327-c553-4316-9ea0-9fa01ccd7688"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Pluto = "c3e4b0f8-55cb-11ea-2926-15256bba5781"
@@ -50,7 +51,7 @@ SunAsAStar = "1bd1a468-645d-4223-8813-48273918c0e0"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 
 [compat]
-CSV = "0.7, 0.8"
+CSV = "0.7, 0.8, 0.9.11"
 DataFrames = "0.20, 0.21, 0.22, 0.23, 0.24, 1, 1.1"
 EchelleInstruments = ">=0.2.5"
 FileIO = "1.4"

diff --git a/examples/calc_order_ccfs_using_continuum_1.1.jl b/examples/calc_order_ccfs_using_continuum_1.1.jl
@@ -500,7 +500,8 @@ pipeline_plan = PipelinePlan()
     end
 
 
-   if args["apply_continuum_normalization"]==true && args["continuum_normalization_individually"]==true
+   if args["apply_continuum_normalization"] && args["continuum_normalization_individually"]
+        println("Applying continuum normalization to each spectrum individually.")
         local anchors, continuum, f_filtered
         if args["anchors_filename"] != nothing
             @assert isfile(args["anchors_filename"]) && filesize(args["anchors_filename"])>0
@@ -525,19 +526,24 @@ pipeline_plan = PipelinePlan()
                 mean_clean_flux_continuum_normalized .+= f_norm # .*weight
                 mean_clean_var_continuum_normalized .+= var_norm # .*weight
                 global mean_clean_flux_continuum_normalized_weight_sum += weight
-            end
+        end
         spec.flux .= f_norm
         spec.var .= var_norm
 
     end
     push!(all_spectra,spec)
  end
  GC.gc()
+ mean_lambda ./= mean_clean_flux_weight_sum
+ mean_clean_flux ./= mean_clean_flux_weight_sum
+ mean_clean_var ./= mean_clean_flux_weight_sum
+ mean_clean_flux_sed_normalized ./= mean_clean_flux_sed_normalized_weight_sum
+ mean_clean_var_sed_normalized ./= mean_clean_flux_sed_normalized_weight_sum
  dont_need_to!(pipeline_plan,:read_spectra);
 
 
- if args["apply_continuum_normalization"]==true && !(args["continuum_normalization_individually"] == true)
-     println("# Computing continuum normalization from mean spectra.")
+ if args["apply_continuum_normalization"] && !args["continuum_normalization_individually"]
+     println("Applying continuum normalization based on mean of clean spectra.")
      local anchors, continuum, f_filtered
      if args["anchors_filename"] !=nothing
          @assert isfile(args["anchors_filename"]) && filesize(args["anchors_filename"])>0
@@ -549,15 +555,20 @@ pipeline_plan = PipelinePlan()
              (anchors, continuum, f_filtered) = Continuum.calc_continuum(spec.λ, mean_clean_flux_sed_normalized, mean_clean_var_sed_normalized; fwhm = args["fwhm_continuum"]*1000, ν = args["nu_continuum"],
                 stretch_factor = args["stretch_factor"], merging_threshold = args["merging_threshold"], smoothing_half_width = args["smoothing_half_width"], min_R_factor = args["min_rollingpin_r"],
                 orders_to_use = orders_to_use_for_continuum, verbose = false )
-            save(args["anchors_filename_output"], Dict("anchors" => anchors) )
+            if !isnothing(args["anchors_filename_output"])
+                println("# Storing anchors used for continuum model in ",args["anchors_filename_output"],  ".")
+                save(args["anchors_filename_output"], Dict("anchors" => anchors) )
+            end
         else
             (anchors, continuum, f_filtered) = Continuum.calc_continuum(spec.λ, mean_clean_flux, mean_clean_var; fwhm = args["fwhm_continuum"]*1000, ν = args["nu_continuum"],
                 stretch_factor = args["stretch_factor"], merging_threshold = args["merging_threshold"], smoothing_half_width = args["smoothing_half_width"], min_R_factor = args["min_rollingpin_r"],
                 orders_to_use = orders_to_use_for_continuum, verbose = false )
-            save(args["anchors_filename_output"], Dict("anchors" => anchors) )
-        end
-        println("# Stored anchors used for continuum model.")
-    end
+            if !isnothing(args["anchors_filename_output"])
+                println("# Storing anchors used for continuum model in ",args["anchors_filename_output"],  ".")
+                save(args["anchors_filename_output"], Dict("anchors" => anchors) )
+            end
+        end # @isdefined sed
+    end # args["anchors_filename"] 
     normalization_anchors_list = anchors
 
     weight = 1
@@ -573,13 +584,8 @@ pipeline_plan = PipelinePlan()
         end
     end
  end
-mean_lambda ./= mean_clean_flux_weight_sum
-  mean_clean_flux ./= mean_clean_flux_weight_sum
-  mean_clean_var ./= mean_clean_flux_weight_sum
-  mean_clean_flux_sed_normalized ./= mean_clean_flux_sed_normalized_weight_sum
-  mean_clean_var_sed_normalized ./= mean_clean_flux_sed_normalized_weight_sum
-  mean_clean_flux_continuum_normalized ./= mean_clean_flux_continuum_normalized_weight_sum
-  mean_clean_var_continuum_normalized ./= mean_clean_flux_continuum_normalized_weight_sum
+ mean_clean_flux_continuum_normalized ./= mean_clean_flux_continuum_normalized_weight_sum
+ mean_clean_var_continuum_normalized ./= mean_clean_flux_continuum_normalized_weight_sum
 
  order_list_timeseries = extract_orders(all_spectra, pipeline_plan,  orders_to_use=orders_to_use, remove_bad_chunks=false, recalc=true )
 
@@ -600,7 +606,6 @@ line_width = line_width_50_default
    @assert all(map(k->k ∈ names(line_list_espresso), ["lambda","weight","order"]))
    dont_need_to!(pipeline_plan,:clean_line_list_tellurics)
  else
-    println("# Can't find ", line_list_filename, ".  Trying ESPRESSO line list.")
     #orders_to_use = good_orders
     #order_list_timeseries = extract_orders(all_spectra,pipeline_plan, orders_to_use=orders_to_use, recalc=true )
     touch(line_list_filename)
@@ -609,6 +614,7 @@ line_width = line_width_50_default
     else
         line_list_input_filename = joinpath(pkgdir(EchelleCCFs),"data","masks","espresso+neid_mask_97_to_108.mas")
     end
+    println("# Recreating line list weights from ", line_list_input_filename)
     line_list_espresso = prepare_line_list(line_list_input_filename, all_spectra, pipeline_plan, v_center_to_avoid_tellurics=ccf_mid_velocity,
        Δv_to_avoid_tellurics = 2*max_bc+range_no_mask_change*line_width_50_default+max_mask_scale_factor*default_ccf_mask_v_width(NEID2D()), orders_to_use=#=orders_to_use=#56:108, recalc=true, verbose=true)
      if args["recompute_line_weights"] && !isnothing(args["line_list_output_filename"])
@@ -651,7 +657,7 @@ if verbose println(now()) end
 println("# Saving results to ", daily_ccf_filename, ".")
   stop_processing_time = now()
   jldopen(daily_ccf_filename, "w") do f
-    f["v_grid"] = v_grid_order_ccfs
+    f["v_grid"] = collect(v_grid_order_ccfs)
     f["order_ccfs"] = order_ccfs
     f["order_ccf_vars"] = order_ccf_vars
     f["Δfwhm"] = Δfwhm 
@@ -690,7 +696,7 @@ for (i,row) in enumerate(eachrow(df_files_use))
     #ccf_filename = joinpath(neid_data_path,target_subdir,"output","ccfs", m[1] * "_ccfs=default.jld2")
     ccf_filename = joinpath(neid_data_path,target_subdir,"ccfs", m[1] * "_ccfs=default.jld2")
     jldopen(ccf_filename, "w") do f
-        f["v_grid"] = v_grid_order_ccfs
+        f["v_grid"] = collect(v_grid_order_ccfs)
         f["order_ccfs"] = order_ccfs[:,:,i]
         f["order_ccf_vars"] = order_ccf_vars[:,:,i]
         f["orders_to_use"] = orders_to_use
@@ -807,7 +813,7 @@ msf = lsf_width/default_ccf_mask_v_width(NEID2D()); fwtf = 0.5  # using LSF widt
    mask_type=:gaussian, Δfwhm=Δfwhm,
    mask_scale_factor=msf, range_no_mask_change=5*line_width_50, ccf_mid_velocity=ccf_mid_velocity, v_step=100, #155,
    v_max=max(5*line_width_50,2*max_bc), allow_nans=true, calc_ccf_var=true, recalc=true)
-  outputs["v_grid"] = v_grid
+  outputs["v_grid"] = collect(v_grid)
   outputs["ccfs_espresso"] = ccfs_espresso
   outputs["ccf_vars_espresso"] = ccf_vars_espresso
 

diff --git a/examples/daily_rvs_v1.2.jl b/examples/daily_rvs_v1.2.jl
@@ -250,8 +250,9 @@ println("# Found ", length(obs_to_use), " files of ",  size(manifest,1), " to us
                                     :mean_pyroflux=>:pyrflux_mean, :rms_pyroflux=>:pyrflux_rms ) |> DataFrame
    df_out[!,"mask"] = mask
 
-ccf_sum = reshape(sum(order_ccfs,dims=2),num_vels,num_obs)
-ccf_var_sum = reshape(sum(input_data["order_ccf_vars"],dims=2),num_vels,num_obs)
+orders_to_use_for_rvs =  56:78  # TODO: Implement, make parameter, generalize, etc.
+ccf_sum = reshape(sum(view(order_ccfs,:,orders_to_use_for_rvs,:),dims=2),num_vels,num_obs)
+ccf_var_sum = reshape(sum(view(input_data["order_ccf_vars"],:,orders_to_use_for_rvs,:),dims=2),num_vels,num_obs)
 
 println("# Measuring RVs from RvSpectML's CCFs.")
 v_grid = collect(input_data["v_grid"])
@@ -276,20 +277,20 @@ if occursin("template",args["rv_alg"])
       template_obs_to_use = template_data["clean_obs_mask"]
       template_num_obs = sum(template_obs_to_use)
       @assert template_num_obs >= 1
-      template_ccf = reshape(sum(view(template_order_ccfs,:,:,template_obs_to_use),dims=2),num_vels,template_num_obs)
-      template_ccf_var = reshape(sum(view(template_order_ccf_vars,:,:,template_obs_to_use),dims=2),num_vels,template_num_obs)
+      template_ccf = reshape(sum(view(template_order_ccfs,:,orders_to_use_for_rvs,template_obs_to_use),dims=2),num_vels,template_num_obs)
+      template_ccf_var = reshape(sum(view(template_order_ccf_vars,:,orders_to_use_for_rvs,template_obs_to_use),dims=2),num_vels,template_num_obs)
    end
    #ccf_template = EchelleCCFs.calc_ccf_template(view(ccf_sum,:,obs_to_use), view(ccf_var_sum,:,obs_to_use) )
-   ccf_template = EchelleCCFs.calc_ccf_template(template_ccf, template_ccf_var,frac_of_width_to_fit=args["frac_of_width_to_fit"], measure_width_at_frac_depth=args["measure_width_at_frac_depth"])
-   alg_template = MeasureRvFromCCFTemplate(v_grid=collect(v_grid),template=ccf_template)
+   ccf_template = EchelleCCFs.calc_ccf_template(template_ccf, template_ccf_var)
+   alg_template = MeasureRvFromCCFTemplate(v_grid=collect(v_grid),template=ccf_template,frac_of_width_to_fit=args["frac_of_width_to_fit"], measure_width_at_frac_depth=args["measure_width_at_frac_depth"])
    rvs_template = DataFrame(map(i->any(isnan.(view(ccf_sum,:,i))) ? (rv=NaN, σ_rv=NaN) : measure_rv_from_ccf(v_grid,view(ccf_sum,:,i),view(ccf_var_sum,:,i),alg=alg_template),1:num_obs))
    df_out[!,Symbol("rv_template")] = rvs_template.rv
    df_out[!,Symbol("σrv_template")] = rvs_template.σ_rv
 end
 if occursin("gaussian",args["rv_alg"])
    alg_gauss = MeasureRvFromCCFGaussian(frac_of_width_to_fit=args["frac_of_width_to_fit"], measure_width_at_frac_depth=args["measure_width_at_frac_depth"])
    rvs_gauss = DataFrame(map(i->any(isnan.(view(ccf_sum,:,i))) ? (rv=NaN, σ_rv=NaN)  : measure_rv_from_ccf(v_grid,view(ccf_sum,:,i),view(ccf_var_sum,:,i),alg=alg_gauss),1:num_obs))
-   df_out[!,Symbol("rv_template")]= rvs_gauss.rv
+   df_out[!,Symbol("rv_gaussian")]= rvs_gauss.rv
    df_out[!,Symbol("σrv_gaussian")] = rvs_gauss.σ_rv
 end
 if occursin("quadratic",args["rv_alg"])
@@ -315,7 +316,7 @@ for j in 1:num_orders_to_use
    #local ccf_template = EchelleCCFs.calc_ccf_template(view(order_ccfs,:,j,obs_to_use), view(order_ccf_vars,:,j,obs_to_use) )
    local ccf_template = EchelleCCFs.calc_ccf_template(template_ccf, template_ccf_var)
    local alg_template = MeasureRvFromCCFTemplate(v_grid=collect(v_grid),template=ccf_template, frac_of_width_to_fit=args["frac_of_width_to_fit"], measure_width_at_frac_depth=args["measure_width_at_frac_depth"])
-   order_rvs_template = DataFrame(map(i->any(isnan.(view(order_ccfs,:,j,i))) ? (rv=NaN, σ_rv=NaN) : measure_rv_from_ccf(v_grid,view(order_ccfs,:,j,i),view(order_ccf_vars,:,j,i),alg=alg_template),1:num_obs))
+   order_rvs_template = DataFrame(map(i->measure_rv_from_ccf(v_grid,view(order_ccfs,:,j,i),view(order_ccf_vars,:,j,i),alg=alg_template),1:num_obs))
    df_out[!,Symbol("rv_" * string(orders_physical[j]) * "_template")] = order_rvs_template.rv
    df_out[!,Symbol("σrv_" * string(orders_physical[j]) * "_template")] = order_rvs_template.σ_rv
    end