Extract the efficiency of the matching between the shear catalog and the object catalog
Standard import
[1]:
import tables_io
import numpy as np
import hpmcm
import matplotlib.pyplot as plt
Set up the configuration
[2]:
keys = ['_cluster_stats'] # which tables to read
st_ = 'pgauss' # which catalog type
tract = 10463 # which tract to study
dd = tables_io.read(f"test_data/obj_{st_}_match_{tract}.pq", keys=keys)
data = dd['_cluster_stats']
column_list None
Make maskes of different types of matches
[3]:
good_mask = np.bitwise_and(data.nSrc ==2, data.nUnique ==2)
#in_tract = skymap.findTractIdArray(data.ra.values, data.dec.values, degrees=True) == 10463
in_tract = data.ra.values < 38.09
missing_md = np.bitwise_and(~good_mask, data.hasRefCat)
missing_ref = np.bitwise_and(~good_mask, ~data.hasRefCat)
extra = data.nSrc > 2
Make a histogram of the different match types
[4]:
_ = plt.hist(data[good_mask*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label="Good")
_ = plt.hist(data[missing_md*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label="Object only")
_ = plt.hist(data[missing_ref*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label="Shear only")
_ = plt.hist(data[extra*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label="Confusion")
_ = plt.xscale('log')
_ = plt.yscale('log')
_ = plt.legend()
_ = plt.xlabel("Signal-to-noise [r-band]")
_ = plt.ylabel("Objects [per 0.05 dex]")
Make a scatter plot of positions of missing matches, to make sure we haven’t messed up the sky overlap
[5]:
_ = plt.scatter(data.ra[good_mask*in_tract], data.dec[good_mask*in_tract], s=1)
_ = plt.scatter(data.ra[missing_ref*in_tract], data.dec[missing_ref*in_tract], s=1)
Estimate the good match efficiency as a function of SNR
[6]:
hist_all = np.histogram(data.iloc[in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
hist_in_ref = np.histogram(data.iloc[data.hasRefCat.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
hist_missing_md = np.histogram(data.iloc[missing_md.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
hist_good = np.histogram(data.iloc[good_mask.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
hist_missing_ref = np.histogram(data.iloc[missing_ref.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
hist_in_md = hist_good + hist_missing_ref
hist_extra = np.histogram(data.iloc[extra.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]
[7]:
ineffic_missing_md = hist_missing_md/hist_all
ineffic_missing_ref = hist_missing_ref/hist_all
ineffic_extra = hist_extra/hist_all
ineffic_ref_in_md = hist_missing_ref/hist_in_md
npq_missing_md = np.sqrt(ineffic_missing_md*(1-ineffic_missing_md)/hist_all)
npq_missing_ref = np.sqrt(ineffic_missing_ref*(1-ineffic_missing_ref)/hist_all)
npq_missing_extra = np.sqrt(ineffic_extra*(1-ineffic_extra)/hist_all)
npq_ref_in_md = np.sqrt(ineffic_ref_in_md*(1-ineffic_ref_in_md)/hist_in_md)
bin_edges = np.logspace(0, 5, 101)
bin_centers = np.sqrt(bin_edges[0:-1] * bin_edges[1:])
/tmp/ipykernel_1263/451252170.py:1: RuntimeWarning: invalid value encountered in divide
ineffic_missing_md = hist_missing_md/hist_all
/tmp/ipykernel_1263/451252170.py:2: RuntimeWarning: invalid value encountered in divide
ineffic_missing_ref = hist_missing_ref/hist_all
/tmp/ipykernel_1263/451252170.py:3: RuntimeWarning: invalid value encountered in divide
ineffic_extra = hist_extra/hist_all
/tmp/ipykernel_1263/451252170.py:4: RuntimeWarning: invalid value encountered in divide
ineffic_ref_in_md = hist_missing_ref/hist_in_md
Plot the good match efficiency as a function of SNR
[8]:
_ = plt.errorbar(bin_centers, ineffic_missing_md, yerr=npq_missing_md, label="Has Ref", ls="", marker='.')
_ = plt.errorbar(bin_centers, ineffic_missing_ref, yerr=npq_missing_ref, label="No Ref", ls="", marker='.')
_ = plt.xscale('log')
_ = plt.yscale('log')
Plot the good match efficiency w.r.t. the metadataect catalog as a function of SNR
[9]:
_ = plt.errorbar(bin_centers, ineffic_ref_in_md, yerr=npq_ref_in_md, label="Has Ref", ls="", marker='.')
_ = plt.xscale('log')
_ = plt.yscale('log')
_ = plt.xlabel("Signal-to-noise [r-band]")
_ = plt.ylabel("Inefficiency w.r.t. MD objects")
Estimate the good match efficiency w.r.t. the metadataect catalog
[10]:
nMissing = hist_missing_ref[20:].sum()
nAll = hist_in_md[20:].sum()
effic = (nAll-nMissing)/nAll
effic_err = np.sqrt(effic*(1-effic)/nAll)
[11]:
print(f"Effic: {effic:.5} +- {effic_err:.5f}")
Effic: 0.99022 +- 0.00152
[ ]:
[ ]: