{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1be93b6-33b3-45e1-a18d-fc960c3f55be",
   "metadata": {},
   "source": [
    "## Extract the efficiency of the matching between the shear catalog and the object catalog\n",
    "\n",
    "This notebook reads the `_cluster_stats` table for one tract, classifies every cluster by match type, and measures the fraction of unmatched objects as a function of signal-to-noise."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ccadb16e-dd15-4942-ae14-9a4ed6075be8",
   "metadata": {},
   "source": [
    "#### Standard import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7cc9d49-9352-4804-97f4-4647fcae1c77",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tables_io\n",
    "import numpy as np\n",
    "import hpmcm\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec47da1a-3808-40ee-8cfc-0069ca5d79e9",
   "metadata": {},
   "source": [
    "#### Set up the configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af5e01db-98c6-4281-a1b9-ef8a9e058ba9",
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = ['_cluster_stats']  # which tables to read\n",
    "st_ = 'pgauss'  # which catalog type\n",
    "tract = 10463  # which tract to study\n",
    "RA_MAX_DEG = 38.09  # RA cut used as a stand-in for tract membership (see the mask cell)\n",
    "SNR_BINS = np.logspace(0, 5, 101)  # 100 log-spaced bins over SNR 1..1e5 (0.05 dex each)\n",
    "FIRST_EFFIC_BIN = 20  # lower edge is 10**(20 * 0.05) = 10, so the summary covers SNR >= 10\n",
    "\n",
    "dd = tables_io.read(f\"test_data/obj_{st_}_match_{tract}.pq\", keys=keys)\n",
    "data = dd['_cluster_stats']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "helpers-md-0001",
   "metadata": {},
   "source": [
    "#### Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helpers-code-0001",
   "metadata": {},
   "outputs": [],
   "source": [
    "def snr_histogram(snr, mask, bins=SNR_BINS):\n",
    "    \"\"\"Count the entries of `snr` selected by the boolean array `mask` in each SNR bin.\"\"\"\n",
    "    return np.histogram(snr[mask], bins=bins)[0]\n",
    "\n",
    "\n",
    "def binomial_fraction(n_pass, n_total):\n",
    "    \"\"\"Return the per-bin fraction `n_pass / n_total` and its binomial error.\n",
    "\n",
    "    The error is sqrt(p * (1 - p) / n_total). Bins with `n_total == 0` give NaN\n",
    "    for both values instead of triggering a divide-by-zero warning.\n",
    "    \"\"\"\n",
    "    n_pass = np.asarray(n_pass, dtype=float)\n",
    "    n_total = np.asarray(n_total, dtype=float)\n",
    "    filled = n_total > 0\n",
    "    frac = np.divide(n_pass, n_total, out=np.full_like(n_total, np.nan), where=filled)\n",
    "    err = np.sqrt(frac * (1 - frac) / np.where(filled, n_total, np.nan))\n",
    "    return frac, err"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9c06396-614c-46c0-9e85-796a1abe671d",
   "metadata": {},
   "source": [
    "#### Make masks of different types of matches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1be8077d-3d09-4791-99ba-fd1e45fcc154",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the columns out once as plain arrays so every mask below is a boolean ndarray.\n",
    "snr = data.SNR.to_numpy()\n",
    "ra = data.ra.to_numpy()\n",
    "dec = data.dec.to_numpy()\n",
    "has_ref = data.hasRefCat.to_numpy(dtype=bool)\n",
    "\n",
    "good_mask = ((data.nSrc == 2) & (data.nUnique == 2)).to_numpy()\n",
    "# NOTE(review): tract membership is approximated by a plain RA cut. The intended selection\n",
    "# appears to be a skymap lookup (findTractIdArray(ra, dec, degrees=True) == tract), but no\n",
    "# skymap is loaded in this notebook -- TODO confirm the cut matches the tract boundary.\n",
    "in_tract = ra < RA_MAX_DEG\n",
    "missing_md = ~good_mask & has_ref\n",
    "missing_ref = ~good_mask & ~has_ref\n",
    "extra = (data.nSrc > 2).to_numpy()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8af14a29-ea9e-42c5-8f8e-b60a88ccdf27",
   "metadata": {},
   "source": [
    "#### Make a histogram of the different match types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abf9c0a4-ea84-4e26-8edc-1b9e833386e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "match_types = [\n",
    "    (good_mask, \"Good\"),\n",
    "    (missing_md, \"Object only\"),\n",
    "    (missing_ref, \"Shear only\"),\n",
    "    (extra, \"Confusion\"),\n",
    "]\n",
    "for mask, label in match_types:\n",
    "    ax.hist(snr[mask & in_tract], bins=SNR_BINS, alpha=0.5, label=label)\n",
    "\n",
    "ax.set_xscale('log')\n",
    "ax.set_yscale('log')\n",
    "ax.legend()\n",
    "\n",
    "# Derive the bin width from the bin edges so the axis label cannot drift from the binning.\n",
    "dex_per_bin = np.diff(np.log10(SNR_BINS))[0]\n",
    "ax.set_xlabel(\"Signal-to-noise [r-band]\")\n",
    "ax.set_ylabel(f\"Objects [per {dex_per_bin:.2f} dex]\");"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d760ebd2-12d2-4898-ae82-4dfee0e9884d",
   "metadata": {},
   "source": [
    "#### Make a scatter plot of positions of missing matches, to make sure we haven't messed up the sky overlap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3b50ecc-c47c-4475-998a-24d1e7d1c180",
   "metadata": {},
   "outputs": [],
   "source": [
    "sel_good = good_mask & in_tract\n",
    "sel_missing_ref = missing_ref & in_tract\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(ra[sel_good], dec[sel_good], s=1, label=\"Good\")\n",
    "ax.scatter(ra[sel_missing_ref], dec[sel_missing_ref], s=1, label=\"Shear only\")\n",
    "ax.set_xlabel(\"RA [deg]\")\n",
    "ax.set_ylabel(\"Dec [deg]\")\n",
    "ax.legend(markerscale=5);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4f305dc-3965-44b3-bbb7-a39e290e774a",
   "metadata": {},
   "source": [
    "#### Estimate the match inefficiencies as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb1c4a49-7f6d-4d8e-a1c4-2574f1d4f146",
   "metadata": {},
   "outputs": [],
   "source": [
    "hist_all = snr_histogram(snr, in_tract)\n",
    "hist_in_ref = snr_histogram(snr, has_ref & in_tract)\n",
    "hist_missing_md = snr_histogram(snr, missing_md & in_tract)\n",
    "hist_good = snr_histogram(snr, good_mask & in_tract)\n",
    "hist_missing_ref = snr_histogram(snr, missing_ref & in_tract)\n",
    "hist_in_md = hist_good + hist_missing_ref\n",
    "hist_extra = snr_histogram(snr, extra & in_tract)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a697d001-0c21-4fea-9a67-82eae441b7ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "ineffic_missing_md, npq_missing_md = binomial_fraction(hist_missing_md, hist_all)\n",
    "ineffic_missing_ref, npq_missing_ref = binomial_fraction(hist_missing_ref, hist_all)\n",
    "ineffic_extra, npq_missing_extra = binomial_fraction(hist_extra, hist_all)\n",
    "ineffic_ref_in_md, npq_ref_in_md = binomial_fraction(hist_missing_ref, hist_in_md)\n",
    "\n",
    "bin_edges = SNR_BINS\n",
    "bin_centers = np.sqrt(bin_edges[:-1] * bin_edges[1:])  # geometric mean of each bin's edges"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2757b709-c251-4852-ae9d-4a3e68c689b1",
   "metadata": {},
   "source": [
    "#### Plot the match inefficiencies as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91cbbf02-6fd4-407c-af5c-c0b7fcaac0de",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.errorbar(bin_centers, ineffic_missing_md, yerr=npq_missing_md, label=\"Has Ref\", ls=\"\", marker='.')\n",
    "ax.errorbar(bin_centers, ineffic_missing_ref, yerr=npq_missing_ref, label=\"No Ref\", ls=\"\", marker='.')\n",
    "ax.set_xscale('log')\n",
    "ax.set_yscale('log')\n",
    "ax.set_xlabel(\"Signal-to-noise [r-band]\")\n",
    "ax.set_ylabel(\"Inefficiency w.r.t. all objects\")\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01e3f535-331d-4daa-9136-f0cc887410a3",
   "metadata": {},
   "source": [
    "#### Plot the match inefficiency w.r.t. the metadetect catalog as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11549078-5d20-44ca-a14d-5241276e0b64",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.errorbar(bin_centers, ineffic_ref_in_md, yerr=npq_ref_in_md, ls=\"\", marker='.')\n",
    "ax.set_xscale('log')\n",
    "ax.set_yscale('log')\n",
    "ax.set_xlabel(\"Signal-to-noise [r-band]\")\n",
    "ax.set_ylabel(\"Inefficiency w.r.t. MD objects\");"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc56345b-bb31-47f6-9a2e-863f7080a978",
   "metadata": {},
   "source": [
    "#### Estimate the good match efficiency w.r.t. the metadetect catalog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53111cc1-2b48-4e33-813d-b08510098c50",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum only the bins from FIRST_EFFIC_BIN upward, i.e. objects with SNR >= 10.\n",
    "nMissing = hist_missing_ref[FIRST_EFFIC_BIN:].sum()\n",
    "nAll = hist_in_md[FIRST_EFFIC_BIN:].sum()\n",
    "effic = (nAll - nMissing) / nAll\n",
    "effic_err = np.sqrt(effic * (1 - effic) / nAll)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8c0d68d-f777-474b-93de-0c9dccc95fdc",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Effic: {effic:.5f} +- {effic_err:.5f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}