{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1be93b6-33b3-45e1-a18d-fc960c3f55be",
   "metadata": {},
   "source": [
    "## Extract the efficiency of the matching between the shear catalog and the object catalog\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ccadb16e-dd15-4942-ae14-9a4ed6075be8",
   "metadata": {},
   "source": [
    "#### Standard import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7cc9d49-9352-4804-97f4-4647fcae1c77",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tables_io\n",
    "import numpy as np\n",
    "import hpmcm\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec47da1a-3808-40ee-8cfc-0069ca5d79e9",
   "metadata": {},
   "source": [
    "#### Set up the configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af5e01db-98c6-4281-a1b9-ef8a9e058ba9",
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = ['_cluster_stats']  # which tables to read\n",
    "st_ = 'pgauss'             # which catalog type\n",
    "tract = 10463              # which tract to study\n",
    "dd = tables_io.read(f\"test_data/obj_{st_}_match_{tract}.pq\", keys=keys)\n",
    "data = dd['_cluster_stats']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9c06396-614c-46c0-9e85-796a1abe671d",
   "metadata": {},
   "source": [
    "#### Make maskes of different types of matches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1be8077d-3d09-4791-99ba-fd1e45fcc154",
   "metadata": {},
   "outputs": [],
   "source": [
    "good_mask = np.bitwise_and(data.nSrc ==2, data.nUnique ==2) \n",
    "#in_tract = skymap.findTractIdArray(data.ra.values, data.dec.values, degrees=True) == 10463\n",
    "in_tract = data.ra.values < 38.09\n",
    "missing_md = np.bitwise_and(~good_mask, data.hasRefCat)\n",
    "missing_ref = np.bitwise_and(~good_mask, ~data.hasRefCat)\n",
    "extra = data.nSrc > 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8af14a29-ea9e-42c5-8f8e-b60a88ccdf27",
   "metadata": {},
   "source": [
    "#### Make a histogram of the different match types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abf9c0a4-ea84-4e26-8edc-1b9e833386e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = plt.hist(data[good_mask*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label=\"Good\")\n",
    "_ = plt.hist(data[missing_md*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label=\"Object only\")\n",
    "_ = plt.hist(data[missing_ref*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label=\"Shear only\")\n",
    "_ = plt.hist(data[extra*in_tract].SNR, bins=np.logspace(0, 4, 101), alpha=0.5, label=\"Confusion\")\n",
    "\n",
    "_ = plt.xscale('log')\n",
    "_ = plt.yscale('log')\n",
    "_ = plt.legend()\n",
    "\n",
    "_ = plt.xlabel(\"Signal-to-noise [r-band]\")\n",
    "_ = plt.ylabel(\"Objects [per 0.05 dex]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d760ebd2-12d2-4898-ae82-4dfee0e9884d",
   "metadata": {},
   "source": [
    "#### Make a scatter plot of positions of missing matches, to make sure we haven't messed up the sky overlap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3b50ecc-c47c-4475-998a-24d1e7d1c180",
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = plt.scatter(data.ra[good_mask*in_tract], data.dec[good_mask*in_tract], s=1)\n",
    "_ = plt.scatter(data.ra[missing_ref*in_tract], data.dec[missing_ref*in_tract], s=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4f305dc-3965-44b3-bbb7-a39e290e774a",
   "metadata": {},
   "source": [
    "#### Estimate the good match efficiency as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb1c4a49-7f6d-4d8e-a1c4-2574f1d4f146",
   "metadata": {},
   "outputs": [],
   "source": [
    "hist_all =  np.histogram(data.iloc[in_tract].SNR, bins=np.logspace(0, 5, 101))[0]\n",
    "hist_in_ref = np.histogram(data.iloc[data.hasRefCat.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]\n",
    "hist_missing_md = np.histogram(data.iloc[missing_md.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]\n",
    "hist_good = np.histogram(data.iloc[good_mask.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]\n",
    "hist_missing_ref = np.histogram(data.iloc[missing_ref.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]\n",
    "hist_in_md = hist_good + hist_missing_ref\n",
    "hist_extra = np.histogram(data.iloc[extra.values*in_tract].SNR, bins=np.logspace(0, 5, 101))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a697d001-0c21-4fea-9a67-82eae441b7ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "ineffic_missing_md = hist_missing_md/hist_all\n",
    "ineffic_missing_ref = hist_missing_ref/hist_all\n",
    "ineffic_extra = hist_extra/hist_all\n",
    "ineffic_ref_in_md = hist_missing_ref/hist_in_md\n",
    "npq_missing_md = np.sqrt(ineffic_missing_md*(1-ineffic_missing_md)/hist_all)\n",
    "npq_missing_ref = np.sqrt(ineffic_missing_ref*(1-ineffic_missing_ref)/hist_all)\n",
    "npq_missing_extra = np.sqrt(ineffic_extra*(1-ineffic_extra)/hist_all)\n",
    "npq_ref_in_md = np.sqrt(ineffic_ref_in_md*(1-ineffic_ref_in_md)/hist_in_md)\n",
    "bin_edges = np.logspace(0, 5, 101)\n",
    "bin_centers = np.sqrt(bin_edges[0:-1] * bin_edges[1:])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2757b709-c251-4852-ae9d-4a3e68c689b1",
   "metadata": {},
   "source": [
    "#### Plot the good match efficiency as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91cbbf02-6fd4-407c-af5c-c0b7fcaac0de",
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = plt.errorbar(bin_centers, ineffic_missing_md, yerr=npq_missing_md, label=\"Has Ref\", ls=\"\", marker='.')\n",
    "_ = plt.errorbar(bin_centers, ineffic_missing_ref, yerr=npq_missing_ref, label=\"No Ref\", ls=\"\", marker='.')\n",
    "_ = plt.xscale('log')\n",
    "_ = plt.yscale('log')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01e3f535-331d-4daa-9136-f0cc887410a3",
   "metadata": {},
   "source": [
    "#### Plot the good match efficiency w.r.t. the metadataect catalog as a function of SNR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11549078-5d20-44ca-a14d-5241276e0b64",
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = plt.errorbar(bin_centers, ineffic_ref_in_md, yerr=npq_ref_in_md, label=\"Has Ref\", ls=\"\", marker='.')\n",
    "_ = plt.xscale('log')\n",
    "_ = plt.yscale('log')\n",
    "_ = plt.xlabel(\"Signal-to-noise [r-band]\")\n",
    "_ = plt.ylabel(\"Inefficiency w.r.t. MD objects\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc56345b-bb31-47f6-9a2e-863f7080a978",
   "metadata": {},
   "source": [
    "#### Estimate the good match efficiency w.r.t. the metadataect catalog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53111cc1-2b48-4e33-813d-b08510098c50",
   "metadata": {},
   "outputs": [],
   "source": [
    "nMissing = hist_missing_ref[20:].sum()\n",
    "nAll = hist_in_md[20:].sum()\n",
    "effic = (nAll-nMissing)/nAll\n",
    "effic_err = np.sqrt(effic*(1-effic)/nAll)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8c0d68d-f777-474b-93de-0c9dccc95fdc",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Effic: {effic:.5} +- {effic_err:.5f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7687e18e-65c4-405b-90b4-9c37c1d78cb4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94722af4-c8ca-4f4c-b526-d74cc6b88b01",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}