From 0c651023cb10d8253e699737953b390c439bb115 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Wed, 10 Dec 2025 13:58:26 -0800 Subject: [PATCH 1/4] Add magnitudes tutorial --- .../4-euclid-q1-hats-magnitudes.md | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md diff --git a/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md b/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md new file mode 100644 index 00000000..e23aa35e --- /dev/null +++ b/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md @@ -0,0 +1,357 @@ +--- +short_title: Magnitudes +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.1 + root_level_metadata_filter: -short_title +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Euclid Q1 Merged Objects HATS Catalog: Magnitudes + ++++ + +This tutorial explores Euclid photometry measurements. +It assumes you are familiar with the [first tutorial](1-euclid-q1-hats-intro.md) in this series, which covers the Euclid Q1 Merged Objects HATS Catalog content, format, and basic access. + ++++ + +## Learning Goals + +By the end of this tutorial, you will be able to: + +- Understand the different Euclid Q1 flux measurements and their intended use cases. +- Load aperture and template-fit magnitudes for Euclid I, Y, J, and H bands from the Euclid Q1 Merged Objects HATS Catalog. +- Visualize and understand the template-fit magnitude distributions as a function of object classification. +- Compare aperture and template-fit magnitudes to understand their differences. + ++++ + +## 1. Introduction + +The [Euclid Q1](https://irsa.ipac.caltech.edu/data/Euclid/docs/overview_q1.html) data release contains photometry from Euclid as well as from external surveys. 
+There are several flux measurements per band. +The measurements are described in [Euclid Collaboration: Romelli et al., 2025](https://arxiv.org/pdf/2503.15305) (hereafter, Romelli), especially sections 6 and 8. + +In this tutorial, we will look at aperture and template-fit photometry measurements in the four Euclid bands: I (from the VIS instrument), Y, J, and H (from the NISP instrument). +**Aperture fluxes** are generally more accurate for point-like sources, especially bright stars in the NIR bands, likely due to better handling of PSF-related effects. +**Template-fit fluxes** are expected to be more accurate for extended sources because the templates do a better job of excluding contamination from nearby sources. +Additional photometry measurements that we won't cover here include: **Sérsic-fit fluxes** (computed for parametric morphology), **PSF-fit fluxes** (VIS only), and **class-corrected fluxes** that were corrected based on the PHZ (photometric) classifications. + +The best-estimate flux in the detection band is given by the column `mer_flux_detection_total`. +This can also be used to color-correct flux measurements in non-detection bands, as we will demonstrate. +The object detection process is described in Romelli. +The [MER Photometry Cookbook](http://st-dm.pages.euclid-sgs.uk/data-product-doc/dmq1/merdpd/merphotometrycookbook.html) describes the color corrections and how to convert from flux to magnitude. +In this tutorial, we will restrict to objects detected in VIS (I band) because it simplifies the calculations. + ++++ + +## 2. Imports and Paths + +```{code-cell} ipython3 +# # Uncomment the next line to install dependencies if needed. 
+# %pip install hpgeom matplotlib pandas pyarrow +``` + +```{code-cell} ipython3 +import hpgeom # Find HEALPix indexes from RA and Dec +import matplotlib.pyplot as plt # Create figures +import pyarrow.compute as pc # Filter dataset +import pyarrow.dataset # Load the dataset +import pyarrow.fs # Simple S3 filesystem pointer +import pyarrow.parquet # Load the schema +``` + +```{code-cell} ipython3 +# AWS S3 paths. +s3_bucket = "nasa-irsa-euclid-q1" +dataset_prefix = "contributed/q1/merged_objects/hats/euclid_q1_merged_objects-hats/dataset" +dataset_path = f"{s3_bucket}/{dataset_prefix}" + +# S3 pointer. Use `anonymous=True` to access without credentials. +s3 = pyarrow.fs.S3FileSystem(anonymous=True) +``` + +## 3. Load Template-fit and Aperture Magnitudes + +The following columns will be important. +Descriptions come from Romelli. + +```{code-cell} ipython3 +# Whether the source was detected in VIS mosaic (1) or only in NIR-stack mosaic (0). +VIS_DET = "mer_vis_det" + +# Best estimate of the total flux in the detection band. +# From aperture photometry within a Kron radius. +# Detection band is VIS if `VIS_DET == 1`. Otherwise, this is a +# non-physical NIR-stack flux and there was no VIS detection (aka, NIR-only). +# We will only deal with VIS-detected objects in this notebook. +FLUX_DET_TOTAL = "mer_flux_detection_total" + +# Peak surface brightness minus the magnitude used for `mer_point_like_prob`. +# This is a measure of compactness. +MUMAX_MINUS_MAG = "mer_mumax_minus_mag" + +# Whether the detection has a >50% probability of being spurious (1=Yes, 0=No). +SPURIOUS_FLAG = "mer_spurious_flag" + +# PHZ classification: 1=Star, 2=Galaxy, 4=QSO. +# Combinations (3, 5, 6, and 7) indicate multiple probability thresholds were exceeded. +PHZ_CLASS = "phz_phz_classification" +``` + +We'll convert the catalog fluxes to magnitudes following the [MER Photometry Cookbook](http://st-dm.pages.euclid-sgs.uk/data-product-doc/dmq1/merdpd/merphotometrycookbook.html). 
+For convenience, we'll have PyArrow do the conversion during the read operation and return only the magnitudes. + +```{code-cell} ipython3 +def flux_to_magnitude(flux_col_name: str) -> pc.Expression: + """Convert catalog fluxes to magnitudes following the MER Photometry Cookbook. + + Parameters + ---------- + flux_col_name : str + The name of the flux column to convert to magnitude. + + Returns + ------- + pyarrow.compute.Expression + An expression for the magnitude. It can be used in the `filter` and `columns` + keyword arguments when loading data from a PyArrow dataset. + """ + # We expect to be dealing with VIS_DET == 1 objects, so FLUX_DET_TOTAL == VIS flux. + vis_flux = pc.field(FLUX_DET_TOTAL) + band_flux = pc.field(flux_col_name) + + if flux_col_name == FLUX_DET_TOTAL: + # Best-estimate flux in VIS is FLUX_DET_TOTAL. + best_flux = vis_flux + elif flux_col_name.endswith("_templfit"): + # Best-estimate template-fit flux is the band flux scaled by a color correction. + band = flux_col_name.split("_")[-2] # y, j, or h + color_scale = pc.divide(vis_flux, pc.field(f"mer_flux_vis_to_{band}_templfit")) + best_flux = pc.multiply(band_flux, color_scale) + elif flux_col_name.endswith("aper"): + # Best-estimate aperture flux is the band flux scaled by a color correction. + nfwhm = flux_col_name.split("_")[-2] # e.g., 2fwhm + color_scale = pc.divide(vis_flux, pc.field(f"mer_flux_vis_{nfwhm}_aper")) + best_flux = pc.multiply(band_flux, color_scale) + + # magnitude = -2.5 * log10(flux) + 23.9. + scale = pc.scalar(-2.5) + log10_flux = pc.log10(best_flux) + zeropoint = pc.scalar(23.9) + mag_expression = pc.add(pc.multiply(scale, log10_flux), zeropoint) + return mag_expression +``` + +Define the columns we want to load. +This needs to be a dictionary of PyArrow expressions (rather than a simple list of column names) because we're asking PyArrow to convert flux -> magnitude before returning the data. 
+ +```{code-cell} ipython3 +I_MAG = "I (mag)" +columns = { + PHZ_CLASS: pc.field(PHZ_CLASS), + I_MAG: flux_to_magnitude(FLUX_DET_TOTAL), + "Y aperture (mag)": flux_to_magnitude("mer_flux_y_2fwhm_aper"), + "J aperture (mag)": flux_to_magnitude("mer_flux_j_2fwhm_aper"), + "H aperture (mag)": flux_to_magnitude("mer_flux_h_2fwhm_aper"), + "Y templfit (mag)": flux_to_magnitude("mer_flux_y_templfit"), + "J templfit (mag)": flux_to_magnitude("mer_flux_j_templfit"), + "H templfit (mag)": flux_to_magnitude("mer_flux_h_templfit"), + MUMAX_MINUS_MAG: pc.field(MUMAX_MINUS_MAG), +} +# Let's see what one of these looks like. +columns["Y aperture (mag)"] +``` + +We'll restrict to the Euclid Deep Field - Fornax (EDF-F) to reduce the amount of data loaded. +Compute the HEALPix order 9 pixel indexes, following the [introductory tutorial](1-euclid-q1-hats-intro.md). + +```{code-cell} ipython3 +ra, dec, radius = 52.932, -28.088, 3 # 10 sq deg +edff_k9_pixels = hpgeom.query_circle(hpgeom.order_to_nside(9), ra, dec, radius, inclusive=True) +``` + +Construct the row filter. + +```{code-cell} ipython3 +row_filter = ( + # Stars, Galaxies, QSOs, and mixed classes. + pc.field(PHZ_CLASS).isin([1, 2, 3, 4, 5, 6, 7]) + # Basic quality cut. + & (pc.field(SPURIOUS_FLAG) == 0) + # VIS-detected objects. (If you want to include NIR-only objects, alter flux_to_magnitude() + # following MER Photometry Cookbook and also comment out the next line.) + & (pc.field(VIS_DET) == 1) + # EDF-F region. (Comment out the next line to do an all-sky search.) + & pc.field("_healpix_9").isin(edff_k9_pixels) +) +``` + +Load the data. + +```{code-cell} ipython3 +# Load the catalog as a PyArrow dataset. Include partitioning="hive" +# so PyArrow understands the file naming scheme and can navigate the partitions. 
+schema = pyarrow.parquet.read_schema(f"{dataset_path}/_common_metadata", filesystem=s3) +dataset = pyarrow.dataset.dataset(dataset_path, partitioning="hive", filesystem=s3, schema=schema) + +mags_df = dataset.to_table(columns=columns, filter=row_filter).to_pandas() +mags_df +``` + +## 4. Magnitude Distributions of Galaxies, Stars, and QSOs + ++++ + +Let's visualize the template-fit magnitude distributions as a function of PHZ classification. +Since the template-fit photometry is recommended for extended objects, we'll separate the point-like objects. +[Euclid Collaboration: Tucci et al., 2025](https://arxiv.org/pdf/2503.15306) defines point-like objects as having `MUMAX_MINUS_MAG < -2.5`. + +```{code-cell} ipython3 +# Galaxy + any. Star + galaxy. QSO + galaxy. +classes = {"Galaxy": (2, 3, 6, 7), "Star": (1, 3), "QSO": (4, 6)} +class_colors = ["tab:green", "tab:blue", "tab:orange"] + +bands = [I_MAG, "Y templfit (mag)", "J templfit (mag)", "H templfit (mag)"] +mag_limits = (14, 28) # Excluding all magnitudes outside this range. +hist_kwargs = dict(bins=20, range=mag_limits, histtype="step") + +fig, axes = plt.subplots(3, 4, figsize=(18, 12), sharey="row", sharex=True) +for (class_name, class_ids), class_color in zip(classes.items(), class_colors): + hist_kwargs["color"] = class_color + + # Get the objects that are in this class only. + class_df = mags_df.loc[mags_df[PHZ_CLASS] == class_ids[0]] + # Plot histograms for each band. Galaxies on top row, then stars, then QSOs. + axs = axes[0] if class_name == "Galaxy" else (axes[1] if class_name == "Star" else axes[2]) + for ax, band in zip(axs, bands): + ax.hist(class_df[band], label=class_name, **hist_kwargs) + + # Get the objects that were accepted as multiple classes. + class_df = mags_df.loc[mags_df[PHZ_CLASS].isin(class_ids)] + label = "+Galaxy" if class_name != "Galaxy" else "+any" + # Of those objects, restrict to the ones that are point-like. 
+ classpt_df = class_df.loc[class_df[MUMAX_MINUS_MAG] < -2.5] + pt_label = f"{label} and point-like" + # Plot histograms for both sets of objects. + for ax, band in zip(axs, bands): + ax.hist(class_df[band], label=label, linestyle=":", **hist_kwargs) + ax.hist(classpt_df[band], linestyle="-.", label=pt_label, **hist_kwargs) + +# Add axis labels, etc. +for ax in axes[:, 0]: + ax.set_ylabel("Counts") + ax.legend(framealpha=0.2, loc=2) +for axs, band in zip(axes.transpose(), bands): + axs[0].set_title(band.split()[0]) + axs[-1].set_xlabel(band) +plt.tight_layout() +``` + +The Euclid instruments are tuned to detect galaxies for cosmology studies, so it's no surprise that there are many more galaxies than other object types. + +The green lines (top row) show the magnitude distributions of objects classified as galaxy only (solid) and those classified as galaxy plus possibly other types (dot and dash-dot). +The dash-dot line highlights the population of point-like "galaxies", which are likely misclassified stars or QSOs and mostly appear at faint magnitudes. + +The star distributions (middle row, blue) are broader and peak at brighter magnitudes than the galaxy distributions, as expected. +Adding objects classified as both star and galaxy (dotted line) adds significant numbers, especially near the peak and toward the faint end where confusion is more likely. +Restricting these to point-like objects (dash-dot line) shows that many bright objects surpassing both probability thresholds are likely to be stars, not galaxies. +However, this doesn't hold at the faint end where even some star-only classified objects fail the point-like cut. + +The bottom row (orange) is the same as the middle row but for QSOs instead of stars. +There are very few point-like QSOs, reminding us that most QSO classifications in Q1 should be treated with skepticism (as discussed in the Classifications tutorial). +By default, this figure only includes objects in the EDF-F region. 
+High-confidence QSOs are more concentrated in the EDF-N region where advantageous external photometry (particularly u-band from UNIONS) was available. + ++++ + +## 5. Template-fit vs. Aperture Magnitudes + ++++ + +Now let's compare template-fit and aperture magnitudes by plotting their differences. +This comparison reveals systematic offsets that depend on factors including morphology (extended vs. point-like) and brightness. + +This figure is inspired by Romelli Fig. 6 (top panel). + +```{code-cell} ipython3 +# Only consider objects within these mag and mag difference limits. +mag_limits, mag_diff_limits = (16, 24), (-1, 1) +mag_limited_df = mags_df.loc[(mags_df[I_MAG] > mag_limits[0]) & (mags_df[I_MAG] < mag_limits[1])] + +fig, axes = plt.subplots(2, 3, figsize=(18, 9), sharey=True, sharex=True) +bands = [ + ("Y templfit (mag)", "Y aperture (mag)"), + ("J templfit (mag)", "J aperture (mag)"), + ("H templfit (mag)", "H aperture (mag)"), +] +hexbin_kwargs = dict( + cmap="YlGnBu", bins="log", extent=(*mag_limits, *mag_diff_limits), gridsize=25 +) +annotate_kwargs = dict( + xycoords="axes fraction", ha="left", fontweight="bold", bbox=dict(facecolor="white", alpha=0.8) +) + +# Plot +for axs, (ref_band, aper_band) in zip(axes.transpose(), bands): + # Extended objects, top row. + ax = axs[0] + extended = mags_df.loc[mags_df[MUMAX_MINUS_MAG] >= -2.5, [I_MAG, ref_band, aper_band]] + extended["mag_diff"] = extended[ref_band] - extended[aper_band] + extended = extended.dropna(subset="mag_diff") + cb = ax.hexbin(extended[I_MAG], extended["mag_diff"], **hexbin_kwargs) + plt.colorbar(cb) + ax.set_ylabel(f"{ref_band} - {aper_band}") + # Annotate top (bottom) with the fraction of objects having a magnitude difference greater (less) than 0. 
+ frac_tmpl_greater = len(extended.loc[extended["mag_diff"] > 0]) / len(extended) + ax.annotate(f"{frac_tmpl_greater:.3f}", xy=(0.01, 0.99), va="top", **annotate_kwargs) + frac_tmpl_less = len(extended.loc[extended["mag_diff"] < 0]) / len(extended) + ax.annotate(f"{frac_tmpl_less:.3f}", xy=(0.01, 0.01), va="bottom", **annotate_kwargs) + + # Point-like objects, bottom row. + ax = axs[1] + pointlike = mags_df.loc[mags_df[MUMAX_MINUS_MAG] < -2.5, [I_MAG, ref_band, aper_band]] + pointlike["mag_diff"] = pointlike[ref_band] - pointlike[aper_band] + pointlike = pointlike.dropna(subset="mag_diff") + cb = ax.hexbin(pointlike[I_MAG], pointlike["mag_diff"], **hexbin_kwargs) + plt.colorbar(cb) + ax.set_ylabel(f"{ref_band} - {aper_band}") + # Annotate top (bottom) with the fraction of objects having a magnitude difference greater (less) than 0. + frac_tmpl_greater = len(pointlike.loc[pointlike["mag_diff"] > 0]) / len(pointlike) + ax.annotate(f"{frac_tmpl_greater:.3f}", xy=(0.01, 0.99), va="top", **annotate_kwargs) + frac_tmpl_less = len(pointlike.loc[pointlike["mag_diff"] < 0]) / len(pointlike) + ax.annotate(f"{frac_tmpl_less:.3f}", xy=(0.01, 0.01), va="bottom", **annotate_kwargs) + +# Add axis labels, etc. +for i, ax in enumerate(axes.flatten()): + ax.axhline(0, color="gray", linewidth=1) + if i == 1: + ax.set_title("Extended objects") + if i == 4: + ax.set_title("Point-like objects") + if i > 2: + ax.set_xlabel(I_MAG) +plt.tight_layout() +``` + +The panel annotations give the fraction of objects with magnitude differences that are positive (top number) and negative (bottom number). +The magnitude difference is fairly tightly clustered around 0 for extended objects (top row), but with asymmetric outliers. +There is a positive offset, indicating fainter template-fit magnitudes, as expected: templates better exclude contaminating light from nearby sources. 
+The offset is more pronounced for point-like objects (bottom row), likely due to the PSF handling mentioned above, and we are reminded that aperture magnitudes are more reliable here. + ++++ + +## About this notebook + +**Authors:** Troy Raen, Vandana Desai, Andreas Faisst, Shoubaneh Hemmati, Jaladh Singhal, Brigitta Sipőcz, Jessica Krick, the IRSA Data Science Team, and the Euclid NASA Science Center at IPAC (ENSCI). + +**Updated:** 2025-12-22 + +**Contact:** [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) From 43df5a0c47be9ebd8acf2f766d35a86618181203 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Mon, 22 Dec 2025 17:20:23 -0800 Subject: [PATCH 2/4] Update Introduction short_title, intro, lexer --- .../euclid-q1-hats/1-euclid-q1-hats-intro.md | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md b/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md index e741525a..ef1afa18 100644 --- a/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md +++ b/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md @@ -1,11 +1,12 @@ --- -short_title: "Merged Objects HATS Catalog" +short_title: Introduction jupytext: text_representation: extension: .md format_name: myst format_version: 0.13 jupytext_version: 1.18.1 + root_level_metadata_filter: -short_title kernelspec: display_name: Python 3 (ipykernel) language: python @@ -18,6 +19,7 @@ kernelspec: This tutorial is an introduction to the content and format of the Euclid Q1 Merged Objects HATS Catalog. Later tutorials in this series will show how to load quality samples. +See [Euclid Tutorial Notebooks: Catalogs](../../euclid_access/euclid.md#catalogs) for a list of tutorials in this series. +++ @@ -25,9 +27,9 @@ Later tutorials in this series will show how to load quality samples. 
In this tutorial, we will: -- Learn about the Euclid Merged Objects catalog that IRSA created by combining information from multiple Euclid Quick Release 1 catalogs +- Learn about the Euclid Merged Objects catalog that IRSA created by combining information from multiple Euclid Quick Release 1 (Q1) catalogs. - Find columns of interest. -- Perform a basic spatial query in each of the Euclid Deep Fields using the Python library PyArrow. +- Perform a basic query using the Python library PyArrow. +++ @@ -51,12 +53,12 @@ Access is free and no credentials are required. ## 2. Imports -```{code-cell} python3 +```{code-cell} ipython3 # # Uncomment the next line to install dependencies if needed. # %pip install hpgeom pandas pyarrow ``` -```{code-cell} python3 +```{code-cell} ipython3 import hpgeom # Find HEALPix indexes from RA and Dec import pyarrow.compute as pc # Filter the catalog import pyarrow.dataset # Load the catalog @@ -70,7 +72,7 @@ First we'll load the Parquet schema (column information) of the Merged Objects c The Parquet schema is accessible from a few locations, all of which include the column names and types. Here, we load it from the `_common_metadata` file because it also includes the column units and descriptions. -```{code-cell} python3 +```{code-cell} ipython3 # AWS S3 paths. s3_bucket = "nasa-irsa-euclid-q1" dataset_prefix = "contributed/q1/merged_objects/hats/euclid_q1_merged_objects-hats/dataset" @@ -82,7 +84,7 @@ schema_path = f"{dataset_path}/_common_metadata" s3 = pyarrow.fs.S3FileSystem(anonymous=True) ``` -```{code-cell} python3 +```{code-cell} ipython3 # Load the Parquet schema. 
schema = pyarrow.parquet.read_schema(schema_path, filesystem=s3) @@ -136,7 +138,7 @@ The tables are: Find all columns from these tables in the Parquet schema: -```{code-cell} python3 +```{code-cell} ipython3 mer_prefixes = ["mer_", "morph_", "cutouts_"] mer_col_counts = {p: len([n for n in schema.names if n.startswith(p)]) for p in mer_prefixes} @@ -193,7 +195,7 @@ The tables are: Find all columns from these tables in the Parquet schema: -```{code-cell} python3 +```{code-cell} ipython3 phz_prefixes = ["phz_", "class_", "physparam_", "galaxysed_", "physparamqso_", "starclass_", "starsed_", "physparamnir_"] phz_col_counts = {p: len([n for n in schema.names if n.startswith(p)]) for p in phz_prefixes} @@ -240,7 +242,7 @@ The tables are: Find all columns from these tables in the Parquet schema: -```{code-cell} python3 +```{code-cell} ipython3 spe_prefixes = ["z_", "lines_", "models_"] spe_col_counts = {p: len([n for n in schema.names if n.startswith(p)]) for p in spe_prefixes} @@ -272,7 +274,7 @@ They are useful for spatial queries, as demonstrated in the Euclid Deep Fields s The HEALPix, Euclid object ID, and Euclid tile ID columns appear first: -```{code-cell} python3 +```{code-cell} ipython3 schema.names[:5] ``` @@ -288,7 +290,7 @@ However, PyArrow automatically makes them available as regular columns when the The HATS columns appear at the end: -```{code-cell} python3 +```{code-cell} ipython3 schema.names[-3:] ``` @@ -297,12 +299,12 @@ schema.names[-3:] The subsections above show how to find all columns from a given Euclid table as well as the additional columns. Here we show some additional techniques for finding columns. -```{code-cell} python3 +```{code-cell} ipython3 # Access the data type using the `field` method. schema.field("mer_flux_y_2fwhm_aper") ``` -```{code-cell} python3 +```{code-cell} ipython3 # The column metadata includes unit and description. # Parquet metadata is always stored as bytestrings, which are denoted by a leading 'b'. 
schema.field("mer_flux_y_2fwhm_aper").metadata @@ -311,7 +313,7 @@ schema.field("mer_flux_y_2fwhm_aper").metadata Euclid Q1 offers many flux measurements, both from Euclid detections and from external ground-based surveys. They are given in microjanskys, so all flux columns can be found by searching the metadata for this unit. -```{code-cell} python3 +```{code-cell} ipython3 # Find all flux columns. flux_columns = [field.name for field in schema if field.metadata[b"unit"] == b"uJy"] @@ -321,7 +323,7 @@ flux_columns[:4] Columns associated with external surveys are identified by the inclusion of "ext" in the name. -```{code-cell} python3 +```{code-cell} ipython3 external_flux_columns = [name for name in flux_columns if "ext" in name] print(f"{len(external_flux_columns)} flux columns from external surveys. First four are:") external_flux_columns[:4] @@ -339,7 +341,7 @@ The regions are well separated, so we can distinguish them using a simple cone s We can load data more efficiently using the HEALPix order 9 pixels that cover each area rather than using RA and Dec values directly. These will be used in later tutorials. -```{code-cell} python3 +```{code-cell} ipython3 # EDF-N (Euclid Deep Field - North) ra, dec, radius = 269.733, 66.018, 4 # 20 sq deg edfn_k9_pixels = hpgeom.query_circle(hpgeom.order_to_nside(9), ra, dec, radius, inclusive=True) @@ -362,7 +364,7 @@ ldn_k9_pixels = hpgeom.query_circle(hpgeom.order_to_nside(9), ra, dec, radius, i To demonstrate a basic query, we'll search for objects with a galaxy photometric redshift estimate of 6.0 (largest possible). Other tutorials in this series will show more complex queries and describe the redshifts and other data in more detail. 
-```{code-cell} python3 +```{code-cell} ipython3 dataset = pyarrow.dataset.dataset(dataset_path, partitioning="hive", filesystem=s3, schema=schema) highz_objects = dataset.to_table( From 3736f2b278b8cb5c63447fc75e9169c99ea7f279 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Mon, 22 Dec 2025 12:58:54 -0800 Subject: [PATCH 3/4] Update TOC and landing page --- toc.yml | 4 +++- tutorials/euclid_access/euclid.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/toc.yml b/toc.yml index 99a989ba..f72626dd 100644 --- a/toc.yml +++ b/toc.yml @@ -16,7 +16,9 @@ project: - file: tutorials/euclid_access/2_Euclid_intro_MER_catalog.md - file: tutorials/euclid_access/4_Euclid_intro_PHZ_catalog.md - file: tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md - - file: tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md + - title: Merged Objects HATS Catalog + children: + - pattern: tutorials/parquet-catalog-demos/euclid-q1-hats/*-euclid-q1-hats-*.md - file: tutorials/cloud_access/euclid-cloud-access.md - file: tutorials/euclid_access/Euclid_ERO.md - title: WISE diff --git a/tutorials/euclid_access/euclid.md b/tutorials/euclid_access/euclid.md index 37383425..ad3bdf9e 100644 --- a/tutorials/euclid_access/euclid.md +++ b/tutorials/euclid_access/euclid.md @@ -24,7 +24,9 @@ Data products include MERged mosaics of calibrated and stacked frames; combined - [PHZ Catalogs](4_Euclid_intro_PHZ_catalog.md) — Join the PHZ and MER catalogs and do a box search for galaxies with quality redshifts, load a MER mosaic cutout of the box, and plot the cutout with the catalog results overlaid. Then plot the SIR spectrum of the brightest galaxy and look at a MER mosaic cutout of the galaxy in Firefly. - [SPE Catalogs](5_Euclid_intro_SPE_catalog.md) — Join the SPE and MER catalogs and query for galaxies with H-alpha line detections, then plot the SIR spectrum of a galaxy with a high SNR H-alpha line measurement. 
-- [Merged Objects HATS Catalog](../parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md) — Understand the content and format of the Euclid Q1 Merged Objects HATS Catalog, then perform a basic query. +- **Merged Objects HATS Catalog** — This product was created by IRSA and contains the Euclid MER, PHZ, and SPE catalogs in a single [HATS](https://hats.readthedocs.io/en/latest/) catalog. + - [Introduction](../parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md) — Understand the content and format of the Euclid Q1 Merged Objects HATS Catalog, then perform a basic query. + - [Magnitudes](../parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md) — Review the types of flux measurements available, load template-fit and aperture magnitudes, and plot distributions and comparisons for different object types. ## Special Topics From e9155bde2358df6a82fe39867d15dcd552a97df6 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Tue, 23 Dec 2025 17:00:02 -0800 Subject: [PATCH 4/4] Apply review feedback from Brigitta and Jaladh --- .../euclid-q1-hats/1-euclid-q1-hats-intro.md | 7 +++-- .../4-euclid-q1-hats-magnitudes.md | 29 ++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md b/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md index ef1afa18..630b419e 100644 --- a/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md +++ b/tutorials/parquet-catalog-demos/euclid-q1-hats/1-euclid-q1-hats-intro.md @@ -334,7 +334,7 @@ external_flux_columns[:4] +++ Euclid Q1 includes data from three Euclid Deep Fields: EDF-N (North), EDF-S (South), EDF-F (Fornax; also in the southern hemisphere). -There is also a small amount of data from a fourth field: LDN1641 (Lynds' Dark Nebula 1641), which was observed for technical reasons during Euclid's verification phase and mostly ignored here. 
+There is also a small amount of data from a fourth field: LDN1641 (Lynds' Dark Nebula 1641), which was observed for technical reasons during Euclid's verification phase. The fields are described in [Euclid Collaboration: Aussel et al., 2025](https://arxiv.org/pdf/2503.15302) and can be seen on this [skymap](https://irsa.ipac.caltech.edu/data/download/parquet/euclid/q1/merged_objects/hats/euclid_q1_merged_objects-hats/skymap.png). The regions are well separated, so we can distinguish them using a simple cone search without having to be too picky about the radius. @@ -362,7 +362,8 @@ ldn_k9_pixels = hpgeom.query_circle(hpgeom.order_to_nside(9), ra, dec, radius, i ## 6. Basic Query To demonstrate a basic query, we'll search for objects with a galaxy photometric redshift estimate of 6.0 (largest possible). -Other tutorials in this series will show more complex queries and describe the redshifts and other data in more detail. +Other tutorials in this series will show more complex queries, and describe the redshifts and other data in more detail. +PyArrow dataset filters are described at [Filtering by Expressions](https://arrow.apache.org/docs/python/compute.html#filtering-by-expressions), and the list of available functions is at [Compute Functions](https://arrow.apache.org/docs/python/api/compute.html). ```{code-cell} ipython3 dataset = pyarrow.dataset.dataset(dataset_path, partitioning="hive", filesystem=s3, schema=schema) @@ -377,6 +378,6 @@ highz_objects **Authors:** Troy Raen, Vandana Desai, Andreas Faisst, Shoubaneh Hemmati, Jaladh Singhal, Brigitta Sipőcz, Jessica Krick, the IRSA Data Science Team, and the Euclid NASA Science Center at IPAC (ENSCI). 
-**Updated:** 2025-12-22 +**Updated:** 2025-12-23 **Contact:** [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) diff --git a/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md b/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md index e23aa35e..5d700c74 100644 --- a/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md +++ b/tutorials/parquet-catalog-demos/euclid-q1-hats/4-euclid-q1-hats-magnitudes.md @@ -107,7 +107,8 @@ PHZ_CLASS = "phz_phz_classification" ``` We'll convert the catalog fluxes to magnitudes following the [MER Photometry Cookbook](http://st-dm.pages.euclid-sgs.uk/data-product-doc/dmq1/merdpd/merphotometrycookbook.html). -For convenience, we'll have PyArrow do the conversion during the read operation and return only the magnitudes. +PyArrow can do the conversion during the read operation and return only the magnitudes. +To do this, we'll use the following function to define the magnitudes as `pyarrow.compute` (`pc`) functions, which are described at [Compute Functions](https://arrow.apache.org/docs/python/api/compute.html). ```{code-cell} ipython3 def flux_to_magnitude(flux_col_name: str) -> pc.Expression: @@ -151,7 +152,9 @@ def flux_to_magnitude(flux_col_name: str) -> pc.Expression: ``` Define the columns we want to load. -This needs to be a dictionary of PyArrow expressions (rather than a simple list of column names) because we're asking PyArrow to convert flux -> magnitude before returning the data. +This needs to be a dictionary (rather than a simple list of column names) because we're asking PyArrow to compute the magnitudes dynamically from the catalog fluxes. +The dictionary keys will be the column names in the resultant table. +The values must be `pyarrow.compute` expressions (described above). 

```{code-cell} ipython3
 I_MAG = "I (mag)"
@@ -215,6 +218,14 @@ Since the template-fit photometry is recommended for extended objects, we'll sep
 [Euclid Collaboration: Tucci et al., 2025](https://arxiv.org/pdf/2503.15306) defines point-like objects as having `MUMAX_MINUS_MAG < -2.5`.
 
 ```{code-cell} ipython3
+
+```
+
+```{code-cell} ipython3
+---
+jupyter:
+  source_hidden: true
+---
 # Galaxy + any. Star + galaxy. QSO + galaxy.
 classes = {"Galaxy": (2, 3, 6, 7), "Star": (1, 3), "QSO": (4, 6)}
 class_colors = ["tab:green", "tab:blue", "tab:orange"]
@@ -252,6 +263,7 @@ for ax in axes[:, 0]:
 for axs, band in zip(axes.transpose(), bands):
     axs[0].set_title(band.split()[0])
     axs[-1].set_xlabel(band)
+fig.suptitle("Magnitude Distributions by Object Type")
 plt.tight_layout()
 ```
 
@@ -282,11 +294,18 @@ This comparison reveals systematic offsets that depend on factors including morp
 This figure is inspired by Romelli Fig. 6 (top panel).
 
 ```{code-cell} ipython3
+
+```
+
+```{code-cell} ipython3
+---
+jupyter:
+  source_hidden: true
+---
 # Only consider objects within these mag and mag difference limits.
 mag_limits, mag_diff_limits = (16, 24), (-1, 1)
 mag_limited_df = mags_df.loc[(mags_df[I_MAG] > mag_limits[0]) & (mags_df[I_MAG] < mag_limits[1])]
 
-fig, axes = plt.subplots(2, 3, figsize=(18, 9), sharey=True, sharex=True)
 bands = [
     ("Y templfit (mag)", "Y aperture (mag)"),
     ("J templfit (mag)", "J aperture (mag)"),
     ("H templfit (mag)", "H aperture (mag)"),
 ]
 hexbin_kwargs = dict(
     cmap="YlGnBu", bins="log", extent=(*mag_limits, *mag_diff_limits), gridsize=25
 )
 annotate_kwargs = dict(
     xycoords="axes fraction", ha="left", fontweight="bold", bbox=dict(facecolor="white", alpha=0.8)
 )
 
 # Plot
+fig, axes = plt.subplots(2, 3, figsize=(18, 9), sharey=True, sharex=True)
 for axs, (ref_band, aper_band) in zip(axes.transpose(), bands):
     # Extended objects, top row. 
ax = axs[0]
 extended = mags_df.loc[mags_df[MUMAX_MINUS_MAG] >= -2.5, [I_MAG, ref_band, aper_band]]
@@ -338,6 +358,7 @@ for i, ax in enumerate(axes.flatten()):
         ax.set_title("Point-like objects")
     if i > 2:
         ax.set_xlabel(I_MAG)
+fig.suptitle("Magnitude Differences: Template-fit - Aperture")
 plt.tight_layout()
 ```
 
@@ -352,6 +373,6 @@ The offset is more pronounced for point-like objects (bottom row), likely due to
 
 **Authors:** Troy Raen, Vandana Desai, Andreas Faisst, Shoubaneh Hemmati, Jaladh Singhal, Brigitta Sipőcz, Jessica Krick, the IRSA Data Science Team, and the Euclid NASA Science Center at IPAC (ENSCI).
 
-**Updated:** 2025-12-22
+**Updated:** 2025-12-23
 
 **Contact:** [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html)