Source code for velociraptor.observations.objects

"""
Objects for observational data plotting.

Tools for adding in extra (e.g. observational) data to plots.

Includes an object container and helper functions for creating
and reading files.
"""

from unyt import unyt_quantity, unyt_array
from numpy import tanh, log10
from matplotlib.pyplot import Axes
from matplotlib import rcParams

from astropy.units import Quantity
from astropy.cosmology.core import Cosmology
from astropy.cosmology import wCDM, FlatLambdaCDM

import h5py
import json

from typing import Union, Optional, List

from velociraptor import __version__ as code_version
from velociraptor.exceptions import ObservationalDataError

# Default z_orders for errorbar points and lines
line_zorder = -5
points_zorder = -6


[docs]def save_cosmology(handle: h5py.File, cosmology: Cosmology): """ Save the (astropy) cosmology to a HDF5 dataset. Parameters ---------- handle: h5py.File h5py file handle to save the cosmology to. This is performed by creating a cosmology group and setting attributes. cosmology: astropy.cosmology.Cosmology The Astropy cosmology instance to save to the HDF5 file. This is performed by extracting all of the key variables and saving them as either floating point numbers or strings. Notes ----- This process can be reversed by using load_cosmology. """ group = handle.create_group("cosmology").attrs group.create("H0", cosmology.H0) group.create("Om0", cosmology.Om0) group.create("Ode0", cosmology.Ode0) group.create("Tcmb0", cosmology.Tcmb0) group.create("Neff", cosmology.Neff) group.create("m_nu", cosmology.m_nu if cosmology.m_nu is not None else 0.0) group.create( "m_nu_units", str(cosmology.m_nu.unit if cosmology.m_nu is not None else "") ) group.create("Ob0", cosmology.Ob0 if cosmology.Ob0 is not None else 0.0) group.create("name", cosmology.name if cosmology.name is not None else "") try: group.create("w0", cosmology.w0) except: # No EoS! pass return
[docs]def load_cosmology(handle: h5py.File): """ Save the (astropy) cosmology to a HDF5 dataset. Parameters ---------- handle: h5py.File h5py file handle to read the cosmology from. Returns ------- astropy.cosmology.Cosmology: Astropy cosmology instance extracted from the HDF5 file. """ try: group = handle["cosmology"].attrs except: return None try: cosmology = wCDM( H0=group["H0"], Om0=group["Om0"], Ode0=group["Ode0"], w0=group["w0"], Tcmb0=group["Tcmb0"], Neff=group["Neff"], m_nu=Quantity(group["m_nu"], unit=group["m_nu_units"]), Ob0=group["Ob0"], name=group["name"], ) except KeyError: # No EoS cosmology = FlatLambdaCDM( H0=group["H0"], Om0=group["Om0"], Tcmb0=group["Tcmb0"], Neff=group["Neff"], m_nu=Quantity(group["m_nu"], unit=group["m_nu_units"]), Ob0=group["Ob0"], name=group["name"], ) return cosmology
[docs]class ObservationalData(object): """ Observational data object. Contains routines for both writing and reading HDF5 files containing the observations, as well as plotting. Attributes ---------- name: str Name of the observation for users to identifty x_units: unyt_quantity Units for horizontal axes y_units: unyt_quantity Units for vertical axes x: unyt_array Horizontal data points y: unyt_array Vertical data points x_scatter: Union[unyt_array, None] Scatter in horizontal direction. Can be None, or an unyt_array of shape 1XN (symmetric) or 2XN (non-symmetric) such that it can be passed to plt.errorbar easily. y_scatter: Union[unyt_array, None] Scatter in vertical direction. Can be None, or an unyt_array of shape 1XN (symmetric) or 2XN (non-symmetric) such that it can be passed to plt.errorbar easily. x_comoving: bool Whether or not the horizontal values are comoving (True) or physical (False) y_comoving: bool Whether or not the vertical values are comoving (True) or physical (False) x_description: str Default label for horizontal axis (without units), also a description of the variable. y_description: str Default label for horizontal axis (without units), also a description of the variable. filename: str Filename that the data was read from, or was written to. comment: str A free-text comment describing the data, including e.g. which cosmology and IMF it is calibrated to. citation: str Short citation for data, e.g. Author et al. (Year) (Project), such as Baldry et al. (2012) (GAMA) bibcode: str Bibcode for citation, this can be found on the NASA ADS. redshift: float Redshift at which the data is collected at. If a range, use the mid-point. redshift_lower: float Lowest redshift at which the data is collected at. Used to determine whether it should be plotted on a given figure. redshift_upper: float Highest redshift at which the data is collected at. Used to determine whether it should be plotted on a given figure. plot_as: Union[str, None] Whether the data should be plotted as points (typical for observations) or as a line (typical for simulation data). Allowed values: + points + line cosmology: Cosmology Astropy cosmology that the data has been corrected to. """ # Data stored in this object # name of the observation (to be plotted on axes) name: str # units for axes x_units: unyt_quantity y_units: unyt_quantity # data for axes x: unyt_array y: unyt_array # scatter x_scatter: Union[unyt_array, None] y_scatter: Union[unyt_array, None] # x and y are comoving? x_comoving: bool y_comoving: bool # x and y labels x_description: str y_description: str # filename to read from or write to filename: str # free-text comment describing data comment: str # citation for data citation: str bibcode: str # redshift that the data is at redshift: float # redshift upper and lower bounds for plotting redshift_lower: float redshift_upper: float # plot as points, or a line? plot_as: Union[str, None] = None # the cosmology that this dataset was corrected to cosmology: Cosmology def __init__(self): """ Initialises the object for observational data. Does nothing as we are unsure if we wish to read or write data at this point. """ return
[docs] def load(self, filename: str, prefix: Optional[str] = None): """ Loads the observations from file. Parameters ---------- filename: str The filename to load the data from. Probably should end in .hdf5. prefix: str, optional An optional prefix to all fields that enables multiple observational datasets to be saved in a single file. Not for general use, only within :class:`MultiRedshiftObservationalData`. If a prefix is used, cosmology is not saved. """ if prefix is not None: # To enable human-readable files prefix = f"{prefix}_" else: prefix = "" self.filename = filename # Load data here. self.x = unyt_array.from_hdf5( filename, dataset_name=f"{prefix}values", group_name="x" ) self.y = unyt_array.from_hdf5( filename, dataset_name=f"{prefix}values", group_name="y" ) self.x_units = self.x.units self.y_units = self.y.units try: self.x_scatter = unyt_array.from_hdf5( filename, dataset_name=f"{prefix}scatter", group_name="x" ) except KeyError: self.x_scatter = None try: self.y_scatter = unyt_array.from_hdf5( filename, dataset_name=f"{prefix}scatter", group_name="y" ) except KeyError: self.y_scatter = None with h5py.File(filename, "r") as handle: metadata = handle[f"{prefix}metadata"].attrs self.comment = metadata["comment"] self.name = metadata["name"] self.citation = metadata["citation"] self.bibcode = metadata["bibcode"] self.redshift = metadata["redshift"] self.redshift_lower = metadata.get("redshift_lower", self.redshift) self.redshift_upper = metadata.get("redshift_upper", self.redshift) self.plot_as = metadata["plot_as"] self.x_comoving = bool(handle["x"].attrs[f"{prefix}comoving"]) self.y_comoving = bool(handle["y"].attrs[f"{prefix}comoving"]) self.y_description = str(handle["y"].attrs[f"{prefix}description"]) self.x_description = str(handle["x"].attrs[f"{prefix}description"]) self.cosmology = load_cosmology(handle) self.x.name = self.x_description self.y.name = self.y_description return
[docs] def write(self, filename: str, prefix: Optional[str] = None): """ Writes the observations to file. Parameters ---------- filename: str The filename to write the data to. Probably should end in .hdf5. prefix: str, optional An optional prefix to all fields that enables multiple observational datasets to be saved in a single file. Not for general use, only within :class:`MultiRedshiftObservationalData`. If a prefix is used, cosmology is not saved. """ if prefix is not None: # To enable human-readable files prefix = f"{prefix}_" else: prefix = "" self.filename = filename # Write data here self.x.write_hdf5(filename, dataset_name=f"{prefix}values", group_name="x") self.y.write_hdf5(filename, dataset_name=f"{prefix}values", group_name="y") if self.x_scatter is not None: self.x_scatter.write_hdf5( filename, dataset_name=f"{prefix}scatter", group_name="x" ) if self.y_scatter is not None: self.y_scatter.write_hdf5( filename, dataset_name=f"{prefix}scatter", group_name="y" ) with h5py.File(filename, "a") as handle: metadata = handle.create_group(f"{prefix}metadata").attrs metadata.create("comment", self.comment) metadata.create("name", self.name) metadata.create("citation", self.citation) metadata.create("bibcode", self.bibcode) metadata.create("redshift", self.redshift) metadata.create("redshift_lower", self.redshift_lower) metadata.create("redshift_upper", self.redshift_upper) metadata.create("plot_as", self.plot_as) handle["x"].attrs.create(f"{prefix}comoving", self.x_comoving) handle["y"].attrs.create(f"{prefix}comoving", self.y_comoving) handle["x"].attrs.create(f"{prefix}description", self.x_description) handle["y"].attrs.create(f"{prefix}description", self.y_description) if not prefix: save_cosmology(handle=handle, cosmology=self.cosmology) return
[docs] def associate_x( self, array: unyt_array, scatter: Union[unyt_array, None], comoving: bool, description: str, ): """ Associate an x quantity with this observational data instance. Parameters ---------- array: unyt_array The array of (horizontal) data points, including units. scatter: Union[unyt_array, None] The array of scatter (1XN or 2XN) in the horizontal co-ordinates with associated units. comoving: bool Whether or not the horizontal values are comoving. description: str Short description of the data, e.g. Stellar Masses """ self.x = array self.x_units = array.units self.x_comoving = comoving self.x_description = description if scatter is not None: self.x_scatter = scatter.to(self.x_units) else: self.x_scatter = None return
[docs] def associate_y( self, array: unyt_array, scatter: Union[unyt_array, None], comoving: bool, description: str, ): """ Associate an y quantity with this observational data instance. Parameters ---------- array: unyt_array The array of (vertical) data points, including units. scatter: Union[unyt_array, None] The array of scatter (1XN or 2XN) in the vertical co-ordinates with associated units. comoving: bool Whether or not the vertical values are comoving. description: str Short description of the data, e.g. Stellar Masses """ self.y = array self.y_units = array.units self.y_comoving = comoving self.y_description = description if scatter is not None: self.y_scatter = scatter.to(self.y_units) else: self.y_scatter = None return
[docs] def associate_citation(self, citation: str, bibcode: str): """ Associate a citation with this observational data instance. Parameters ---------- citation: str Short citation, formatted as follows: Author et al. (Year) (Project), e.g. Baldry et al. (2012) (GAMA) bibcode: str Bibcode for the paper the data was extracted from, available from the NASA ADS or publisher. E.g. 2012MNRAS.421..621B """ self.citation = citation self.bibcode = bibcode return
[docs] def associate_name(self, name: str): """ Associate a name with this observational data instance. Parameters ---------- name: str Short name to describe the dataset. """ self.name = name return
[docs] def associate_comment(self, comment: str): """ Associate a comment with this observational data instance. Parameters ---------- comment: str A free-text comment describing the data, including e.g. which cosmology and IMF it is calibrated to. """ self.comment = comment return
[docs] def associate_redshift( self, redshift: float, redshift_lower: Optional[float] = None, redshift_upper: Optional[float] = None, ): """ Associate the redshift that the observations were taken at with this observational data instance. Parameters ---------- redshift: float Redshift at which the data is collected at. If a range, use the mid-point. redshift_lower: Optional[float] Lower bound for this set of observations. Used to determine if plotting is viable, and should always be present in the case where a multiple redshift dataset is used. redshift_upper: Optional[float] Upper bound for this set of observations. Used to determine if plotting is viable, and should always be present in the case where a multiple redshift dataset is used. """ self.redshift = redshift self.redshift_lower = redshift_lower if redshift_lower is not None else redshift self.redshift_upper = redshift_upper if redshift_upper is not None else redshift return
[docs] def associate_plot_as(self, plot_as: str): """ Associate the 'plot_as' field - this should either be line or points. Parameters ---------- plot_as: str Either points or line """ if plot_as not in ["line", "points"]: raise Exception("Please supply plot_as as either points or line.") self.plot_as = plot_as return
[docs] def associate_cosmology(self, cosmology: Cosmology): """ Associate a cosmology with this dataset that it has been corrected for. This should be an astropy cosmology instance. Parameters ---------- cosmology: astropy.cosmology.Cosmology Astropy cosmology instance describing what cosmology the data has been corrected to. """ self.cosmology = cosmology return
[docs] def plot_on_axes(self, axes: Axes, errorbar_kwargs: Union[dict, None] = None): """ Plot this set of observational data as an errorbar(). Parameters ---------- axes: plt.Axes The matplotlib axes to plot the data on. This will either plot the data as a line or a set of errorbar points, with the short citation (self.citation) being included in the legend automatically. errorbar_kwargs: dict Optional keyword arguments to pass to plt.errorbar. """ # Do this because dictionaries are mutable if errorbar_kwargs is not None: kwargs = errorbar_kwargs else: kwargs = {} # Ensure correct units throughout, in case somebody changed them if self.x_scatter is not None: self.x_scatter.convert_to_units(self.x.units) if self.y_scatter is not None: self.y_scatter.convert_to_units(self.y.units) if self.plot_as == "points": kwargs["linestyle"] = "none" kwargs["marker"] = "." kwargs["zorder"] = points_zorder # Need to "intelligently" size the markers kwargs["markersize"] = ( rcParams["lines.markersize"] * (1.5 - tanh(2.0 * log10(len(self.x)) - 4.0)) / 2.5 ) kwargs["alpha"] = (3.0 - tanh(2.0 * log10(len(self.x)) - 4.0)) / 4.0 # Looks weird if errorbars are present if self.y_scatter is None: kwargs["markerfacecolor"] = "none" if len(self.x) > 1000: kwargs["rasterized"] = True elif self.plot_as == "line": kwargs["zorder"] = line_zorder # Make both the data name and redshift appear in the legend data_label = f"{self.citation} ($z={self.redshift:.1f}$)" try: axes.errorbar( self.x, self.y, yerr=self.y_scatter, xerr=self.x_scatter, **kwargs, label=data_label, ) except ValueError as e: # at some point, matplotlib decided that negative xerr or yerr # values for errorbars are no longer allowed. This makes sense. # Unfortunately, the current observational data repository does not # guarantee non-negative scatter values. In order to avoid # meaningless error messages, we make the error message more explicit # by naming the offending data. raise ValueError( f"Problem while adding {data_label} data!" f" x: {self.x}, y: {self.y}," f" x_scatter: {self.x_scatter}, y_scatter: {self.y_scatter}" ) raise e return
[docs]class MultiRedshiftObservationalData(object): """ Multi-redshift version of :class:`ObservationalData` class, which should be used instead of the :class:`ObservationalData` class for reading files, and writing multiple redshift files. Essentially, this contains multiple instances of :class:`ObservationalData`, and allows for multiple redshift data to be read transparently. """ # List of the individual redshift datasets. datasets: List[ObservationalData] # name of the observation (to be plotted on axes) name: str # x and y labels x_description: str y_description: str # filename to read from or write to filename: str # free-text comment describing data comment: str # citation for data citation: str bibcode: str # the cosmology that this dataset was corrected to cosmology: Cosmology # the code version this was created with code_version = code_version # maximum number of returned datasets when asking for # redshift overlap. maximum_number_of_returns = 1024 def __init__(self): """ Initialises the object for observational data. Does nothing as we are unsure if we wish to read or write data at this point. """ self.datasets = [] return
[docs] def get_datasets_overlapping_with( self, redshifts: List[float] = [0.0, 1000.0] ) -> List[ObservationalData]: """ Gets individual redshift datasets overlapping with the specified redshift range. The check is performed inclusively, so if you ask for overlaps with [0.25, 0.75], and an observation has a redshift range of [0.75, 1.25], it will be included. Note that ``maximum_number_of_returns`` modifies the behaviour of this function, and the maximum length of the returned list will be the same as that attribute. Parameters ---------- redshifts: List[float] Redshifts to check for overlaps with. This defaults to the range [0.0, 1000.0], and hence should overlap with all reasonable datasets. Notes ----- You can access this ability in a slightly more user-friendly way using the :func:`velociraptor.observations.load_observations` function. Datasets are returned in order of their scale-factor proximity to the centre of the range specified in ``redshifts``. """ overlapping_datasets = [] lower, upper = redshifts for dataset in self.datasets: if ( (dataset.redshift_lower <= lower and lower <= dataset.redshift_upper) or (dataset.redshift_lower <= upper and upper <= dataset.redshift_upper) or (lower <= dataset.redshift_lower and dataset.redshift_upper <= upper) ): overlapping_datasets.append(dataset) # Filter datasets so that they are returned ordered by their # proximity in scale factor a = lambda z: 1.0 / (1.0 + z) central_scale_factor = 0.5 * sum([a(z) for z in redshifts]) overlapping_datasets = sorted( overlapping_datasets, key=lambda x: abs(central_scale_factor - a(x.redshift)), )[: self.maximum_number_of_returns] return overlapping_datasets
[docs] def associate_dataset(self, dataset: ObservationalData): """ Associate an individual redshift dataset with this object. Parameters ---------- dataset: ObservationalData Instance of ObservationalData that may or may not be completed; comments are handled at the top level and are over-written. In total, ``citation``, ``bibcode``, ``name``, ``comment``, and ``cosmology`` are shared. """ try: dataset.associate_citation(citation=self.citation, bibcode=self.bibcode) dataset.associate_comment(comment=self.comment) dataset.associate_name(name=self.name) dataset.associate_cosmology(cosmology=self.cosmology) except AttributeError: raise ObservationalDataError( "Ensure that you have associated the citation, including bibcode, " "comment, name, and cosmology with the multi-redshift container " "object before associating any individual datasets. This is required " "to preserve metadata integrity." ) self.datasets.append(dataset) return
[docs] def associate_citation(self, citation: str, bibcode: str): """ Associate a citation with this observational data instance. Parameters ---------- citation: str Short citation, formatted as follows: Author et al. (Year) (Project), e.g. Baldry et al. (2012) (GAMA) bibcode: str Bibcode for the paper the data was extracted from, available from the NASA ADS or publisher. E.g. 2012MNRAS.421..621B """ self.citation = citation self.bibcode = bibcode return
[docs] def associate_name(self, name: str): """ Associate a name with this observational data instance. Parameters ---------- name: str Short name to describe the dataset. """ self.name = name return
[docs] def associate_comment(self, comment: str): """ Associate a comment with this observational data instance. Parameters ---------- comment: str A free-text comment describing the data, including e.g. which cosmology and IMF it is calibrated to. """ self.comment = comment return
[docs] def associate_cosmology(self, cosmology: Cosmology): """ Associate a cosmology with this dataset that it has been corrected for. This should be an astropy cosmology instance. Parameters ---------- cosmology: astropy.cosmology.Cosmology Astropy cosmology instance describing what cosmology the data has been corrected to. """ self.cosmology = cosmology return
[docs] def associate_maximum_number_of_returns(self, maximum_number_of_returns: int): """ Associate a maximum number of returned datasets with this object. This number will give the maximum number of datasets that are returned in a call to ``load_datasets``. This is particularly useful in the case where you want to provide a large number of fits and only want to show a single curve on the figure. Parameters ---------- maximum_number_of_returns: int The maximum number of datasets to plot simultaneously. """ self.maximum_number_of_returns = maximum_number_of_returns return
[docs] def write(self, filename: str): """ Writes all of the datasets currently present in the object to a HDF5 file. Parameters ---------- filename: str File to be written to, including the .hdf5. """ prefixes = [f"z{dataset.redshift:07.3f}" for dataset in self.datasets] self.filename = filename for dataset, prefix in zip(self.datasets, prefixes): dataset.write(filename, prefix=prefix) with h5py.File(filename, "a") as handle: group = handle.create_group("multi_file_metadata") group.attrs.create("prefixes", prefixes) group.attrs.create("number_of_datasets", len(prefixes)) group.attrs.create( "minimal_redshift", min([dataset.redshift_lower for dataset in self.datasets]), ) group.attrs.create( "maximal_redshift", max([dataset.redshift_upper for dataset in self.datasets]), ) group.attrs.create( "maximum_number_of_returns", self.maximum_number_of_returns ) group.attrs.create("comment", self.comment) group.attrs.create("name", self.name) group.attrs.create("citation", self.citation) group.attrs.create("bibcode", self.bibcode) group.attrs.create("code_version", self.code_version) save_cosmology(handle, self.cosmology) return
[docs] def load(self, filename: str): """ Reads all of the datasets from an associated file to the object. Parameters ---------- filename: str File to be read from, including the .hdf5. """ self.filename = filename with h5py.File(filename, "r") as handle: try: group = handle["multi_file_metadata"].attrs except KeyError: raise ObservationalDataError( "This file is not a multi-redshift dataset. Try opening " "it with ObservationalData instead, or use load_observations." ) self.prefixes = group["prefixes"] self.maximum_number_of_returns = group["maximum_number_of_returns"] self.comment = group["comment"] self.name = group["name"] self.citation = group["citation"] self.bibcode = group["bibcode"] self.code_version = group["code_version"] self.cosmology = load_cosmology(handle) # Now load our datasets for prefix in self.prefixes: this_observation = ObservationalData() this_observation.load(filename, prefix=prefix) self.datasets.append(this_observation) return