"""
Objects for observational data plotting.
Tools for adding in extra (e.g. observational) data to plots.
Includes an object container and helper functions for creating
and reading files.
"""
from unyt import unyt_quantity, unyt_array
from numpy import tanh, log10
from matplotlib.pyplot import Axes
from matplotlib import rcParams
from astropy.units import Quantity
from astropy.cosmology.core import Cosmology
from astropy.cosmology import wCDM, FlatLambdaCDM
import h5py
import json
from typing import Union, Optional, List
from velociraptor import __version__ as code_version
from velociraptor.exceptions import ObservationalDataError
# Default z_orders for errorbar points and lines
line_zorder = -5
points_zorder = -6
[docs]def save_cosmology(handle: h5py.File, cosmology: Cosmology):
"""
Save the (astropy) cosmology to a HDF5 dataset.
Parameters
----------
handle: h5py.File
h5py file handle to save the cosmology to. This is performed
by creating a cosmology group and setting attributes.
cosmology: astropy.cosmology.Cosmology
The Astropy cosmology instance to save to the HDF5 file. This
is performed by extracting all of the key variables and saving
them as either floating point numbers or strings.
Notes
-----
This process can be reversed by using load_cosmology.
"""
group = handle.create_group("cosmology").attrs
group.create("H0", cosmology.H0)
group.create("Om0", cosmology.Om0)
group.create("Ode0", cosmology.Ode0)
group.create("Tcmb0", cosmology.Tcmb0)
group.create("Neff", cosmology.Neff)
group.create("m_nu", cosmology.m_nu if cosmology.m_nu is not None else 0.0)
group.create(
"m_nu_units", str(cosmology.m_nu.unit if cosmology.m_nu is not None else "")
)
group.create("Ob0", cosmology.Ob0 if cosmology.Ob0 is not None else 0.0)
group.create("name", cosmology.name if cosmology.name is not None else "")
try:
group.create("w0", cosmology.w0)
except:
# No EoS!
pass
return
[docs]def load_cosmology(handle: h5py.File):
"""
Save the (astropy) cosmology to a HDF5 dataset.
Parameters
----------
handle: h5py.File
h5py file handle to read the cosmology from.
Returns
-------
astropy.cosmology.Cosmology:
Astropy cosmology instance extracted from the HDF5 file.
"""
try:
group = handle["cosmology"].attrs
except:
return None
try:
cosmology = wCDM(
H0=group["H0"],
Om0=group["Om0"],
Ode0=group["Ode0"],
w0=group["w0"],
Tcmb0=group["Tcmb0"],
Neff=group["Neff"],
m_nu=Quantity(group["m_nu"], unit=group["m_nu_units"]),
Ob0=group["Ob0"],
name=group["name"],
)
except KeyError:
# No EoS
cosmology = FlatLambdaCDM(
H0=group["H0"],
Om0=group["Om0"],
Tcmb0=group["Tcmb0"],
Neff=group["Neff"],
m_nu=Quantity(group["m_nu"], unit=group["m_nu_units"]),
Ob0=group["Ob0"],
name=group["name"],
)
return cosmology
[docs]class ObservationalData(object):
"""
Observational data object. Contains routines
for both writing and reading HDF5 files containing
the observations, as well as plotting.
Attributes
----------
name: str
Name of the observation for users to identifty
x_units: unyt_quantity
Units for horizontal axes
y_units: unyt_quantity
Units for vertical axes
x: unyt_array
Horizontal data points
y: unyt_array
Vertical data points
x_scatter: Union[unyt_array, None]
Scatter in horizontal direction. Can be None, or an
unyt_array of shape 1XN (symmetric) or 2XN (non-symmetric)
such that it can be passed to plt.errorbar easily.
y_scatter: Union[unyt_array, None]
Scatter in vertical direction. Can be None, or an
unyt_array of shape 1XN (symmetric) or 2XN (non-symmetric)
such that it can be passed to plt.errorbar easily.
x_comoving: bool
Whether or not the horizontal values are comoving (True)
or physical (False)
y_comoving: bool
Whether or not the vertical values are comoving (True)
or physical (False)
x_description: str
Default label for horizontal axis (without units), also a
description of the variable.
y_description: str
Default label for horizontal axis (without units), also a
description of the variable.
filename: str
Filename that the data was read from, or was written to.
comment: str
A free-text comment describing the data, including e.g.
which cosmology and IMF it is calibrated to.
citation: str
Short citation for data, e.g. Author et al. (Year) (Project),
such as Baldry et al. (2012) (GAMA)
bibcode: str
Bibcode for citation, this can be found on the NASA ADS.
redshift: float
Redshift at which the data is collected at. If a range, use
the mid-point.
redshift_lower: float
Lowest redshift at which the data is collected at. Used to
determine whether it should be plotted on a given figure.
redshift_upper: float
Highest redshift at which the data is collected at. Used to
determine whether it should be plotted on a given figure.
plot_as: Union[str, None]
Whether the data should be plotted as points (typical for observations)
or as a line (typical for simulation data). Allowed values:
+ points
+ line
cosmology: Cosmology
Astropy cosmology that the data has been corrected to.
"""
# Data stored in this object
# name of the observation (to be plotted on axes)
name: str
# units for axes
x_units: unyt_quantity
y_units: unyt_quantity
# data for axes
x: unyt_array
y: unyt_array
# scatter
x_scatter: Union[unyt_array, None]
y_scatter: Union[unyt_array, None]
# x and y are comoving?
x_comoving: bool
y_comoving: bool
# x and y labels
x_description: str
y_description: str
# filename to read from or write to
filename: str
# free-text comment describing data
comment: str
# citation for data
citation: str
bibcode: str
# redshift that the data is at
redshift: float
# redshift upper and lower bounds for plotting
redshift_lower: float
redshift_upper: float
# plot as points, or a line?
plot_as: Union[str, None] = None
# the cosmology that this dataset was corrected to
cosmology: Cosmology
def __init__(self):
"""
Initialises the object for observational data. Does nothing as we are
unsure if we wish to read or write data at this point.
"""
return
[docs] def load(self, filename: str, prefix: Optional[str] = None):
"""
Loads the observations from file.
Parameters
----------
filename: str
The filename to load the data from. Probably should end in
.hdf5.
prefix: str, optional
An optional prefix to all fields that enables multiple
observational datasets to be saved in a single file. Not
for general use, only within :class:`MultiRedshiftObservationalData`.
If a prefix is used, cosmology is not saved.
"""
if prefix is not None:
# To enable human-readable files
prefix = f"{prefix}_"
else:
prefix = ""
self.filename = filename
# Load data here.
self.x = unyt_array.from_hdf5(
filename, dataset_name=f"{prefix}values", group_name="x"
)
self.y = unyt_array.from_hdf5(
filename, dataset_name=f"{prefix}values", group_name="y"
)
self.x_units = self.x.units
self.y_units = self.y.units
try:
self.x_scatter = unyt_array.from_hdf5(
filename, dataset_name=f"{prefix}scatter", group_name="x"
)
except KeyError:
self.x_scatter = None
try:
self.y_scatter = unyt_array.from_hdf5(
filename, dataset_name=f"{prefix}scatter", group_name="y"
)
except KeyError:
self.y_scatter = None
with h5py.File(filename, "r") as handle:
metadata = handle[f"{prefix}metadata"].attrs
self.comment = metadata["comment"]
self.name = metadata["name"]
self.citation = metadata["citation"]
self.bibcode = metadata["bibcode"]
self.redshift = metadata["redshift"]
self.redshift_lower = metadata.get("redshift_lower", self.redshift)
self.redshift_upper = metadata.get("redshift_upper", self.redshift)
self.plot_as = metadata["plot_as"]
self.x_comoving = bool(handle["x"].attrs[f"{prefix}comoving"])
self.y_comoving = bool(handle["y"].attrs[f"{prefix}comoving"])
self.y_description = str(handle["y"].attrs[f"{prefix}description"])
self.x_description = str(handle["x"].attrs[f"{prefix}description"])
self.cosmology = load_cosmology(handle)
self.x.name = self.x_description
self.y.name = self.y_description
return
[docs] def write(self, filename: str, prefix: Optional[str] = None):
"""
Writes the observations to file.
Parameters
----------
filename: str
The filename to write the data to. Probably should end in
.hdf5.
prefix: str, optional
An optional prefix to all fields that enables multiple
observational datasets to be saved in a single file. Not
for general use, only within :class:`MultiRedshiftObservationalData`.
If a prefix is used, cosmology is not saved.
"""
if prefix is not None:
# To enable human-readable files
prefix = f"{prefix}_"
else:
prefix = ""
self.filename = filename
# Write data here
self.x.write_hdf5(filename, dataset_name=f"{prefix}values", group_name="x")
self.y.write_hdf5(filename, dataset_name=f"{prefix}values", group_name="y")
if self.x_scatter is not None:
self.x_scatter.write_hdf5(
filename, dataset_name=f"{prefix}scatter", group_name="x"
)
if self.y_scatter is not None:
self.y_scatter.write_hdf5(
filename, dataset_name=f"{prefix}scatter", group_name="y"
)
with h5py.File(filename, "a") as handle:
metadata = handle.create_group(f"{prefix}metadata").attrs
metadata.create("comment", self.comment)
metadata.create("name", self.name)
metadata.create("citation", self.citation)
metadata.create("bibcode", self.bibcode)
metadata.create("redshift", self.redshift)
metadata.create("redshift_lower", self.redshift_lower)
metadata.create("redshift_upper", self.redshift_upper)
metadata.create("plot_as", self.plot_as)
handle["x"].attrs.create(f"{prefix}comoving", self.x_comoving)
handle["y"].attrs.create(f"{prefix}comoving", self.y_comoving)
handle["x"].attrs.create(f"{prefix}description", self.x_description)
handle["y"].attrs.create(f"{prefix}description", self.y_description)
if not prefix:
save_cosmology(handle=handle, cosmology=self.cosmology)
return
[docs] def associate_x(
self,
array: unyt_array,
scatter: Union[unyt_array, None],
comoving: bool,
description: str,
):
"""
Associate an x quantity with this observational data instance.
Parameters
----------
array: unyt_array
The array of (horizontal) data points, including units.
scatter: Union[unyt_array, None]
The array of scatter (1XN or 2XN) in the horizontal
co-ordinates with associated units.
comoving: bool
Whether or not the horizontal values are comoving.
description: str
Short description of the data, e.g. Stellar Masses
"""
self.x = array
self.x_units = array.units
self.x_comoving = comoving
self.x_description = description
if scatter is not None:
self.x_scatter = scatter.to(self.x_units)
else:
self.x_scatter = None
return
[docs] def associate_y(
self,
array: unyt_array,
scatter: Union[unyt_array, None],
comoving: bool,
description: str,
):
"""
Associate an y quantity with this observational data instance.
Parameters
----------
array: unyt_array
The array of (vertical) data points, including units.
scatter: Union[unyt_array, None]
The array of scatter (1XN or 2XN) in the vertical
co-ordinates with associated units.
comoving: bool
Whether or not the vertical values are comoving.
description: str
Short description of the data, e.g. Stellar Masses
"""
self.y = array
self.y_units = array.units
self.y_comoving = comoving
self.y_description = description
if scatter is not None:
self.y_scatter = scatter.to(self.y_units)
else:
self.y_scatter = None
return
[docs] def associate_citation(self, citation: str, bibcode: str):
"""
Associate a citation with this observational data instance.
Parameters
----------
citation: str
Short citation, formatted as follows: Author et al. (Year) (Project),
e.g. Baldry et al. (2012) (GAMA)
bibcode: str
Bibcode for the paper the data was extracted from, available
from the NASA ADS or publisher. E.g. 2012MNRAS.421..621B
"""
self.citation = citation
self.bibcode = bibcode
return
[docs] def associate_name(self, name: str):
"""
Associate a name with this observational data instance.
Parameters
----------
name: str
Short name to describe the dataset.
"""
self.name = name
return
[docs] def associate_redshift(
self,
redshift: float,
redshift_lower: Optional[float] = None,
redshift_upper: Optional[float] = None,
):
"""
Associate the redshift that the observations were taken at
with this observational data instance.
Parameters
----------
redshift: float
Redshift at which the data is collected at. If a range, use
the mid-point.
redshift_lower: Optional[float]
Lower bound for this set of observations. Used to determine if
plotting is viable, and should always be present in the case
where a multiple redshift dataset is used.
redshift_upper: Optional[float]
Upper bound for this set of observations. Used to determine if
plotting is viable, and should always be present in the case
where a multiple redshift dataset is used.
"""
self.redshift = redshift
self.redshift_lower = redshift_lower if redshift_lower is not None else redshift
self.redshift_upper = redshift_upper if redshift_upper is not None else redshift
return
[docs] def associate_plot_as(self, plot_as: str):
"""
Associate the 'plot_as' field - this should either be line
or points.
Parameters
----------
plot_as: str
Either points or line
"""
if plot_as not in ["line", "points"]:
raise Exception("Please supply plot_as as either points or line.")
self.plot_as = plot_as
return
[docs] def associate_cosmology(self, cosmology: Cosmology):
"""
Associate a cosmology with this dataset that it has been corrected for.
This should be an astropy cosmology instance.
Parameters
----------
cosmology: astropy.cosmology.Cosmology
Astropy cosmology instance describing what cosmology the data has
been corrected to.
"""
self.cosmology = cosmology
return
[docs] def plot_on_axes(self, axes: Axes, errorbar_kwargs: Union[dict, None] = None):
"""
Plot this set of observational data as an errorbar().
Parameters
----------
axes: plt.Axes
The matplotlib axes to plot the data on. This will either
plot the data as a line or a set of errorbar points, with
the short citation (self.citation) being included in the
legend automatically.
errorbar_kwargs: dict
Optional keyword arguments to pass to plt.errorbar.
"""
# Do this because dictionaries are mutable
if errorbar_kwargs is not None:
kwargs = errorbar_kwargs
else:
kwargs = {}
# Ensure correct units throughout, in case somebody changed them
if self.x_scatter is not None:
self.x_scatter.convert_to_units(self.x.units)
if self.y_scatter is not None:
self.y_scatter.convert_to_units(self.y.units)
if self.plot_as == "points":
kwargs["linestyle"] = "none"
kwargs["marker"] = "."
kwargs["zorder"] = points_zorder
# Need to "intelligently" size the markers
kwargs["markersize"] = (
rcParams["lines.markersize"]
* (1.5 - tanh(2.0 * log10(len(self.x)) - 4.0))
/ 2.5
)
kwargs["alpha"] = (3.0 - tanh(2.0 * log10(len(self.x)) - 4.0)) / 4.0
# Looks weird if errorbars are present
if self.y_scatter is None:
kwargs["markerfacecolor"] = "none"
if len(self.x) > 1000:
kwargs["rasterized"] = True
elif self.plot_as == "line":
kwargs["zorder"] = line_zorder
# Make both the data name and redshift appear in the legend
data_label = f"{self.citation} ($z={self.redshift:.1f}$)"
try:
axes.errorbar(
self.x,
self.y,
yerr=self.y_scatter,
xerr=self.x_scatter,
**kwargs,
label=data_label,
)
except ValueError as e:
# at some point, matplotlib decided that negative xerr or yerr
# values for errorbars are no longer allowed. This makes sense.
# Unfortunately, the current observational data repository does not
# guarantee non-negative scatter values. In order to avoid
# meaningless error messages, we make the error message more explicit
# by naming the offending data.
raise ValueError(
f"Problem while adding {data_label} data!"
f" x: {self.x}, y: {self.y},"
f" x_scatter: {self.x_scatter}, y_scatter: {self.y_scatter}"
)
raise e
return
[docs]class MultiRedshiftObservationalData(object):
"""
Multi-redshift version of :class:`ObservationalData` class, which
should be used instead of the :class:`ObservationalData` class for
reading files, and writing multiple redshift files.
Essentially, this contains multiple instances of
:class:`ObservationalData`, and allows for multiple redshift data
to be read transparently.
"""
# List of the individual redshift datasets.
datasets: List[ObservationalData]
# name of the observation (to be plotted on axes)
name: str
# x and y labels
x_description: str
y_description: str
# filename to read from or write to
filename: str
# free-text comment describing data
comment: str
# citation for data
citation: str
bibcode: str
# the cosmology that this dataset was corrected to
cosmology: Cosmology
# the code version this was created with
code_version = code_version
# maximum number of returned datasets when asking for
# redshift overlap.
maximum_number_of_returns = 1024
def __init__(self):
"""
Initialises the object for observational data. Does nothing as we are
unsure if we wish to read or write data at this point.
"""
self.datasets = []
return
[docs] def get_datasets_overlapping_with(
self, redshifts: List[float] = [0.0, 1000.0]
) -> List[ObservationalData]:
"""
Gets individual redshift datasets overlapping with the specified
redshift range. The check is performed inclusively, so if you ask
for overlaps with [0.25, 0.75], and an observation has a redshift
range of [0.75, 1.25], it will be included. Note that
``maximum_number_of_returns`` modifies the behaviour of this
function, and the maximum length of the returned list will
be the same as that attribute.
Parameters
----------
redshifts: List[float]
Redshifts to check for overlaps with. This defaults to the
range [0.0, 1000.0], and hence should overlap with all
reasonable datasets.
Notes
-----
You can access this ability in a slightly more user-friendly
way using the :func:`velociraptor.observations.load_observations`
function.
Datasets are returned in order of their scale-factor proximity
to the centre of the range specified in ``redshifts``.
"""
overlapping_datasets = []
lower, upper = redshifts
for dataset in self.datasets:
if (
(dataset.redshift_lower <= lower and lower <= dataset.redshift_upper)
or (dataset.redshift_lower <= upper and upper <= dataset.redshift_upper)
or (lower <= dataset.redshift_lower and dataset.redshift_upper <= upper)
):
overlapping_datasets.append(dataset)
# Filter datasets so that they are returned ordered by their
# proximity in scale factor
a = lambda z: 1.0 / (1.0 + z)
central_scale_factor = 0.5 * sum([a(z) for z in redshifts])
overlapping_datasets = sorted(
overlapping_datasets,
key=lambda x: abs(central_scale_factor - a(x.redshift)),
)[: self.maximum_number_of_returns]
return overlapping_datasets
[docs] def associate_dataset(self, dataset: ObservationalData):
"""
Associate an individual redshift dataset with this object.
Parameters
----------
dataset: ObservationalData
Instance of ObservationalData that may or may not be completed;
comments are handled at the top level and are over-written. In total,
``citation``, ``bibcode``, ``name``, ``comment``, and
``cosmology`` are shared.
"""
try:
dataset.associate_citation(citation=self.citation, bibcode=self.bibcode)
dataset.associate_comment(comment=self.comment)
dataset.associate_name(name=self.name)
dataset.associate_cosmology(cosmology=self.cosmology)
except AttributeError:
raise ObservationalDataError(
"Ensure that you have associated the citation, including bibcode, "
"comment, name, and cosmology with the multi-redshift container "
"object before associating any individual datasets. This is required "
"to preserve metadata integrity."
)
self.datasets.append(dataset)
return
[docs] def associate_citation(self, citation: str, bibcode: str):
"""
Associate a citation with this observational data instance.
Parameters
----------
citation: str
Short citation, formatted as follows: Author et al. (Year) (Project),
e.g. Baldry et al. (2012) (GAMA)
bibcode: str
Bibcode for the paper the data was extracted from, available
from the NASA ADS or publisher. E.g. 2012MNRAS.421..621B
"""
self.citation = citation
self.bibcode = bibcode
return
[docs] def associate_name(self, name: str):
"""
Associate a name with this observational data instance.
Parameters
----------
name: str
Short name to describe the dataset.
"""
self.name = name
return
[docs] def associate_cosmology(self, cosmology: Cosmology):
"""
Associate a cosmology with this dataset that it has been corrected for.
This should be an astropy cosmology instance.
Parameters
----------
cosmology: astropy.cosmology.Cosmology
Astropy cosmology instance describing what cosmology the data has
been corrected to.
"""
self.cosmology = cosmology
return
[docs] def associate_maximum_number_of_returns(self, maximum_number_of_returns: int):
"""
Associate a maximum number of returned datasets with this object.
This number will give the maximum number of datasets that are returned in
a call to ``load_datasets``. This is particularly useful in the
case where you want to provide a large number of fits and only want
to show a single curve on the figure.
Parameters
----------
maximum_number_of_returns: int
The maximum number of datasets to plot simultaneously.
"""
self.maximum_number_of_returns = maximum_number_of_returns
return
[docs] def write(self, filename: str):
"""
Writes all of the datasets currently present in the object
to a HDF5 file.
Parameters
----------
filename: str
File to be written to, including the .hdf5.
"""
prefixes = [f"z{dataset.redshift:07.3f}" for dataset in self.datasets]
self.filename = filename
for dataset, prefix in zip(self.datasets, prefixes):
dataset.write(filename, prefix=prefix)
with h5py.File(filename, "a") as handle:
group = handle.create_group("multi_file_metadata")
group.attrs.create("prefixes", prefixes)
group.attrs.create("number_of_datasets", len(prefixes))
group.attrs.create(
"minimal_redshift",
min([dataset.redshift_lower for dataset in self.datasets]),
)
group.attrs.create(
"maximal_redshift",
max([dataset.redshift_upper for dataset in self.datasets]),
)
group.attrs.create(
"maximum_number_of_returns", self.maximum_number_of_returns
)
group.attrs.create("comment", self.comment)
group.attrs.create("name", self.name)
group.attrs.create("citation", self.citation)
group.attrs.create("bibcode", self.bibcode)
group.attrs.create("code_version", self.code_version)
save_cosmology(handle, self.cosmology)
return
[docs] def load(self, filename: str):
"""
Reads all of the datasets from an associated file to the
object.
Parameters
----------
filename: str
File to be read from, including the .hdf5.
"""
self.filename = filename
with h5py.File(filename, "r") as handle:
try:
group = handle["multi_file_metadata"].attrs
except KeyError:
raise ObservationalDataError(
"This file is not a multi-redshift dataset. Try opening "
"it with ObservationalData instead, or use load_observations."
)
self.prefixes = group["prefixes"]
self.maximum_number_of_returns = group["maximum_number_of_returns"]
self.comment = group["comment"]
self.name = group["name"]
self.citation = group["citation"]
self.bibcode = group["bibcode"]
self.code_version = group["code_version"]
self.cosmology = load_cosmology(handle)
# Now load our datasets
for prefix in self.prefixes:
this_observation = ObservationalData()
this_observation.load(filename, prefix=prefix)
self.datasets.append(this_observation)
return