Source code for xtgeo.grid3d.grid_properties

"""Module for Grid Properties."""

from __future__ import annotations

import hashlib
import warnings
from typing import TYPE_CHECKING, List, Literal, Tuple, Union

import numpy as np
import pandas as pd

from xtgeo.common import XTGDescription, XTGeoDialog, null_logger
from xtgeo.common.constants import MAXDATES, MAXKEYWORDS
from xtgeo.common.exceptions import InvalidFileFormatError
from xtgeo.io._file import FileFormat, FileWrapper

from . import _grid3d_utils as utils, _grid_etc1
from ._grid3d import _Grid3D
from ._gridprops_import_eclrun import (
    import_ecl_init_gridproperties,
    import_ecl_restart_gridproperties,
    read_eclrun_properties,
)
from ._gridprops_import_roff import import_roff_gridproperties, read_roff_properties

xtg = XTGeoDialog()
logger = null_logger(__name__)

if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator

    from xtgeo import Grid
    from xtgeo.common.types import FileLike

    from .grid_property import GridProperty

KeywordTuple = Tuple[str, str, int, int]
KeywordDateTuple = Tuple[str, str, int, int, Union[str, int]]
GridPropertiesKeywords = Union[
    List[Union[KeywordTuple, KeywordDateTuple]], pd.DataFrame
]


def list_gridproperties(
    property_file: FileLike,
    fformat: Literal["roffasc", "roff", "finit", "init", "funrst", "unrst"] = None,
) -> list[str]:
    """List the properties in a ROFF or Eclipse INIT/UNRST file.

    Args:
        property_file: The `FileLike` containing the file to inspect.
        fformat: The optional format of the provided file. If not provided will
            attempt to detect it. None by default.

    Returns:
        A list of property names within the file.

    Raises:
        ValueError: Unknown or invalid file format.

    Example::
        >>> static_props = xtgeo.list_gridproperties(reek_dir + "/REEK.INIT")
        >>> roff_props = xtgeo.list_gridproperties(
        ...     reek_dir + "/reek_grd_w_props.roff",
        ...     fformat="roff",
        ... )
    """
    pfile = FileWrapper(property_file, mode="rb")
    pfile.check_file(raiseerror=ValueError)

    fmt = pfile.fileformat(fformat)
    if fmt in (FileFormat.ROFF_ASCII, FileFormat.ROFF_BINARY):
        return list(read_roff_properties(pfile))
    if fmt in (
        FileFormat.FINIT,
        FileFormat.INIT,
        FileFormat.FUNRST,
        FileFormat.UNRST,
    ):
        return list(read_eclrun_properties(pfile))

    extensions = FileFormat.extensions_string(
        [
            FileFormat.ROFF_BINARY,
            FileFormat.ROFF_ASCII,
            FileFormat.INIT,
            FileFormat.FINIT,
            FileFormat.UNRST,
            FileFormat.FUNRST,
        ]
    )
    raise InvalidFileFormatError(
        f"File format {fformat} is invalid for type GridProperties. "
        f"Supported formats are {extensions}."
    )


def gridproperties_from_file(
    property_file: FileLike,
    fformat: str | None = None,
    names: list[str] | None = None,
    dates: list[str] | None = None,
    grid: Grid | None = None,
    namestyle: int = 0,
    strict: tuple[bool, bool] = (True, False),
) -> GridProperties:
    """Import grid properties from file.

    In case of names='all' then all vectors which have a valid length
    (number of total or active cells in the grid) will be read

    Args:
        property_file (str or Path): Name of file with properties
        fformat (str): roff/init/unrst
        names: list of property names, e.g. ['PORO', 'PERMX'] or 'all'
        dates: list of dates on YYYYMMDD format, for restart files, or 'all'
        grid (obj): The grid geometry object (optional if ROFF)
        namestyle (int): 0 (default) for style SWAT_20110223,
            1 for SWAT--2011_02_23 (applies to restart only)
        strict (tuple of (bool, bool)): If (True, False) (default) then an
            Error is raised if keyword name is not found, or a key-date combination
            is not found. However, the dates will processed so that non-valid dates
            are skipped (still, invalid key-date combinations may occur!).
            If (True, True) all keywords and dates are tried, while (False, False)
            means that that only valid entries are imported, more or less silently.
            Saturations keywords SWAT/SOIL/SGAS are not evaluated as they may be
            derived.
    Example::
        >>> grd = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID", fformat='egrid')
        >>> gps = xtgeo.gridproperties_from_file(
        ...     reek_dir + "/REEK.INIT",
        ...     fformat='init',
        ...     names=["PORO", "PERMX"],
        ...     grid=grd,
        ... )
    """
    pfile = FileWrapper(property_file, mode="rb")
    pfile.check_file(raiseerror=ValueError)

    fmt = pfile.fileformat(fformat)
    if fmt in (FileFormat.ROFF_ASCII, FileFormat.ROFF_BINARY):
        return GridProperties(
            props=import_roff_gridproperties(pfile, names, strict=strict)
        )
    if fmt in (FileFormat.FINIT, FileFormat.INIT):
        return GridProperties(
            props=import_ecl_init_gridproperties(
                pfile,
                grid=grid,
                names=names,
                strict=strict[0],
                maxkeys=MAXKEYWORDS,
            )
        )
    if fmt in (FileFormat.FUNRST, FileFormat.UNRST):
        return GridProperties(
            props=import_ecl_restart_gridproperties(
                pfile,
                dates=dates,
                grid=grid,
                names=names,
                namestyle=namestyle,
                strict=strict,
                maxkeys=MAXKEYWORDS,
            )
        )

    extensions = FileFormat.extensions_string(
        [
            FileFormat.ROFF_BINARY,
            FileFormat.ROFF_ASCII,
            FileFormat.INIT,
            FileFormat.FINIT,
            FileFormat.UNRST,
            FileFormat.FUNRST,
        ]
    )
    raise InvalidFileFormatError(
        f"File format {fformat} is invalid for type GridProperties. "
        f"Supported formats are {extensions}."
    )


# --------------------------------------------------------------------------------------
# Comment on 'asmasked' vs 'activeonly:
#
# 'asmasked'=True will return a np.ma array, while 'asmasked' = False will
# return a np.ndarray
#
# The 'activeonly' will filter out masked entries, or use None or np.nan
# if 'activeonly' is False.
#
# Use word 'zerobased' for a bool regrading startcell basis is 1 or 0
#
# For functions with mask=... ,they should be replaced with asmasked=...
# --------------------------------------------------------------------------------------


def gridproperties_dataframe(
    gridproperties: Iterable[GridProperties],
    grid: Grid | None = None,
    activeonly: bool = True,
    ijk: bool = False,
    xyz: bool = False,
    doubleformat: bool = False,
) -> pd.DataFrame:
    """Returns a Pandas dataframe table for the properties.

    Similar to :meth:`GridProperties.get_dataframe()` but takes any list of
    grid properties as its first argument.

    Args:
        gridproperties: List (also GridProperties or iterable) of GridProperty
            to create dataframe of.
        activeonly (bool): If True, return only active cells, NB!
            If True, will require a grid instance (see grid key)
        ijk (bool): If True, show cell indices, IX JY KZ columns
        xyz (bool): If True, show cell center coordinates (needs grid).
        doubleformat (bool): If True, floats are 64 bit, otherwise 32 bit.
            Note that coordinates (if xyz=True) is always 64 bit floats.
        grid (Grid): The grid geometry object. This is required for the
            xyz option.
    Returns:
        Pandas dataframe object
    Examples::
        >>> grd = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID", fformat='egrid')
        >>> names = ['SOIL', 'SWAT', 'PRESSURE']
        >>> dates = [19991201]
        >>> gps = xtgeo.gridproperties_from_file(
        ...     reek_dir + "/REEK.UNRST",
        ...     fformat='unrst',
        ...     names=names,
        ...     dates=dates,
        ...     grid=grd
        ... )
        >>> df = xtgeo.gridproperties_dataframe(gps, grid=grd)
    """

    proplist = list(gridproperties)

    dataframe_dict = {}
    if ijk:
        if activeonly:
            if grid:
                ix, jy, kz = _grid_etc1.get_ijk(grid)
                dataframe_dict["IX"] = ix.get_active_npvalues1d()
                dataframe_dict["JY"] = jy.get_active_npvalues1d()
                dataframe_dict["KZ"] = kz.get_active_npvalues1d()
            elif proplist:
                ix, jy, kz = _grid_etc1.get_ijk(proplist[0])
                dataframe_dict["IX"] = ix.get_active_npvalues1d()
                dataframe_dict["JY"] = jy.get_active_npvalues1d()
                dataframe_dict["KZ"] = kz.get_active_npvalues1d()
        else:
            if not grid:
                raise ValueError(
                    "You ask for active_only but no Grid is present. Use grid=..."
                )
            act = grid.get_actnum(dual=True)
            ix, jy, kz = grid.get_ijk(asmasked=False)
            dataframe_dict["ACTNUM"] = act.values1d
            dataframe_dict["IX"] = ix.values1d
            dataframe_dict["JY"] = jy.values1d
            dataframe_dict["KZ"] = kz.values1d

    if xyz:
        if not grid:
            raise ValueError("You ask for xyz but no Grid is present. Use grid=...")

        xc, yc, zc = grid.get_xyz(asmasked=activeonly)
        if activeonly:
            dataframe_dict["X_UTME"] = xc.get_active_npvalues1d()
            dataframe_dict["Y_UTMN"] = yc.get_active_npvalues1d()
            dataframe_dict["Z_TVDSS"] = zc.get_active_npvalues1d()
        else:
            dataframe_dict["X_UTME"] = xc.values1d
            dataframe_dict["Y_UTMN"] = yc.values1d
            dataframe_dict["Z_TVDSS"] = zc.values1d

    for prop in gridproperties:
        if activeonly:
            vector = prop.get_active_npvalues1d()
        else:
            vector = prop.values1d.copy()
            # mask values not supported in Pandas:
            if prop.isdiscrete:
                vector = vector.filled(fill_value=0)
            else:
                vector = vector.filled(fill_value=np.nan)

        if doubleformat:
            vector = vector.astype(np.float64)
        else:
            vector = vector.astype(np.float32)

        dataframe_dict[prop.name] = vector

    return pd.DataFrame.from_dict(dataframe_dict)


[docs] class GridProperties(_Grid3D): """Class for a collection of 3D grid props, belonging to the same grid topology. It is a thin wrapper on a list that 1) checks that the GridProperties belong to the same Grid (loosely). 2) Contains operations that can be called on lists of GridProperty objects for easy discoverability. Examples:: >>> import xtgeo >>> # Create an >>> grid_properties = xtgeo.GridProperties(props=[]) >>> # Get the dataframe via the gridproperties object >>> grid_properties.get_dataframe() Empty DataFrame... >>> # Convert the gridproperties to a list >>> grid_properties_list = list(grid_properties) >>> # Get the dataframe of the list: >>> gridproperties_dataframe(grid_properties_list) Empty DataFrame... Args: props: The list of GridProperty objects. See Also: The :class:`GridProperty` class. """
[docs] def __init__( self, props: list[GridProperty] | None = None, ): ncol = 4 nrow = 3 nlay = 5 if props: ncol, nrow, nlay = props[0].dimensions super().__init__(ncol, nrow, nlay) # This triggers the setter for 'props', ensuring proper # setup of related attributes like '_ncol', '_nrow', # and '_nlay', and performs a consistency check. self.props = props or []
def __repr__(self) -> str: # noqa: D105 return ( f"{self.__class__.__name__} (id={id(self)}) ncol={self._ncol!r}, " f"nrow={self._nrow!r}, nlay={self._nlay!r}, filesrc={self.names!r}" ) def __str__(self) -> str: """str: User friendly print.""" return self.describe(flush=False) or "" def __contains__(self, name: str) -> bool: """bool: Emulate 'if "PORO" in props'.""" return bool(self.get_prop_by_name(name, raiseserror=False)) def __getitem__(self, name: str) -> GridProperty: # noqa: D105 prop = self.get_prop_by_name(name, raiseserror=False) if prop is None: raise KeyError(f"Key {name} does not exist") return prop def __iter__(self) -> Iterator[GridProperty]: # noqa: D105 return iter(self._props) @property def names(self) -> list[str]: """Returns a list of property names. Example:: >>> import xtgeo >>> grid = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID") >>> props = xtgeo.gridproperties_from_file( ... reek_dir + "/REEK.INIT", ... fformat="init", ... names=["PERMX"], ... grid=grid, ... ) >>> namelist = props.names >>> for name in namelist: ... print ('Property name is {}'.format(name)) Property name is PERMX """ return [prop.name for prop in self._props] @property def props(self) -> list[GridProperty] | None: """Returns a list of XTGeo GridProperty objects, None if empty. Example:: >>> import xtgeo >>> grid = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID") >>> myprops = xtgeo.gridproperties_from_file( ... reek_dir + "/REEK.INIT", ... fformat="init", ... names=["PERMX"], ... grid=grid, ... ) >>> proplist = myprops.props >>> for prop in proplist: ... print ('Property object name is {}'.format(prop.name)) Property object name is PERMX >>> # adding a property, e.g. get ACTNUM as a property from the grid >>> actn = grid.get_actnum() # this will get actn as a GridProperty >>> myprops.append_props([actn]) """ if not self._props: return None return self._props @props.setter def props(self, propslist: list[GridProperty]) -> None: self._props = propslist if propslist: self._ncol = propslist[0].ncol self._nrow = propslist[0].nrow self._nlay = propslist[0].nlay self._consistency_check() @property def dates(self) -> list[str | None] | None: """Returns a list of valid (found) dates after import. Returns None if no dates present Note: See also :meth:`GridProperties.scan_dates` for scanning available dates in advance Example:: >>> import xtgeo >>> grid = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID") >>> props = xtgeo.gridproperties_from_file( ... reek_dir + "/REEK.INIT", ... fformat="init", ... names=["PERMX"], ... grid=grid, ... ) >>> datelist = props.dates >>> for date in datelist: ... print ('Date applied is {}'.format(date)) Date applied is 19991201 .. versionchanged:: 2.16 dates is no longer an alias (undocumented behavior), and simply return the dates of the underlying list of GridProperty. """ if not self.props: return None return [p.date for p in self.props] # Copy, and etc aka setters and getters
[docs] def copy(self) -> GridProperties: """Copy a GridProperties instance to a new unique instance. Note that the GridProperty instances will also be unique. """ return GridProperties( props=[p.copy() for p in self.props] if self.props else [] )
[docs] def describe(self, flush: bool = True) -> str | None: """Describe an instance by printing to stdout.""" dsc = XTGDescription() dsc.title("Description of GridProperties instance") dsc.txt("Object ID", id(self)) dsc.txt("Shape: NCOL, NROW, NLAY", self.ncol, self.nrow, self.nlay) dsc.txt("Attached grid props objects (names)", self.names) if flush: dsc.flush() return None return dsc.astext()
[docs] def generate_hash(self) -> str: """str: Return a unique hash ID for current gridproperties instance. .. versionadded:: 2.10 """ mhash = hashlib.sha256() hashinput = "" for prop in self._props: gid = ( f"{prop.ncol}{prop.nrow}{prop.nlay}{prop.values.mean()}" f"{prop.values.min()}{prop.values.max()}" ) hashinput += gid mhash.update(hashinput.encode()) return mhash.hexdigest()
[docs] def get_prop_by_name( self, name: str, raiseserror: bool = True ) -> GridProperty | None: """Find and return a property object (GridProperty) by name. Args: name (str): Name of property to look for raiseserror (bool): If True, raises a ValueError if not found, otherwise return None """ for prop in self._props: logger.debug("Look for %s, actual is %s", name, prop.name) if prop.name == name: logger.debug(repr(prop)) return prop if raiseserror: raise ValueError(f"Cannot find property with name <{name}>") return None
[docs] def append_props(self, proplist: list[GridProperty]) -> None: """Add a list of GridProperty objects to current GridProperties instance.""" if not self._props and proplist: self._ncol = proplist[0].ncol self._nrow = proplist[0].nrow self._nlay = proplist[0].nlay self._props += proplist self._consistency_check()
[docs] def get_ijk( self, names: tuple[str, str, str] = ("IX", "JY", "KZ"), zerobased: bool = False, asmasked: bool = False, ) -> tuple[GridProperty, GridProperty, GridProperty]: """Returns 3 xtgeo.grid3d.GridProperty objects: I counter, J counter, K counter. Args: names: a 3 x tuple of names per property (default IX, JY, KZ). asmasked: If True, then active cells only. zerobased: If True, counter start from 0, otherwise 1 (default=1). """ return _grid_etc1.get_ijk( self, names=names, zerobased=zerobased, asmasked=asmasked )
[docs] def get_actnum( self, name: str = "ACTNUM", asmasked: bool = False, ) -> GridProperty | None: """Return an ACTNUM GridProperty object. Args: name (str): name of property in the XTGeo GridProperty object. asmasked (bool): ACTNUM is returned with all cells as default. Use asmasked=True to make 0 entries masked. Example:: >>> import xtgeo >>> grid = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID") >>> myprops = xtgeo.gridproperties_from_file( ... reek_dir + "/REEK.INIT", ... fformat="init", ... names=["PERMX"], ... grid=grid, ... ) >>> act = myprops.get_actnum() >>> print('{}% of cells are active'.format(act.values.mean() * 100)) 99.99...% of cells are active Returns: A GridProperty instance of ACTNUM, or None if no props present. """ # borrow function from GridProperty class: if self._props: return self._props[0].get_actnum(name=name, asmasked=asmasked) warnings.warn("No gridproperty in list", UserWarning) return None
[docs] def get_dataframe( self, activeonly: bool = False, ijk: bool = False, xyz: bool = False, doubleformat: bool = False, grid: Grid | None = None, ) -> pd.DataFrame: """Returns a Pandas dataframe table for the properties. See also :func:`xtgeo.gridproperties_dataframe()` Args: activeonly (bool): If True, return only active cells, NB! If True, will require a grid instance (see grid key) ijk (bool): If True, show cell indices, IX JY KZ columns xyz (bool): If True, show cell center coordinates (needs grid). doubleformat (bool): If True, floats are 64 bit, otherwise 32 bit. Note that coordinates (if xyz=True) is always 64 bit floats. grid (Grid): The grid geometry object. This is required for the xyz option. Returns: Pandas dataframe object Examples:: >>> import xtgeo >>> grid = xtgeo.grid_from_file(reek_dir + "/REEK.EGRID") >>> pps = xtgeo.gridproperties_from_file( ... reek_dir + "/REEK.UNRST", ... fformat="unrst", ... names=['SOIL', 'SWAT', 'PRESSURE'], ... dates=[19991201], ... grid=grid, ... ) >>> df = pps.get_dataframe(activeonly=False, ijk=True, xyz=True, grid=grid) >>> print(df) ACTNUM IX JY ... SOIL_19991201 SWAT_19991201 PRESSURE_19991201 0 1 1 1 ... 0.0 1.0 341.694183 1 1 1 1 ... 0.0 1.0 342.097107 2 1 1 1 ... 0.0 1.0 342.500061 3 1 1 1 ... 0.0 1.0 342.902954 4 1 1 1 ... 0.0 1.0 343.305908 ... """ return gridproperties_dataframe( self, activeonly=activeonly, ijk=ijk, xyz=xyz, doubleformat=doubleformat, grid=grid, )
def _consistency_check(self) -> None: for p in self._props: if (p.ncol, p.nrow, p.nlay) != (self.ncol, self.nrow, self.nlay): raise ValueError("Mismatching dimensions in GridProperties members.")
[docs] @staticmethod def scan_dates( pfile: FileLike, fformat: Literal["unrst"] = "unrst", maxdates: int = MAXDATES, dataframe: bool = False, datesonly: bool = False, ) -> list | pd.DataFrame: """Quick scan dates in a simulation restart file. Args: pfile (str): Name of file or file handle with properties fformat (str): unrst (so far) maxdates (int): Maximum number of dates to collect dataframe (bool): If True, return a Pandas dataframe instead datesonly (bool): If True, SEQNUM is skipped, Return: A list of tuples or a dataframe with (seqno, date), date is on YYYYMMDD form. If datesonly is True and dataframe is False, the returning list will be a simple list of dates. Example:: >>> dlist = GridProperties.scan_dates(reek_dir + "/REEK.UNRST") >>> #or getting all dates a simple list: >>> dlist = GridProperties.scan_dates( ... reek_dir + "/REEK.UNRST", ... datesonly=True) .. versionchanged:: 2.13 Added datesonly keyword """ logger.info("Format supported as default is %s", fformat) _pfile = FileWrapper(pfile) _pfile.check_file(raiseerror=ValueError) dlist = utils.scan_dates(_pfile, maxdates=maxdates, dataframe=dataframe) if datesonly and dataframe: assert isinstance(dlist, pd.DataFrame) dlist.drop("SEQNUM", axis=1, inplace=True) if datesonly and not dataframe: dlist = [date for (_, date) in dlist] return dlist