# pylint: disable=too-many-lines
from pathlib import Path
import fnmatch
import warnings
import json
from copy import deepcopy
from typing import Optional

import yaml
import numpy as np
import pandas as pd
import dash
from dash_table import DataTable
import dash_html_components as html
import dash_core_components as dcc
import webviz_core_components as wcc
from dash.dependencies import Input, Output, State, ALL
from dash.exceptions import PreventUpdate
from webviz_config.common_cache import CACHE
from webviz_config.webviz_store import webvizstore
from webviz_config import WebvizPluginABC

from .._datainput.fmu_input import load_smry, load_smry_meta
from .._abbreviations.reservoir_simulation import (
    simulation_vector_base,
    simulation_vector_description,
    simulation_region_vector_breakdown,
    simulation_region_vector_recompose,
    simulation_unit_reformat,
    historical_vector,
)
from .._abbreviations.number_formatting import table_statistics_base
from .._utils.unique_theming import unique_colors
from .._utils.simulation_timeseries import (
    set_simulation_line_shape_fallback,
    get_simulation_line_shape,
)


# pylint: disable=too-many-instance-attributes
class ReservoirSimulationTimeSeriesRegional(WebvizPluginABC):
    """### ReservoirSimulationTimeSeriesRegional

This plugins aggregates and visualizes regional time series data from simulation ensembles. That
is cumulatives, rates and inplace volumes. In addition recovery is calculated based on the
inplace volumes.

#### Mandatory input
* `ensembles`: Which ensembles in `shared_settings` to include in the plugin.

#### Optional input
* `fipfile`: Path to a yaml-file that defines a match between FIPXXX (e.g. FIPNUM) regions and
  _'human readable'_ regions, zones and etc. These will be used as filters. _Note that this is
  a single file used for all ensembles_. If all region numbers for a filter are missing in the
  data, this filter value will be silently ignored. E.g. if no vectors matches 5 or 6 in
  [this example file](
  https://github.com/equinor/webviz-subsurface-testdata/tree/master/reek_history_match/share/
  regions/fip.yaml), `ZONE` == `LowerReek` would be ignored in the plugin for `FIPNUM`. This
  is to allow you to use the same file for e.g. a sector and a full field model.
* `initial_vector`: First vector to plot (default is `ROIP`)
* `column_keys`: List of vector patterns to include or `None`, the latter gives all available
  vectors, and `None` is also the default.<br/>
  Vectors that don't match the following patterns will be filtered out for this plugin:
    * `R[OGW]IP*` (regional in place),
    * `R[OGW][IP][RT]*` (regional injection and production rates and cumulatives)
* `sampling`: Time series data will be sampled (and interpolated) at this frequency. Options:
    * `daily`
    * `monthly` (default)
    * `yearly`
* `line_shape_fallback`: Fallback interpolation method between points. Vectors identified as
  rates always backfilled, vectors identified as cumulative (totals) are always linearly
  interpolated. The rest use the fallback.
  Options:
    * `linear` (default)
    * `backfilled`<br/>
    * `hv` (regular Plotly option)
    * `vh` (regular Plotly option)
    * `hvh` (regular Plotly option)
    * `vhv` (regular Plotly option)
    * `spline` (regular Plotly option)
"""

    TABLE_STATISTICS = [("Group", {})] + table_statistics_base()
    ENSEMBLE_COLUMNS = ["REAL", "ENSEMBLE", "DATE"]

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        app,
        ensembles: list,
        fipfile: Path = None,
        initial_vector: str = "ROIP",
        column_keys: Optional[list] = None,
        sampling: str = "monthly",
        line_shape_fallback: str = "linear",
    ):
        super().__init__()
        self.column_keys = column_keys
        self.time_index = sampling
        if self.time_index not in ("daily", "monthly", "yearly"):
            raise ValueError(
                "Incorrect arguments. 'time_index' has to be a specified frequency 'daily',"
                "'monthly' or 'yearly', as the statistics require the same dates throughout an"
                "ensemble."
            )
        self.ens_paths = {
            ensemble: app.webviz_settings["shared_settings"]["scratch_ensembles"][
                ensemble
            ]
            for ensemble in ensembles
        }
        self.smry = load_smry(
            ensemble_paths=self.ens_paths,
            column_keys=self.column_keys,
            time_index=self.time_index,
        )
        self.smry_meta = load_smry_meta(
            ensemble_paths=self.ens_paths, column_keys=self.column_keys
        )
        # Field totals (FOPT/FWPT/FGPT) are used to detect non-zero initial
        # production, which would invalidate the recovery calculation.
        self.field_totals = [
            col for col in self.smry.columns if fnmatch.fnmatch(col, "F[OWG]PT")
        ]
        if self.field_totals:
            # Use the first date per ensemble from the already loaded data.
            self.smry_init_prod = pd.concat(
                [
                    df[["ENSEMBLE", "DATE"] + self.field_totals][
                        df["DATE"] == min(df["DATE"])
                    ]
                    for _, df in self.smry.groupby("ENSEMBLE")
                ]
            )
        else:
            # Field totals were filtered out by column_keys; load them separately.
            self.smry_init_prod = load_smry(
                ensemble_paths=self.ens_paths,
                column_keys=["F[OWG]PT"],
                time_index="first",
            )
        self.rec_ensembles = set(self.smry["ENSEMBLE"].unique())
        for col in self.smry_init_prod.columns:
            if col not in ReservoirSimulationTimeSeriesRegional.ENSEMBLE_COLUMNS:
                for ens in self.smry_init_prod["ENSEMBLE"].unique():
                    if any(
                        self.smry_init_prod[self.smry_init_prod["ENSEMBLE"] == ens][col]
                        > 0.0001
                    ):
                        warnings.warn(
                            f"Ensemble '{ens}' has initial production above 0, can therefore not"
                            " calculate recovery for this ensemble (FOPT, FGPT and/or FWPT > 0)."
                            " This ensemble probably includes restarts where we were not able to"
                            " identify the filepaths to the original cases. Note that RESTART"
                            " paths with more than 72 characters are not supported due to a"
                            " simulator metadata file format limitation."
                        )
                        self.rec_ensembles.discard(ens)
        self.smry_cols = []
        self.smry_options = []
        for col in self.smry.columns:
            # Skip the index columns and any vector whose historical twin is
            # already included.
            if (
                col in ReservoirSimulationTimeSeriesRegional.ENSEMBLE_COLUMNS
                or historical_vector(col, False) in self.smry_cols
            ):
                continue
            # Only regional in-place, rate and cumulative vectors are relevant.
            if fnmatch.fnmatch(col, "R[OGW]IP*") or fnmatch.fnmatch(
                col, "R[OGW][IP][RT]*"
            ):
                self.smry_cols.append(col)
        if not self.smry_cols:
            raise ValueError(
                "No data. Either no data was found, or all ensembles were dropped due to "
                "non-zero initial production. (FOPT, FGPT and/or FWPT > 0)"
            )
        self.initial_vector = (
            initial_vector
            if any(
                col.startswith((f"{initial_vector}:", f"{initial_vector}_"))
                for col in self.smry_cols
            )
            else simulation_vector_base(self.smry_cols[0])
        )
        self.fipfile = fipfile
        self.fipdesc = (
            None if self.fipfile is None else get_fipdesc(self.fipfile, self.smry_cols)
        )
        self.theme = app.webviz_settings["theme"]
        self.line_shape_fallback = set_simulation_line_shape_fallback(
            line_shape_fallback
        )
        self.fip_arrays = list(
            {simulation_region_vector_breakdown(col)[1] for col in self.smry_cols}
        )
        self.set_callbacks(app)

    @property
    def tour_steps(self):
        """Guided tour steps shown by webviz for each main UI element."""
        return [
            {
                "id": self.uuid("layout"),
                "content": (
                    "Dashboard visualizing regional in-place, cumulatives, rates and recovery. "
                    "Data aggregation and recovery calculation for selected regions are performed "
                    "on demand."
                ),
            },
            {
                "id": self.uuid("graph"),
                "content": (
                    "Visualization of selected data as a time series. Controlled by the options "
                    "in the menu to the left. Clicking this graph will update the date for the "
                    "statistics in the view below."
                ),
            },
            {
                "id": self.uuid("date_view_wrapper"),
                "content": (
                    "Visualization of statistics for a single date (which is selected by clicking "
                    "the time series graph above). A table, box plot, histogram or bar chart per "
                    "realization dependent on selection in the menu to the left."
                ),
            },
            {
                "id": self.uuid("fip_array"),
                "content": (
                    "Select the Eclipse FIP array, e.g. FIPNUM or a custom FIPXXX"
                ),
            },
            {
                "id": self.uuid("groupby"),
                "content": "Select how the data should be grouped.",
            },
            {
                "id": self.uuid("ensemble"),
                "content": (
                    "Select ensembles. Multiple ensembles allowed only when grouping by ensemble."
                ),
            },
            {
                "id": self.uuid("vector"),
                "content": (
                    "Select time series. The options here are the base names of the "
                    "vectors (e.g ROIP), FIP array and FIP regions (e.g. FIPNUM == 1) are decided "
                    "by the other options and filters in the menu. Recovery is added as an "
                    "additional option for oil and gas in-place vectors, and is calculated on the "
                    "fly."
                ),
            },
            {
                "id": self.uuid("timeseries_visualization"),
                "content": (
                    "Select if the time series should be plotted per realization or as a "
                    "fan chart."
                ),
            },
            {
                "id": self.uuid("date_view"),
                "content": (
                    "Select if the single date statistics should be shown as a table, box "
                    "plot, histogram or bar chart per realization."
                ),
            },
            {
                "id": self.uuid("filters"),
                "content": (
                    "Filters. If you have defined a fipfile in your config yaml, these "
                    "will be the groups defined in that file. Otherwise the only option will be "
                    "Regions, which is the numbers for each region in the selected FIP array. "
                    "E.g. if FIP array is FIPNUM, time series is ROIP and selected nodes are "
                    "1 and 2, the aggregation is ROIP:1+ROIP:2. If FIP array had been a custom "
                    "FIPXXX, the aggregation would be ROIP_XXX:1+ROIP_XXX:2."
                ),
            },
        ]

    @property
    def ensembles(self):
        """All ensemble names present in the loaded summary data."""
        return list(self.smry["ENSEMBLE"].unique())

    @property
    def all_nodes(self):
        """Sorted list (as strings) of all region numbers across all FIP arrays."""
        sorted_int_list = sorted(
            {
                int(col.split(":")[1])
                for col in self.smry_cols
                if len(col.split(":")) > 1 and col.split(":")[1].isdigit()
            }
        )
        return [str(i) for i in sorted_int_list]

    @property
    def groupby_colors(self):
        """Color lookup per groupby category (ensemble, regions or fipfile groups)."""
        color_dict = {"ENSEMBLE": unique_colors(self.ensembles, self.theme)}
        if self.fipdesc is None:
            color_dict.update({"regions": unique_colors(self.all_nodes, self.theme)})
        else:
            color_dict.update(
                {
                    group: unique_colors(
                        group_df["SUBGROUP"].unique().tolist(), self.theme
                    )
                    for group, group_df in self.fipdesc.groupby("GROUP")
                }
            )
        return color_dict

    def add_webvizstore(self):
        """Declare the data-loading functions (and arguments) for portable stores."""
        functions = [
            (
                load_smry,
                [
                    {
                        "ensemble_paths": self.ens_paths,
                        "column_keys": self.column_keys,
                        "time_index": self.time_index,
                    }
                ],
            ),
            (
                load_smry_meta,
                [{"ensemble_paths": self.ens_paths, "column_keys": self.column_keys}],
            ),
        ]
        if not self.field_totals:
            functions.append(
                (
                    load_smry,
                    [
                        {
                            "ensemble_paths": self.ens_paths,
                            "column_keys": ["F[OWG]PT"],
                            "time_index": "first",
                        }
                    ],
                )
            )
        if self.fipfile is not None:
            functions.append(
                (
                    get_fipdesc,
                    [{"fipfile": self.fipfile, "column_keys": self.smry_cols}],
                )
            )
        return functions

    def selectors_id(self, selector):
        """Pattern-matching (dict) component id for a selector on this page."""
        return {"page": self.uuid("selectors"), "value": selector}

    def selectors_context_string(self, selector, prop):
        """Key used by dash.callback_context for a pattern-matching selector id."""
        return '{"page":"' + self.uuid("selectors") + f'","value":"{selector}"}}.{prop}'

    def selectors_unwrap_context_string(self, context_string):
        """Extract the selector name from a callback-context key.

        Note: the previous implementation used ``rstrip(self.uuid(""))``, which
        strips by *character set* and could remove trailing characters of the
        selector name itself. The exact uuid suffix is removed instead.
        """
        value = context_string.split(',"value":"', 1)[1].split('"}')[0]
        suffix = self.uuid("")
        return value[: -len(suffix)] if suffix and value.endswith(suffix) else value

    @property
    def layout(self):
        """Static page layout: a settings column and a graph/statistics column."""
        return wcc.FlexBox(
            id=self.uuid("layout"),
            children=[
                html.Div(
                    style={"flex": 1},
                    children=[
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("fip_array"),
                            children=[
                                html.Div("FIP array:", style={"font-weight": "bold"}),
                                dcc.Dropdown(
                                    id=self.uuid("fip"),
                                    options=[
                                        {"label": i, "value": i}
                                        for i in self.fip_arrays
                                    ],
                                    value=(
                                        "FIPNUM"
                                        if "FIPNUM" in self.fip_arrays
                                        else self.fip_arrays[0]
                                    ),
                                    clearable=False,
                                ),
                            ],
                        ),
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("groupby"),
                            children=[
                                html.Div("Group by:", style={"font-weight": "bold"}),
                                dcc.Dropdown(
                                    id=self.selectors_id("groupby"),
                                    clearable=False,
                                ),
                            ],
                        ),
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("ensemble"),
                            children=[
                                html.Div("Ensembles:", style={"font-weight": "bold"}),
                                dcc.Dropdown(
                                    id=self.selectors_id("ensemble"),
                                    options=[
                                        {"label": i, "value": i}
                                        for i in self.ensembles
                                    ],
                                    value=self.ensembles,
                                    multi=True,
                                    clearable=False,
                                ),
                            ],
                        ),
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("vector"),
                            children=[
                                html.Div("Time series:", style={"font-weight": "bold"}),
                                dcc.Dropdown(
                                    id=self.selectors_id("vector"),
                                    clearable=False,
                                    optionHeight=80,
                                ),
                            ],
                        ),
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("timeseries_visualization"),
                            children=[
                                html.Div(
                                    "Time series visualization:",
                                    style={"font-weight": "bold"},
                                ),
                                dcc.RadioItems(
                                    id=self.selectors_id("timeseries_visualization"),
                                    options=[
                                        {
                                            "label": "Individual realizations",
                                            "value": "realizations",
                                        },
                                        {
                                            "label": "Statistical fanchart",
                                            "value": "statistics",
                                        },
                                    ],
                                    value="statistics",
                                ),
                            ],
                        ),
                        html.Label(
                            style={"paddingBottom": "15px"},
                            id=self.uuid("date_visualization"),
                            children=[
                                html.Div(
                                    "Single date statistics as:",
                                    style={"font-weight": "bold"},
                                ),
                                dcc.Dropdown(
                                    id=self.selectors_id("date_view"),
                                    options=[
                                        {"label": i.lower().capitalize(), "value": i}
                                        for i in [
                                            "table",
                                            "box plot",
                                            "histogram",
                                            "per realization",
                                        ]
                                    ],
                                    value="table",
                                    clearable=False,
                                ),
                            ],
                        ),
                        html.Div(children="Filters:", style={"font-weight": "bold"}),
                        # Filter selectors are generated dynamically by callback.
                        html.Div(id=self.uuid("filters"), children=[]),
                    ],
                ),
                html.Div(
                    style={"flex": 6, "paddingLeft": "5px"},
                    children=[
                        wcc.Graph(
                            id=self.uuid("graph"),
                            clickData={"points": [{"x": str(self.smry["DATE"].min())}]},
                            style={"height": "450px"},
                        ),
                        html.Div(
                            id=self.uuid("stats_title"),
                            style={"textAlign": "center"},
                            children="",
                        ),
                        html.Div(id=self.uuid("date_view_wrapper")),
                    ],
                ),
                dcc.Store(
                    id=self.uuid("date"), data=json.dumps(str(self.smry["DATE"].min()))
                ),
                dcc.Store(id=self.uuid("ref_vec"), data=json.dumps("")),
            ],
        )

    # pylint: disable=too-many-statements
    def set_callbacks(self, app):
        @app.callback(
            [
                Output(self.uuid("filters"), "children"),
                Output(self.selectors_id("vector"), "options"),
                Output(self.selectors_id("vector"), "value"),
                Output(self.selectors_id("groupby"), "options"),
                Output(self.selectors_id("groupby"), "value"),
            ],
            [Input(self.uuid("fip"), "value")],
            [
                State(self.selectors_id("vector"), "value"),
                State(self.selectors_id("groupby"), "value"),
            ],
        )
        def _update_filters_vectors_groupby(fip, current_vector, current_groupby):
            """
            Makes "wcc.Select" components based on the available filters. If a fipfile is
            provided, the filters will be based on the available groups in that file. Otherwise
            the filter will be the region numbers for the specific FIP array (e.g. FIPNUM)
            In addition: Available vectors and groupby options are updated for the selected
            fip_array. These actions are done in the same callback to prevent multiple executions
            of the _render_charts callback at FIP array change.
            """
            fipdesc = (
                None
                if self.fipdesc is None or fip not in self.fipdesc["FIP"].values
                else self.fipdesc[self.fipdesc["FIP"] == fip]
            )
            # Creating wcc.Select components
            if fipdesc is None:
                nodes = get_fip_array_nodes(fip, self.smry_cols)
                filters = [
                    html.Details(
                        open=True,
                        children=[
                            html.Summary("Regions:"),
                            wcc.Select(
                                id=self.selectors_id(fip + self.uuid("regions")),
                                options=[{"label": i, "value": i} for i in nodes],
                                size=min(len(nodes), 10),
                                value=nodes,
                            ),
                        ],
                    )
                ]
            else:
                filters = [
                    html.Details(
                        open=True,
                        children=[
                            html.Summary(group.lower().capitalize()),
                            wcc.Select(
                                id=self.selectors_id(fip + self.uuid(group)),
                                options=[
                                    {"label": i, "value": i}
                                    for i in group_df["SUBGROUP"].unique()
                                ],
                                size=min(len(group_df["SUBGROUP"].unique()), 5),
                                value=group_df["SUBGROUP"].unique(),
                            ),
                        ],
                    )
                    for group, group_df in fipdesc.groupby("GROUP")
                ]
            # Update vectors
            vectors = set()
            for col in self.smry_cols:
                if simulation_region_vector_breakdown(col)[1] == fip:
                    vector_base = simulation_vector_base(col)
                    vectors.add(vector_base)
                    if fnmatch.fnmatch(vector_base, "R[OG]IP*"):
                        vectors.add(
                            f"Recovery Factor of {simulation_vector_description(vector_base)} (("
                            f"{vector_base} (initial) - {vector_base} (now))/{vector_base}"
                            " (initial))"
                        )
            vector_options = [
                {
                    "label": simulation_vector_description(i)
                    + ("" if i.startswith("Recovery") else f" ({i}) "),
                    "value": i,
                }
                for i in sorted(list(vectors))
            ]
            vector_value = (
                current_vector
                if current_vector in vectors
                else self.initial_vector
                if self.initial_vector in vectors
                else sorted(list(vectors))[0]
            )
            # Update groupby
            groups = ["ENSEMBLE"] + (
                ["regions"] if fipdesc is None else fipdesc["GROUP"].unique().tolist()
            )
            groupby_options = [
                {"label": i.lower().capitalize(), "value": i} for i in groups
            ]
            groupby_value = current_groupby if current_groupby in groups else "ENSEMBLE"
            return (
                filters,
                vector_options,
                vector_value,
                groupby_options,
                groupby_value,
            )

        @app.callback(
            [
                Output(self.uuid("graph"), "figure"),
                Output(self.uuid("date_view_wrapper"), "children"),
                Output(self.uuid("ref_vec"), "data"),
            ],
            [Input(self.uuid("date"), "data")]
            + [Input({"page": self.uuid("selectors"), "value": ALL}, "value")],
            [State(self.uuid("fip"), "value")],
        )
        # pylint: disable=too-many-locals
        def _render_charts(date, _, fip_array):
            """Main callback: aggregate data, render time series and date view."""
            inputs = dash.callback_context.inputs
            date = json.loads(inputs.pop(f"{self.uuid('date')}.data"))
            ensembles = inputs.pop(self.selectors_context_string("ensemble", "value"))
            ensembles = ensembles if isinstance(ensembles, list) else [ensembles]
            groupby = inputs.pop(self.selectors_context_string("groupby", "value"))
            vector = inputs.pop(self.selectors_context_string("vector", "value"))
            time_series_viz = inputs.pop(
                self.selectors_context_string("timeseries_visualization", "value")
            )
            date_viz = inputs.pop(self.selectors_context_string("date_view", "value"))
            # All remaining inputs are the dynamically created filter selectors.
            filters = {
                self.selectors_unwrap_context_string(key)[len(fip_array) :]: (
                    value if isinstance(value, list) else [value]
                )
                for (key, value) in inputs.items()
            }
            if not filters:
                # If filter selectors are not generated yet.
                raise PreventUpdate
            if vector.startswith("Recovery Factor of"):
                mode = "rec"
                # Extract the base vector name from the recovery label, e.g.
                # "... /ROIP (initial))" -> "ROIP". Note: str.rstrip("(initial)")
                # strips a *character set* and could also remove trailing vector
                # name characters, so the exact suffix is removed instead.
                vector_base = vector.split("/")[-1]
                suffix = " (initial))"
                if vector_base.endswith(suffix):
                    vector_base = vector_base[: -len(suffix)]
                vector_base = vector_base.strip()
            else:
                mode = "agg"
                vector_base = vector
            try:
                df, ref_vector = filter_and_aggregate_vectors(
                    smry=self.smry,
                    ensembles=ensembles,
                    groupby=groupby,
                    vector=vector_base,
                    filters=filters,
                    fipdesc=self.fipdesc,
                    fip=fip_array,
                )
            except KeyError as exception:
                return [
                    [{}],
                    html.Div(
                        children=(
                            f"KeyError: {exception}\n"
                            "Likely to be that one or more vectors are missing in your data."
                        ),
                        style={
                            "textAlign": "center",
                            "font-weight": "bold",
                            "whiteSpace": "pre-wrap",
                        },
                    ),
                    json.dumps(""),
                ]
            if len(df.columns) < 4:
                # Filter combination has removed all other vectors
                # than ENSEMBLE, REAL and DATE
                return [
                    [{}],
                    html.Div(
                        children="Filter combination yielded no matching vectors",
                        style={"textAlign": "center", "font-weight": "bold"},
                    ),
                    json.dumps(""),
                ]
            line_shape = get_simulation_line_shape(
                line_shape_fallback=self.line_shape_fallback,
                vector=ref_vector,
                smry_meta=self.smry_meta,
            )
            (timeseries_traces, df) = per_real_calculations(
                df=df,
                ensembles=ensembles,
                rec_ensembles=self.rec_ensembles,
                groupby=groupby,
                groupby_colors=self.groupby_colors,
                vector=vector_base,
                filters=filters,
                mode=mode,
                visualization=time_series_viz,
                line_shape=line_shape,
            )
            if time_series_viz == "statistics" or date_viz == "table":
                stat_df = calc_statistics(df)
            if time_series_viz == "statistics":
                timeseries_traces = add_statistic_traces(
                    stat_df=stat_df,
                    ensembles=ensembles,
                    mode=mode,
                    groupby=groupby,
                    groupby_color=self.groupby_colors,
                    line_shape=line_shape,
                )
            if date_viz == "table":
                date_view = render_table(
                    stat_df=stat_df, mode=mode, groupby=groupby, date=date
                )
            elif date_viz in ["box plot", "histogram", "per realization"]:
                date_view = render_single_date_graph(
                    date_viz=date_viz,
                    df=df,
                    mode=mode,
                    groupby=groupby,
                    date=date,
                    theme=self.theme,
                    title=make_title(self.smry_meta, ref_vector, vector, mode),
                    colors=self.groupby_colors[groupby],
                )
            else:
                date_view = html.Div(children="")
            timeseries_layout = {
                "hovermode": "closest",
                "yaxis": {
                    "title": make_title(self.smry_meta, ref_vector, vector, mode),
                    "showgrid": False,
                },
                "xaxis": {"showgrid": False},
                "height": 450,
            }
            if mode == "rec":
                # Recovery factors are fractions; show them as percentages.
                timeseries_layout["yaxis"].update(
                    {
                        "exponentformat": "none",
                        "tickformat": ".0%",
                        "hoverformat": ".2%",
                    },
                )
            return [
                {
                    "data": timeseries_traces,
                    "layout": self.theme.create_themed_layout(timeseries_layout),
                },
                date_view,
                json.dumps(ref_vector),
            ]

        @app.callback(
            [
                Output(self.selectors_id("ensemble"), "multi"),
                Output(self.selectors_id("ensemble"), "value"),
            ],
            [Input(self.selectors_id("groupby"), "value")],
            [State(self.selectors_id("ensemble"), "multi")],
        )
        def _set_ensemble_selector(group_by, multi):
            """If ensemble is selected as group by, set the ensemble selector
            to allow multiple selections. Otherwise single selection.
            """
            if group_by == "ENSEMBLE":
                if multi:
                    raise PreventUpdate
                return (True, self.ensembles)
            if not multi:
                raise PreventUpdate
            return (False, self.ensembles[0])

        @app.callback(
            Output(self.uuid("date"), "data"),
            [Input(self.uuid("graph"), "clickData")],
            [State(self.uuid("date"), "data")],
        )
        def _update_date(clickdata, date):
            """Store clicked date for use in other callback"""
            date = clickdata["points"][0]["x"] if clickdata else json.loads(date)
            return json.dumps(date)

        @app.callback(
            Output(self.uuid("stats_title"), "children"),
            [Input(self.uuid("date"), "data"), Input(self.uuid("ref_vec"), "data")],
            [State(self.selectors_id("vector"), "value")],
        )
        def _update_single_date_title(date, ref_vector, vector):
            """Update single date title"""
            date = json.loads(date)
            ref_vector = json.loads(ref_vector)
            if ref_vector == "":
                return ""
            title = f"Date: {date}, {simulation_vector_description(vector)}" + (
                ""
                if vector.startswith("Recovery")
                else f" ({vector})"
                + (
                    ""
                    if get_unit(self.smry_meta, ref_vector) is None
                    else f" [{get_unit(self.smry_meta, ref_vector)}]"
                )
            )
            return title


@CACHE.memoize(timeout=CACHE.TIMEOUT)
def make_title(smry_meta: pd.DataFrame, ref_vector: str, vector: str, mode: str):
    """Axis/graph title: vector description plus unit (omitted for recovery)."""
    return (
        f"{simulation_vector_description(vector).split(')')[0]})"
        if mode == "rec"
        else f"{simulation_vector_description(vector)} ({vector})"
        + (
            ""
            if get_unit(smry_meta, ref_vector) is None
            else f" [{get_unit(smry_meta, ref_vector)}]"
        )
    )


def render_single_date_graph(date_viz, df, mode, groupby, date, theme, title, colors):
    """Render the single-date view as a histogram, box plot or bar chart."""

    def _make_trace(date_viz, df, col, name, color):
        # One Plotly trace per group; returns None for unknown date_viz.
        if date_viz == "histogram":
            return {
                "x": df[col],
                "type": "histogram",
                "name": name,
                "marker": {"color": color},
            }
        if date_viz == "box plot":
            return {
                "y": df[col],
                "type": "box",
                "name": name,
                "marker": {"color": color},
            }
        if date_viz == "per realization":
            return {
                "x": df["REAL"],
                "y": df[col],
                "type": "bar",
                "name": name,
                "marker": {"color": color},
            }
        return None

    columns = []
    if mode == "agg":
        columns = [col for col in df.columns if col.startswith("AGG_")]
    elif mode == "rec":
        columns = [col for col in df.columns if col.startswith("REC_")]
    if not columns:
        return []
    columns = list(
        dict.fromkeys(columns)
    )  # Make unique while preserving order of first occurrence.
    traces = []
    df["DATE"] = df["DATE"].astype(str)
    df = df.loc[df["DATE"] == date]
    if groupby == "ENSEMBLE":
        for ens in df["ENSEMBLE"].unique():
            if len(columns) != 1:
                # Should never occur
                raise ValueError(
                    "Not unique data for column, date and ensemble combination."
                )
            trace = _make_trace(
                date_viz, df[df["ENSEMBLE"] == ens], columns[0], ens, colors[ens]
            )
            if trace is not None:
                traces.append(trace)
    else:
        for col in columns:
            if len(df["ENSEMBLE"].unique()) > 1:
                # Should never occur
                raise ValueError(
                    "Not unique data for column, date and ensemble combination."
                )
            trace = _make_trace(
                date_viz, df, col, col.split("_")[-1], colors[col.split("_")[-1]]
            )
            if trace is not None:
                traces.append(trace)
    layout = {"height": 600, "margin": {"t": 10, "b": 230}, "showlegend": True}
    if date_viz == "histogram":
        layout.update(
            {
                "barmode": "overlay",
                "bargap": 0.01,
                "bargroupgap": 0.2,
                "xaxis": {
                    "exponentformat": "none",
                    "tickformat": ".1%",
                    "hoverformat": ".2%",
                    "title": title,
                }
                if mode == "rec"
                else {"title": title},
                "yaxis": {
                    "title": "Count",
                    "tickformat": "d",
                    "exponentformat": "none",
                },
            }
        )
    else:
        layout.update(
            {
                "yaxis": {
                    "exponentformat": "none",
                    "tickformat": ".1%",
                    "hoverformat": ".2%",
                    "title": title,
                }
                if mode == "rec"
                else {"title": title},
            }
        )
        if date_viz == "per realization":
            layout.update(
                {
                    "xaxis": {
                        "exponentformat": "none",
                        "tickformat": "d",
                        "title": "Realization",
                    }
                }
            )
    return wcc.Graph(
        figure={"data": traces, "layout": theme.create_themed_layout(layout)}
    )


def render_table(stat_df, mode, groupby, date):
    """Render the single-date statistics as a DataTable."""
    columns = []
    if mode == "agg":
        columns = [col[0] for col in stat_df.columns if col[0].startswith("AGG_")]
    elif mode == "rec":
        columns = [col[0] for col in stat_df.columns if col[0].startswith("REC_")]
    if not columns:
        return []
    columns = list(
        dict.fromkeys(columns)
    )  # Make unique while preserving order of first occurrence.
    stat_df["DATE"] = stat_df["DATE"].astype(str)
    stat_df = stat_df.loc[stat_df["DATE"] == date]
    table = []
    for col in columns:
        if groupby == "ENSEMBLE":
            for ens in stat_df["ENSEMBLE"].unique():
                df = stat_df[stat_df["ENSEMBLE"] == ens][col]
                if len(df.index) > 1:
                    # Should never occur
                    raise ValueError(
                        "Not unique data for column, date and ensemble combination."
                    )
                table.append(
                    {
                        "Group": ens,
                        "Minimum": df["nanmin"].iat[0],
                        "Maximum": df["nanmax"].iat[0],
                        "Mean": df["nanmean"].iat[0],
                        "Stddev": df["nanstd"].iat[0],
                        "P10": df["p10"].iat[0],
                        "P90": df["p90"].iat[0],
                    }
                )
        else:
            df = stat_df[col]
            if len(df.index) > 1:
                # Should never occur
                raise ValueError("Not unique data for column and date combination.")
            table.append(
                {
                    "Group": col.split("_")[-1],
                    "Minimum": df["nanmin"].iat[0],
                    "Maximum": df["nanmax"].iat[0],
                    "Mean": df["nanmean"].iat[0],
                    "Stddev": df["nanstd"].iat[0],
                    "P10": df["p10"].iat[0],
                    "P90": df["p90"].iat[0],
                }
            )
    columns = [
        {**{"name": i[0], "id": i[0]}, **i[1]}
        for i in deepcopy(ReservoirSimulationTimeSeriesRegional.TABLE_STATISTICS)
    ]
    if mode == "rec":
        # Show recovery factors as percentages.
        for col in columns:
            try:
                col["format"]["specifier"] = ".2%"
            except KeyError:
                pass
    return (
        DataTable(
            sort_action="native",
            filter_action="native",
            page_action="native",
            page_size=10,
            data=table,
            columns=columns,
        ),
    )


@CACHE.memoize(timeout=CACHE.TIMEOUT)
def filter_and_aggregate_vectors(
    smry: pd.DataFrame,
    ensembles: list,
    groupby: str,
    vector: str,
    filters: dict,
    fipdesc: pd.DataFrame,
    fip: str,
) -> tuple:
    """Aggregate inplace vectors based on filters
    Note: ensemble is only in the list of inputs to reduce risk with caching
    See: https://github.com/equinor/webviz-config/issues/211

    Creating Eclipse format summary vectors from selection

    Returns a tuple ``(aggregated_df, ref_vector)`` where ``ref_vector`` is one
    full vector name present in the data (used for metadata lookups).
    """
    if groupby != "ENSEMBLE" and len(ensembles) > 1:
        # This should never happen
        raise ValueError("Cannot have multiple ensembles unless you group by ensemble")
    df = smry[smry["ENSEMBLE"].isin(ensembles)]
    if fipdesc is None or fip not in fipdesc["FIP"].values:
        if groupby == "ENSEMBLE":
            nodes = filters
        else:
            nodes = {str(node): [node] for node in filters["regions"]}
    else:
        nodes = get_nodes(groupby=groupby, fipdesc=fipdesc, fip=fip, filters=filters)
    subgroup_vectors = {
        subgroup: [
            simulation_region_vector_recompose(
                vector_base_name=vector, fiparray=fip, node=node
            )
            for node in values
        ]
        for subgroup, values in nodes.items()
    }
    # Storing a full vector name that exists in the dataset to be used for metadata
    ref_vector = ""
    for _, vector_list in subgroup_vectors.items():
        for vec in vector_list:
            if vec in smry.columns:
                ref_vector = vec
                break
        else:
            continue
        break
    # Aggregate, concatenate and return.
    return (
        pd.concat(
            [df[["ENSEMBLE", "REAL", "DATE"]]]
            + [
                df[vectors].sum(axis=1).to_frame(f"AGG_{vector}_filtered_on_{subgroup}")
                for subgroup, vectors in subgroup_vectors.items()
            ],
            axis=1,
        ),
        ref_vector,
    )


@CACHE.memoize(timeout=CACHE.TIMEOUT)
def get_nodes(groupby: str, fipdesc: pd.DataFrame, fip: str, filters: dict):
    """Map each subgroup (or 'ENSEMBLE') to the region nodes passing all filters."""
    df = fipdesc[fipdesc["FIP"] == fip]
    nodes = {}
    for node, dfn in df.groupby("NODE"):
        node_inc = True
        node_subgroups = []
        for group, subgroups in filters.items():
            # A node is included only if every filter group matches.
            if dfn[dfn["GROUP"] == group].empty or not all(
                dfn[dfn["GROUP"] == group]["SUBGROUP"].isin(subgroups)
            ):
                node_inc = False
                break
            if group == groupby and groupby != "ENSEMBLE":
                node_subgroups.extend(dfn[dfn["GROUP"] == group]["SUBGROUP"])
        if node_inc:
            if groupby == "ENSEMBLE":
                nodes.setdefault("ENSEMBLE", []).append(node)
            elif len(node_subgroups) == 1:
                nodes.setdefault(node_subgroups[0], []).append(node)
            elif len(node_subgroups) > 1:
                raise ValueError(
                    f"This should not occur, likely to be a bug. Vector nr {node} matched several "
                    f"{groupby} that you tried to group by. "
                    "Please report this at https://github.com/equinor/webviz-subsurface/issues"
                )
    return nodes


def calc_real_recovery(df, agg_vectors):
    """Recovery per date for one realization: (initial - current) / initial."""
    first = df[agg_vectors].values[0]
    # Initial volume can be zero; suppress the resulting invalid-divide warning.
    with np.errstate(invalid="ignore"):
        return (first - df[agg_vectors].values) / first


# pylint: disable=too-many-arguments, too-many-locals, unused-argument
@CACHE.memoize(timeout=CACHE.TIMEOUT)
def per_real_calculations(
    df: pd.DataFrame,
    ensembles: list,
    rec_ensembles: set,
    groupby: str,
    groupby_colors: dict,
    vector: str,  # only used to reduce caching risk
    filters: dict,
    mode: str,
    visualization: str,
    line_shape: str,
) -> tuple:
    """All calls that are per realization are called here to avoid multiple loops:
    That includes calculation of recovery and making traces per realization.
    This method assumes that the DataFrame 'df' has already been processed with the
    'filter_and_aggregate_vectors' method.
    """
    if groupby != "ENSEMBLE" and len(ensembles) > 1:
        # This should never happen
        raise ValueError("Cannot have multiple ensembles unless you group by ensemble")
    traces = []
    ens_dfs = []
    # Find aggregated vectors
    agg_vectors = df.columns[df.columns.str.contains("AGG_.*")]
    # Subgroups from aggregated vector names to be used for e.g. legend.
    groupby_names = [
        agg_vector.split("_filtered_on_")[-1] for agg_vector in agg_vectors
    ]
    # Make recovery vector names if relevant
    if mode == "rec":
        rec_vectors = ["REC" + vec[3:] for vec in agg_vectors]
    # Iterate over ensembles and realizations
    for ens, ens_df in df.groupby("ENSEMBLE"):
        ens_rec = []
        if mode == "rec" and ens not in rec_ensembles:
            continue
        for real_no, (real, real_df) in enumerate(ens_df.groupby("REAL")):
            if mode == "rec":
                rec = calc_real_recovery(real_df, agg_vectors)
                ens_rec.extend(rec)
            if visualization == "realizations":
                for i, vec in enumerate(agg_vectors):
                    name = ens if groupby == "ENSEMBLE" else groupby_names[i]
                    traces.append(
                        {
                            "x": real_df["DATE"],
                            "y": rec[:, i] if mode == "rec" else real_df[vec],
                            "hovertext": (
                                f"{groupby.lower().capitalize()}: {name} "
                                + f"Realization: {real}"
                            ),
                            "name": name,
                            "legendgroup": name,
                            "marker": {"color": groupby_colors[groupby][name]},
                            "showlegend": real_no == 0,
                            "line": {"shape": line_shape},
                        }
                    )
        if mode == "rec":
            # We want to store calculated recovery for statistical graphs and tables
            ens_dfs.append(
                pd.concat(
                    [
                        ens_df.reset_index(drop=True),
                        pd.DataFrame(ens_rec, columns=rec_vectors).reset_index(
                            drop=True
                        ),
                    ],
                    axis=1,
                )
            )
    # Concat ensemble dfs with calculated recovery
    if ens_dfs:
        df = pd.concat(ens_dfs, ignore_index=True)
    return (traces, df)


def calc_statistics(df):
    """Per-ensemble, per-date statistics (mean, std, min, max, P10, P90)."""

    # Switched P10 and P90 due to convention in petroleum industry
    def p10(x):
        return np.nanpercentile(x, q=90)

    def p90(x):
        return np.nanpercentile(x, q=10)

    stat_dfs = []
    for ens, ens_df in df.groupby("ENSEMBLE"):
        stat_dfs.append(
            ens_df.drop(columns=["REAL", "ENSEMBLE"])
            .groupby("DATE", as_index=False)
            .agg([np.nanmean, np.nanstd, np.nanmin, np.nanmax, p10, p90])
            .reset_index()
            .assign(ENSEMBLE=ens)
        )
    return pd.concat(stat_dfs)


def add_statistic_traces(stat_df, ensembles, mode, groupby, groupby_color, line_shape):
    """Build fanchart traces for every aggregated/recovery column."""
    columns = []
    if mode == "agg":
        columns = [col[0] for col in stat_df.columns if col[0].startswith("AGG_")]
    elif mode == "rec":
        columns = [col[0] for col in stat_df.columns if col[0].startswith("REC_")]
    if not columns:
        return []
    columns = list(
        dict.fromkeys(columns)
    )  # Make unique while preserving order of first occurrence.
    traces = []
    for col in columns:
        if groupby == "ENSEMBLE":
            for ens in ensembles:
                traces.extend(
                    add_fanchart_traces(
                        stat_df=stat_df[stat_df["ENSEMBLE"] == ens],
                        col=col,
                        legend_group=ens,
                        color=groupby_color[groupby][ens],
                        line_shape=line_shape,
                    )
                )
        else:
            traces.extend(
                add_fanchart_traces(
                    stat_df=stat_df,
                    col=col,
                    legend_group=col.split("_")[-1],
                    color=groupby_color[groupby][col.split("_")[-1]],
                    line_shape=line_shape,
                )
            )
    return traces


def add_fanchart_traces(stat_df, col, legend_group, color, line_shape):
    """Renders a fanchart"""
    fill_color = hex_to_rgb(color, 0.3)
    line_color = hex_to_rgb(color, 1)
    return [
        {
            "name": legend_group,
            "hovertext": f"Maximum {legend_group}",
            "x": stat_df["DATE"],
            "y": stat_df[col]["nanmax"],
            "mode": "lines",
            "line": {"width": 0, "color": line_color, "shape": line_shape},
            "legendgroup": legend_group,
            "showlegend": False,
        },
        {
            "name": legend_group,
            "hovertext": f"P90 {legend_group}",
            "x": stat_df["DATE"],
            "y": stat_df[col]["p90"],
            "mode": "lines",
            "fill": "tonexty",
            "fillcolor": fill_color,
            "line": {"width": 0, "color": line_color, "shape": line_shape},
            "legendgroup": legend_group,
            "showlegend": False,
        },
        {
            "name": legend_group,
            "hovertext": f"Mean {legend_group}",
            "x": stat_df["DATE"],
            "y": stat_df[col]["nanmean"],
            "mode": "lines",
            "fill": "tonexty",
            "fillcolor": fill_color,
            "line": {"color": line_color, "shape": line_shape},
            "legendgroup": legend_group,
            "showlegend": True,
        },
        {
            "name": legend_group,
            "hovertext": f"P10 {legend_group}",
            "x": stat_df["DATE"],
            "y": stat_df[col]["p10"],
            "mode": "lines",
            "fill": "tonexty",
            "fillcolor": fill_color,
            "line": {"width": 0, "color": line_color, "shape": line_shape},
            "legendgroup": legend_group,
            "showlegend": False,
        },
        {
            "name": legend_group,
            "hovertext": f"Minimum {legend_group}",
            "x": stat_df["DATE"],
            "y": stat_df[col]["nanmin"],
            "mode": "lines",
            "fill": "tonexty",
            "fillcolor": fill_color,
            "line": {"width": 0, "color": line_color, "shape": line_shape},
            "legendgroup": legend_group,
            "showlegend": False,
        },
    ]


def hex_to_rgb(hex_string, opacity=1):
    """Converts a hex color to an rgba string with the given opacity."""
    hex_string = hex_string.lstrip("#")
    hlen = len(hex_string)
    rgb = [int(hex_string[i : i + hlen // 3], 16) for i in range(0, hlen, hlen // 3)]
    rgb.append(opacity)
    return f"rgba{tuple(rgb)}"


def get_fip_array_nodes(fip, smry_cols):
    """Sorted list of all available nodes for a given fip array (e.g FIPNUM)"""
    sorted_int_list = sorted(
        {
            int(col.split(":")[1])
            for col in smry_cols
            if (
                len(col.split(":")) > 1
                and col.split(":")[1].isdigit()
                and simulation_region_vector_breakdown(col)[1] == fip
            )
        }
    )
    return [str(i) for i in sorted_int_list]


@webvizstore
def get_fipdesc(fipfile: Path, column_keys: list) -> pd.DataFrame:
    """Read the fipfile yaml into a (FIP, GROUP, SUBGROUP, NODE) DataFrame.

    Subgroups with no node present in the data are silently dropped.
    Raises TypeError for non-integer nodes and ValueError if a node is used in
    more than one subgroup within the same (FIP, GROUP).
    """
    fipdesc: list = []
    with open(Path(fipfile), "r", encoding="utf-8") as stream:
        fipdict = yaml.safe_load(stream)
    for fip, fipdef in fipdict.items():
        for group, group_def in fipdef.get("groups").items():
            for key, fip_nodes in group_def.items():
                for x in fip_nodes:
                    if not isinstance(x, int):
                        raise TypeError(
                            f"FIP: {fip}, group: {group}, subgroup: {key} has non-integer input."
                        )
                    # Entries are 4-tuples, so the check must compare FIP, GROUP
                    # and NODE explicitly (a 3-tuple membership test would never
                    # match and the validation would be dead code).
                    if any(
                        entry[0] == str(fip)
                        and entry[1] == str(group)
                        and entry[3] == x
                        for entry in fipdesc
                    ):
                        raise ValueError(
                            f"FIP: {fip}, group: {group} has input which is not unique. "
                            f"Value {x} is used for multiple subgroups."
                        )
                    fipdesc.append((str(fip), str(group), str(key), x))
    df_before_data_verification = pd.DataFrame(
        fipdesc, columns=("FIP", "GROUP", "SUBGROUP", "NODE")
    )
    dfs = []
    for fip, fip_df in df_before_data_verification.groupby("FIP"):
        nodes_in_data = get_fip_array_nodes(fip, column_keys)
        dfs.extend(
            [
                subgroup_df
                for _, subgroup_df in fip_df.groupby(["GROUP", "SUBGROUP"])
                if not subgroup_df[subgroup_df["NODE"].isin(nodes_in_data)].empty
            ]
        )
    return pd.concat(dfs).sort_index()


@CACHE.memoize(timeout=CACHE.TIMEOUT)
def get_unit(smry_meta, vec):
    """Reformatted unit for a vector, or None if metadata is missing."""
    return (
        None
        if (smry_meta is None or vec not in smry_meta.index)
        else simulation_unit_reformat(smry_meta.unit[vec])
    )