8000 Categorical plugin accepts deterministic conditions by mspelman07 · Pull Request #1965 · metoppv/improver · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Categorical plugin accepts deterministic conditions #1965

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,31 @@ accessed with this key contains the essentials that make the node function.
against the spp__relative_to_threshold attribute of the threshold coordinate
in the provided diagnostic.

It is also possible to build a node which uses a deterministic forecast. This
is not currently used within the weather symbols decision tree but, as an example, the following shows
how such a node would be encoded::

{
"precip_rate": {
"if_true": "rain",
"if_false": "dry",
"if_diagnostic_missing": "if_false",
"thresholds": [0],
"threshold_condition": ">",
"diagnostic_fields": ["precipitation_rate"],
"deterministic": True
},
}

The keys for this dictionary have the same meaning as for a probabilistic node but with the
following additional keys:

- **thresholds** (list(float)): The threshold(s) that must be exceeded or not
exceeded (see threshold_condition) for the node to progress to the if_true target.
Two values are required if condition_combination is being used.
- **deterministic** (boolean): Determines whether the node is expecting a deterministic
input. If this key is omitted the node is treated as probabilistic.

The first leaf node above is encoded as follows::

{
Expand Down
171 changes: 102 additions & 69 deletions improver/categorical/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,68 +203,87 @@ def prepare_input_cubes(
diagnostics = get_parameter_names(
expand_nested_lists(query, "diagnostic_fields")
)
thresholds = expand_nested_lists(query, "diagnostic_thresholds")
conditions = expand_nested_lists(query, "diagnostic_conditions")
for diagnostic, threshold, condition in zip(
diagnostics, thresholds, conditions
):

# First we check the diagnostic name and units, performing
# a conversion if required and possible.
test_condition = iris.Constraint(name=diagnostic)
matched_cube = cubes.extract(test_condition)
if not matched_cube:
if "if_diagnostic_missing" in query:
optional_node_data_missing.append(key)
if query.get("deterministic", False):
for diagnostic in diagnostics:
test_condition = iris.Constraint(name=diagnostic)
matched_cube = cubes.extract(test_condition)
if not matched_cube:
if "if_diagnostic_missing" in query:
optional_node_data_missing.append(key)
else:
missing_data.append(f"name: {diagnostic} (deterministic)")
continue
used_cubes.extend(matched_cube)
else:
thresholds = expand_nested_lists(query, "diagnostic_thresholds")
conditions = expand_nested_lists(query, "diagnostic_conditions")
for diagnostic, threshold, condition in zip(
diagnostics, thresholds, conditions
):

# First we check the diagnostic name and units, performing
# a conversion if required and possible.
test_condition = iris.Constraint(name=diagnostic)
matched_cube = cubes.extract(test_condition)
if not matched_cube:
if "if_diagnostic_missing" in query:
optional_node_data_missing.append(key)
else:
missing_data.append(
f"name: {diagnostic}, threshold: {threshold}, "
f"spp__relative_to_threshold: {condition}\n"
)
continue

cube_threshold_units = find_threshold_coordinate(
matched_cube[0]
).units
threshold.convert_units(cube_threshold_units)

# Then we check if the required threshold is present in the
# cube, and that the thresholding is relative to it correctly.
threshold = threshold.points.item()
threshold_name = find_threshold_coordinate(matched_cube[0]).name()

# Set flag to check for old threshold coordinate names
if threshold_name == "threshold" and not self.coord_named_threshold:
self.coord_named_threshold = True

# Check threshold == 0.0
if abs(threshold) < self.float_abs_tolerance:
coord_constraint = {
threshold_name: lambda cell: np.isclose(
cell.point, 0, rtol=0, atol=self.float_abs_tolerance
)
}
else:
missing_data.append([diagnostic, threshold, condition])
continue

cube_threshold_units = find_threshold_coordinate(matched_cube[0]).units
threshold.convert_units(cube_threshold_units)

# Then we check if the required threshold is present in the
# cube, and that the thresholding is relative to it correctly.
threshold = threshold.points.item()
threshold_name = find_threshold_coordinate(matched_cube[0]).name()

# Set flag to check for old threshold coordinate names
if threshold_name == "threshold" and not self.coord_named_threshold:
self.coord_named_threshold = True

# Check threshold == 0.0
if abs(threshold) < self.float_abs_tolerance:
coord_constraint = {
threshold_name: lambda cell: np.isclose(
cell.point, 0, rtol=0, atol=self.float_abs_tolerance
)
}
else:
coord_constraint = {
threshold_name: lambda cell: np.isclose(
cell.point, threshold, rtol=self.float_tolerance, atol=0
coord_constraint = {
threshold_name: lambda cell: np.isclose(
cell.point, threshold, rtol=self.float_tolerance, atol=0
)
}

# Checks whether the spp__relative_to_threshold attribute is above
# or below a threshold and compares to the diagnostic_condition.
test_condition = iris.Constraint(
coord_values=coord_constraint,
cube_func=lambda cube: (
probability_is_above_or_below(cube) == condition
),
)
matched_threshold = matched_cube.extract(test_condition)
if not matched_threshold:
missing_data.append(
f"name: {diagnostic}, threshold: {threshold}, "
f"spp__relative_to_threshold: {condition}\n"
)
}

# Checks whether the spp__relative_to_threshold attribute is above
# or below a threshold and compares to the diagnostic_condition.
test_condition = iris.Constraint(
coord_values=coord_constraint,
cube_func=lambda cube: (
probability_is_above_or_below(cube) == condition
),
)
matched_threshold = matched_cube.extract(test_condition)
if not matched_threshold:
missing_data.append([diagnostic, threshold, condition])
else:
used_cubes.extend(matched_threshold)
else:
used_cubes.extend(matched_threshold)

if missing_data:
msg = "Decision Tree input cubes are missing the following required input fields:\n"
dyn_msg = "name: {}, threshold: {}, " "spp__relative_to_threshold: {}\n"
for item in missing_data:
msg = msg + dyn_msg.format(*item)
for dyn_msg in missing_data:
msg += dyn_msg
raise IOError(msg)

if not optional_node_data_missing:
Expand Down Expand Up @@ -378,13 +397,18 @@ def create_condition_chain(self, test_conditions: Dict) -> List:
"""
conditions = []
loop = 0
for diagnostic, p_threshold, d_threshold in zip(
test_conditions["diagnostic_fields"],
test_conditions["probability_thresholds"],
test_conditions["diagnostic_thresholds"],
if test_conditions.get("deterministic", False):
coord = "thresholds"
else:
coord = "probability_thresholds"

for index, (diagnostic, p_threshold) in enumerate(
zip(test_conditions["diagnostic_fields"], test_conditions[coord])
):
loop += 1

d_threshold = test_conditions.get("diagnostic_thresholds")
d_threshold = d_threshold[index] if d_threshold else None
loop += 1
if isinstance(diagnostic, list):
# We have a list which could contain variable names, operators and
# numbers. The variable names need converting into Iris Constraint
Expand All @@ -409,9 +433,12 @@ def create_condition_chain(self, test_conditions: Dict) -> List:
extract_constraint.append(item)
else:
# Non-lists are assumed to be constraints on a single variable.
extract_constraint = self.construct_extract_constraint(
diagnostic, d_threshold, self.coord_named_threshold
)
if d_threshold:
extract_constraint = self.construct_extract_constraint(
diagnostic, d_threshold, self.coord_named_threshold
)
else:
extract_constraint = iris.Constraint(diagnostic)
conditions.append(
[
extract_constraint,
Expand Down Expand Up @@ -559,10 +586,16 @@ def create_categorical_cube(self, cubes: Union[List[Cube], CubeList]) -> Cube:
that will fill it and data initiated with the value -1 to allow
any unset points to be readily identified.
"""
threshold_coord = find_threshold_coordinate(self.template_cube)
template_cube = next(self.template_cube.slices_over([threshold_coord])).copy()
# remove coordinates and bounds that do not apply to a categorical cube
template_cube.remove_coord(threshold_coord)
try:
threshold_coord = find_threshold_coordinate(self.template_cube)
except CoordinateNotFoundError:
template_cube = self.template_cube
else:
template_cube = next(
self.template_cube.slices_over([threshold_coord])
).copy()
# remove coordinates and bounds that do not apply to a categorical cube
template_cube.remove_coord(threshold_coord)

mandatory_attributes = generate_mandatory_attributes(cubes)
if self.title:
Expand Down
Loading
0