# Source code for bridge.pipelines.policies.gh2bt.reconcile_gh_over_bt
"""
Generic reconciliation policy for GitHub-over-bio.tools mapping.
This module provides a generic function to reconcile metadata between
GitHub and bio.tools according to a defined policy that prioritizes GitHub
values while preserving bio.tools values when GitHub is silent.
"""
from collections.abc import Callable
from typing import TypeVar
from bridge.logging import get_user_logger
# Module-level logger obtained from bridge.logging.get_user_logger(); the
# reconciliation function below reports each outcome through its
# unchanged / added / exact / conflict methods.
logger = get_user_logger()
# Type variables keeping the three representations handled by the policy
# distinct: the concrete bio.tools value and the two normalized forms that
# are compared against each other.
BT = TypeVar("BT") # bio.tools type
GHN = TypeVar("GHN") # normalized GitHub representation
BTN = TypeVar("BTN") # normalized bio.tools representation
# [docs]
def reconcile_gh_over_bt(
    *,
    gh_norm: GHN | None,
    bt_norm: BTN | None,
    bt_value: BT | None,
    build_bt_from_gh: Callable[[GHN], BT],
    log_label: str,
    equality_fn: Callable[[GHN, BTN], bool] | None = None,
) -> BT | None:
    """
    Apply a generic GitHub-over-bio.tools reconciliation policy.

    This function operates on *normalized* representations of GitHub and
    bio.tools values (``gh_norm`` and ``bt_norm``), while returning and
    constructing concrete bio.tools values (``bt_value`` and the output).

    Policy:

    1. If ``gh_norm`` is ``None``, GitHub is treated as silent and the
       existing bio.tools value (``bt_value``) is preserved. An "unchanged"
       log entry is emitted.
    2. If ``build_bt_from_gh(gh_norm)`` returns ``None``, the GitHub value
       cannot be expressed as a bio.tools value; ``bt_value`` is preserved
       and an "unchanged" log entry is emitted.
    3. If ``gh_norm`` is not ``None`` and ``bt_norm`` is ``None``, GitHub is
       treated as the only source. The value constructed via
       ``build_bt_from_gh(gh_norm)`` is returned and an "added" log entry is
       emitted.
    4. If both ``gh_norm`` and ``bt_norm`` are not ``None`` and they compare
       equal (``equality_fn(gh_norm, bt_norm)`` when provided, otherwise
       ``gh_norm == bt_norm``), the existing bio.tools value (``bt_value``)
       is preserved and an exact-match log entry is emitted.
    5. If both ``gh_norm`` and ``bt_norm`` are not ``None`` and differ, the
       GitHub value is treated as authoritative. The value constructed via
       ``build_bt_from_gh(gh_norm)`` is returned and a conflict log entry is
       emitted.

    Parameters
    ----------
    gh_norm : GHN | None
        Normalized representation of the GitHub value (e.g., canonicalized
        URL, lowercased language set, enum, etc.), or ``None`` if GitHub
        provides no usable value.
    bt_norm : BTN | None
        Normalized representation of the existing bio.tools value, or
        ``None`` if no value is recorded.
    bt_value : BT | None
        The current bio.tools value to be preserved when GitHub is silent,
        when the GitHub value cannot be converted, or when the normalized
        values match.
    build_bt_from_gh : Callable[[GHN], BT]
        Callable that constructs a concrete bio.tools value from the
        normalized GitHub representation. A ``None`` result is interpreted
        as "cannot be represented in bio.tools". It is called exactly once
        whenever ``gh_norm`` is not ``None``.
    log_label : str
        Short label used in log messages to identify the reconciled field
        (e.g., ``"license"``, ``"languages"``, ``"homepage"``).
    equality_fn : Callable[[GHN, BTN], bool] | None, optional
        Optional callable to determine equality between the normalized
        GitHub and bio.tools values. If ``None``, the default equality
        operator (``==``) is used. This parameter is useful when the
        normalized representations require custom comparison logic (e.g.,
        set equality for lists).

    Returns
    -------
    BT | None
        The reconciled bio.tools value according to the policy, or ``None``
        if both sources effectively provide no usable value.
    """
    # GitHub is silent: keep whatever bio.tools already has.
    if gh_norm is None:
        logger.unchanged(f"No GitHub {log_label} found, nothing to map.")
        return bt_value

    # Build the candidate up front so an unconvertible GitHub value is
    # rejected before it can be reported as an addition or a conflict.
    bt_from_gh = build_bt_from_gh(gh_norm)
    if bt_from_gh is None:
        logger.unchanged(f"GitHub {log_label} could not be cast as bio.tools, nothing to map.")
        return bt_value

    # bio.tools has nothing recorded: GitHub is the only source.
    if bt_norm is None:
        logger.added(f"{log_label} from GitHub: {gh_norm!r}")
        return bt_from_gh

    # Compare the two *normalized* representations, as the signature of
    # ``equality_fn`` (GHN, BTN) and the documented policy require. Comparing
    # the constructed bio.tools value against ``bt_norm`` would mix types and
    # misreport matches as conflicts whenever the builder is not the identity.
    if equality_fn is not None:
        equal = equality_fn(gh_norm, bt_norm)
    else:
        equal = gh_norm == bt_norm

    if equal:
        logger.exact(f"GitHub {log_label} matches bio.tools {log_label}.")
        return bt_value

    # Both sides have a value and they disagree: GitHub wins.
    logger.conflict(f"Existing GitHub {log_label} {gh_norm!r} differs from bio.tools {log_label} {bt_norm!r}")
    return bt_from_gh