Source code for bridge.pipelines.gh2bt_for_meta.map_funcs.maturity
"""
Mapping functions for maturity metrics.
This module derives bio.tools maturity metadata from GitHub repository
signals. GitHub archived status is treated as authoritative, while a
simple popularity-based heuristic is used to distinguish between
emerging and mature tools when the repository is active.
"""
from typing import Any
import numpy as np
from bridge.core.biotools import Maturity
from bridge.logging import get_user_logger
logger = get_user_logger()
def _safe_metric(gh_schema: dict[str, Any], key: str) -> float:
"""
Safely extract a numeric GitHub metric from a repository schema.
Missing, null, or non-numeric values are converted to ``0.0`` and
logged at note level.
Parameters
----------
gh_schema : dict[str, Any]
GitHub repository metadata dictionary.
key : str
Name of the metric field to extract.
Returns
-------
float
Numeric value of the metric, or ``0.0`` if unavailable or invalid.
"""
value = gh_schema.get(key, 0)
try:
return float(value)
except (TypeError, ValueError):
logger.note(f"Non-numeric GitHub metric {key}={value!r}, treating as 0.")
return 0.0
[docs]
def map_maturity(gh_schema: dict | None, bt_maturity: Maturity | None) -> Maturity | None:
    """
    Derive the bio.tools maturity of a tool from its GitHub repository data.

    The function first decides which maturity GitHub suggests, then
    reconciles that suggestion with the existing bio.tools annotation:

    * An archived repository always yields ``Maturity.Legacy``.
    * Otherwise the sum of ``log1p`` over the star, fork, watcher and
      subscriber counts is compared against a fixed threshold of 3:
      above it the tool is ``Maturity.Mature``, else ``Maturity.Emerging``.
    * If bio.tools has no maturity yet, the GitHub-derived value is added.
    * If bio.tools already agrees, the existing value is returned as is.
    * If bio.tools disagrees, the GitHub-derived value wins and the
      conflict is logged.

    Parameters
    ----------
    gh_schema : dict[str, Any] | None
        GitHub repository metadata, or ``None`` if unavailable. Keys read:
        'archived', 'stargazers_count', 'forks_count', 'watchers_count'
        and 'subscribers_count'.
    bt_maturity : Maturity | None
        Existing bio.tools maturity annotation, or ``None`` if unset.

    Returns
    -------
    Maturity | None
        The reconciled maturity, or ``bt_maturity`` unchanged when no
        GitHub schema was supplied.
    """
    if gh_schema is None:
        logger.unchanged("No GitHub schema provided, nothing to map.")
        return bt_maturity

    # Step 1: pick the GitHub-derived maturity and the matching log messages.
    if gh_schema.get("archived", False):
        derived = Maturity.Legacy
        matched_msg = "GitHub archived status matches existing bio.tools maturity 'Legacy'."
        clash_msg = "GitHub archived status conflicts with existing bio.tools maturity."
        fresh_msg = "Using GitHub archived status to set bio.tools maturity to 'Legacy'."
    else:
        # NOTE(review): in the GitHub REST API 'watchers_count' appears to
        # mirror 'stargazers_count' -- confirm that counting both is intended.
        metric_keys = (
            "stargazers_count",
            "forks_count",
            "watchers_count",
            "subscribers_count",
        )
        counts = [_safe_metric(gh_schema, name) for name in metric_keys]

        # Crude popularity score (log-damped so one huge count cannot dominate).
        score = sum(np.log1p(count) for count in counts)

        if score > 3:
            derived, label = Maturity.Mature, "Mature"
        else:
            derived, label = Maturity.Emerging, "Emerging"

        matched_msg = f"GitHub maturity score matches existing bio.tools maturity '{label}'."
        clash_msg = "GitHub maturity score conflicts with existing bio.tools maturity. Will overwrite."
        fresh_msg = f"Using GitHub maturity score to set bio.tools maturity to '{label}'."

    # Step 2: reconcile with whatever bio.tools already holds.
    if bt_maturity is None:
        logger.added(fresh_msg)
        return derived

    if bt_maturity == derived:
        logger.exact(matched_msg)
        return bt_maturity

    logger.conflict(clash_msg)
    return derived