Source code for bridge.pipelines.shared.version

"""
Shared version parsing and comparison utilities for pipelines.
"""

import re
from dataclasses import dataclass
from datetime import datetime
from enum import Enum, auto
from typing import Any

from packaging.version import InvalidVersion, Version

from bridge.core.biotools import VersionType as BiotoolsVersionType

# Matches a whole string of the form "<major>.<minor>[.<patch>][suffix]" with an
# optional leading "v"/"V". Group 1 captures everything after the prefix; the
# suffix is any run of non-whitespace characters.
_SEMVER_PREFIX_RE = re.compile(r"^[vV]?(\d+\.\d+(?:\.\d+)?(?:[^\s]*)?)$")
# Separator between the two bounds of a range: the dash must have whitespace on
# both sides, so dashes embedded in a token (e.g. "2023-01-01") do not match.
_RANGE_SEP_RE = re.compile(r"\s+[-–]\s+")  # hyphen or en dash
# Supported date layouts as (strptime format, whole-string guard regex) pairs.
# The regex only checks the digit/separator shape; it does not check that the
# digits form a real calendar date.
_DATE_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    ("%Y-%m-%d", re.compile(r"^\d{4}-\d{2}-\d{2}$")),
    ("%Y.%m.%d", re.compile(r"^\d{4}\.\d{2}\.\d{2}$")),
    ("%Y%m%d", re.compile(r"^\d{8}$")),
]


class VersionKind(Enum):
    """
    Classification of a free-text version string.

    Each member names one comparable group that a version label can be
    sorted into: semantic versions, dates, integers, numeric ranges, and
    raw labels.
    """

    # Semantic-style version.
    SEMVER = auto()
    # Calendar date.
    DATE = auto()
    # Plain integer.
    INT = auto()
    # Range made of a lower and an upper bound.
    RANGE = auto()
    # Anything else; kept as the raw label.
    RAW = auto()
@dataclass(frozen=True)
class ParsedVersion:
    """
    Parsed version representation.

    Parameters
    ----------
    kind : VersionKind
        The kind of version parsed (SEMVER, DATE, INT, RANGE, RAW).
    value : Any
        The parsed value:

        - SEMVER: packaging.version.Version
        - DATE  : datetime.date
        - INT   : int
        - RANGE : (ParsedVersion, ParsedVersion)  # [low, high]
        - RAW   : str
    raw : str
        The original version string.

    Notes
    -----
    Equality, hashing and ordering are all based on the *normalized* form
    (see ``_normalized_for_comparison``), not on the dataclass fields, so
    e.g. two instances holding the same ``Version`` but different ``raw``
    strings compare equal and hash identically.
    """

    kind: VersionKind
    value: Any  # Version | date | int | (ParsedVersion, ParsedVersion) | str
    raw: str

    def _normalized_for_comparison(self) -> tuple[VersionKind, Any] | None:
        """
        Normalize the version into a comparable (kind, value) pair.

        RANGE values are reduced to their upper bound. RAW values and
        unsupported kinds return ``None``.

        Returns
        -------
        tuple[VersionKind, Any] | None
            Normalized comparison tuple, or ``None`` if the version is not
            safely comparable.
        """
        if self.kind == VersionKind.RANGE:
            # represent range by upper bound; the lower bound is not used
            _, hi = self.value
            return hi._normalized_for_comparison()

        if self.kind in {VersionKind.SEMVER, VersionKind.DATE, VersionKind.INT}:
            return (self.kind, self.value)

        # raw and anything else: not safely comparable
        return None

    def __eq__(self, other: object) -> bool:
        """
        Compare two parsed versions for equality.

        If both versions can be normalized, equality is based on their
        normalized (kind, value) pair. Otherwise, equality falls back to
        raw string comparison.
        """
        if not isinstance(other, ParsedVersion):
            return NotImplemented

        self_norm = self._normalized_for_comparison()
        other_norm = other._normalized_for_comparison()
        if self_norm is not None and other_norm is not None:
            return self_norm == other_norm
        return self.raw == other.raw

    def __hash__(self) -> int:
        """
        Hash consistently with ``__eq__``.

        Without this method ``@dataclass(frozen=True)`` would generate a
        hash over ``(kind, value, raw)``, so instances that compare equal
        through normalization (same value, different ``raw``) would land
        in different set/dict buckets.

        Returns
        -------
        int
            Hash of the normalized (kind, value) pair when the version is
            comparable, otherwise hash of the raw string.
        """
        # NOTE: this matches __eq__ for instances whose ``raw`` determines
        # their kind/value. A hand-built comparable instance that shares
        # ``raw`` with a non-comparable one is equal under __eq__'s raw
        # fallback but may hash differently.
        norm = self._normalized_for_comparison()
        if norm is not None:
            return hash(norm)
        return hash(self.raw)

    def __lt__(self, other: object) -> bool:
        """
        Define strict ordering between comparable parsed versions.

        Ordering is only defined when both versions normalize to the same
        comparable kind. In all other cases, the versions are treated as
        incomparable and ``NotImplemented`` is returned (so the comparison
        operator raises ``TypeError`` unless the other operand handles it).
        """
        if not isinstance(other, ParsedVersion):
            return NotImplemented

        self_norm = self._normalized_for_comparison()
        other_norm = other._normalized_for_comparison()

        # if either cannot be normalized or kinds differ, treat as incomparable
        if self_norm is None or other_norm is None:
            return NotImplemented

        kind_a, val_a = self_norm
        kind_b, val_b = other_norm
        if kind_a != kind_b:
            return NotImplemented

        return val_a < val_b
def _parse_version_label(label: str) -> ParsedVersion:
    """
    Turn a free-text version label into a ``ParsedVersion``.

    Recognition is attempted in this order, and the first match wins:

    1. Ranges ("A - B" or "A – B"); each bound is parsed recursively.
    2. Dates (YYYY-MM-DD, YYYY.MM.DD, YYYYMMDD).
    3. Plain integers.
    4. Semantic-style versions (an optional "v"/"V" prefix is dropped).
    5. Raw string fallback.

    Parameters
    ----------
    label : str
        Raw version label; surrounding whitespace is stripped first.

    Returns
    -------
    ParsedVersion
        Structured representation whose ``raw`` is the stripped label.
    """
    text = label.strip()

    # 1) range: split on the first whitespace-delimited dash only
    if _RANGE_SEP_RE.search(text):
        bounds = _RANGE_SEP_RE.split(text, maxsplit=1)
        if len(bounds) == 2:
            lower, upper = (_parse_version_label(bound) for bound in bounds)
            return ParsedVersion(VersionKind.RANGE, (lower, upper), text)

    # 2) dates: the regex checks the shape, strptime checks the calendar
    for date_format, shape_re in _DATE_PATTERNS:
        if not shape_re.match(text):
            continue
        try:
            parsed_date = datetime.strptime(text, date_format).date()
        except ValueError:
            # right shape but not a real date; try the remaining formats
            continue
        return ParsedVersion(VersionKind.DATE, parsed_date, text)

    # 3) plain integer
    if re.fullmatch(r"\d+", text):
        return ParsedVersion(VersionKind.INT, int(text), text)

    # 4) semver-ish via packaging.Version (optional "v" prefix removed)
    prefix_match = _SEMVER_PREFIX_RE.match(text)
    candidate = prefix_match.group(1) if prefix_match else text
    try:
        parsed_semver = Version(candidate)
    except InvalidVersion:
        # 5) fallback: raw string, not safely comparable
        return ParsedVersion(VersionKind.RAW, text, text)
    return ParsedVersion(VersionKind.SEMVER, parsed_semver, text)
def any_bt_newer_than_gh(
    gh_latest: BiotoolsVersionType,
    bt_versions: list[BiotoolsVersionType],
) -> bool:
    """
    Tell whether at least one bio.tools version is newer than GitHub's latest.

    Every label is parsed into a ``ParsedVersion`` and compared through the
    partial ordering that class defines. Pairs that cannot be ordered
    (raw labels, differing kinds) are skipped.

    Parameters
    ----------
    gh_latest : BiotoolsVersionType
        GitHub latest release tag.
    bt_versions : list[BiotoolsVersionType]
        Existing bio.tools versions.

    Returns
    -------
    bool
        ``True`` as soon as one bio.tools version compares strictly greater
        than the GitHub latest version, otherwise ``False``.
    """
    reference = _parse_version_label(gh_latest.root)

    def _is_newer(candidate: BiotoolsVersionType) -> bool:
        # Parse outside the try so only the comparison's TypeError is swallowed.
        parsed = _parse_version_label(candidate.root)
        try:
            return parsed > reference
        except TypeError:
            # incomparable (different kind / raw) -> not counted as newer
            return False

    return any(_is_newer(bt) for bt in bt_versions)
def find_latest_bt_version(bt_versions: list[BiotoolsVersionType] | None) -> BiotoolsVersionType | None:
    """
    Find the latest bio.tools version from a list of versions.

    Versions are parsed into ``ParsedVersion`` objects and compared using
    their defined partial ordering. Incomparable versions (e.g. raw or
    differing kinds) are ignored.

    The first entry seeds the running "latest". If that seed is itself not
    comparable (a raw label, or a range whose upper bound is raw), it is
    replaced by the first comparable entry that follows; otherwise such a
    seed would make every later comparison fail and hide all real versions
    behind it.

    Parameters
    ----------
    bt_versions : list[BiotoolsVersionType] | None
        Existing bio.tools versions, or ``None``.

    Returns
    -------
    BiotoolsVersionType | None
        The latest bio.tools version. ``None`` if ``bt_versions`` is empty
        or ``None``. If no entry is comparable at all, the first entry is
        returned unchanged.
    """
    if not bt_versions:
        return None

    latest_bt: BiotoolsVersionType | None = None
    latest_parsed: ParsedVersion | None = None
    latest_comparable = False

    for bt in bt_versions:
        bt_parsed = _parse_version_label(bt.root)
        bt_comparable = bt_parsed._normalized_for_comparison() is not None

        if latest_parsed is None:
            # first entry: always becomes the initial candidate
            latest_bt, latest_parsed = bt, bt_parsed
            latest_comparable = bt_comparable
            continue

        if not latest_comparable:
            # current candidate can never win or lose a comparison; upgrade
            # to the first entry that can actually be ordered
            if bt_comparable:
                latest_bt, latest_parsed = bt, bt_parsed
                latest_comparable = True
            continue

        try:
            if bt_parsed > latest_parsed:
                latest_bt, latest_parsed = bt, bt_parsed
        except TypeError:
            # incomparable (different kind / raw) -> ignore
            continue

    return latest_bt