Source code for bridge.pipelines.gh2bt_for_meta.map_funcs.homepage
"""
Map homepage metadata from GitHub to bio.tools.
This module reconciles homepage URLs between GitHub repository metadata and
existing bio.tools metadata. It applies a merge policy that prefers explicit
GitHub homepage configuration when available, preserves existing bio.tools
values when GitHub is silent, and falls back to the repository URL when no
homepage is defined anywhere.
"""
from pydantic import AnyUrl
from bridge.core.biotools import UrlftpType
from bridge.logging import get_user_logger
from bridge.pipelines.policies.gh2bt import reconcile_gh_over_bt
from bridge.pipelines.utils import canonicalize_url
# Module-level logger obtained from ``bridge.logging.get_user_logger``;
# ``map_homepage`` reports its "added" / "unchanged" outcomes through it.
logger = get_user_logger()
[docs]
def map_homepage(gh_schema: dict[str, AnyUrl | str | None], bt_homepage: UrlftpType | None) -> UrlftpType | None:
    """
    Map and reconcile the GitHub and bio.tools homepage URLs.

    The GitHub homepage and the bio.tools homepage are both passed through
    ``canonicalize_url`` and the canonical forms are handed to
    ``reconcile_gh_over_bt`` for reconciliation. When neither side defines a
    homepage, the GitHub repository URL (``gh_schema["html_url"]``) is used
    as a fallback.

    A GitHub homepage that is ``None``, empty, or whitespace-only is treated
    as "not defined", so a blank field on the GitHub side never overrides an
    existing bio.tools value and never blocks the repository-URL fallback.

    Parameters
    ----------
    gh_schema : dict[str, AnyUrl | str | None]
        GitHub repository metadata dictionary. Keys read here:

        - ``'homepage'`` : homepage URL configured on GitHub (may be missing,
          ``None`` or blank).
        - ``'html_url'`` : GitHub repository URL (used as fallback).
    bt_homepage : UrlftpType | None
        Existing homepage value from bio.tools metadata, or ``None`` if none
        is defined.

    Returns
    -------
    UrlftpType | None
        - ``UrlftpType`` built from ``html_url`` when neither side defines a
          homepage and ``html_url`` is present.
        - ``bt_homepage`` unchanged when GitHub defines no homepage but
          bio.tools does.
        - Whatever ``reconcile_gh_over_bt`` returns when GitHub defines a
          homepage.
        - ``None`` when there is no GitHub homepage, no bio.tools homepage
          and no ``html_url``.
    """
    gh_homepage = gh_schema.get("homepage")
    gh_url = gh_schema.get("html_url")

    # A cleared homepage field may arrive as "" (or stray whitespace) rather
    # than None; such a value carries no URL and must count as "absent".
    # Non-blank values are forwarded to canonicalize_url exactly as before.
    gh_homepage_text = "" if gh_homepage is None else str(gh_homepage)
    gh_norm = canonicalize_url(gh_homepage_text) if gh_homepage_text.strip() else None
    bt_norm = canonicalize_url(str(bt_homepage.root)) if bt_homepage is not None else None

    if gh_norm is None:
        if bt_homepage is not None:
            # GitHub is silent: keep whatever bio.tools already has.
            logger.unchanged("No GitHub homepage found, nothing to map.")
            return bt_homepage

        # Neither side defines a homepage: fall back to the repository URL.
        if gh_url is None:
            logger.unchanged("No GitHub homepage or repo url found, nothing to map.")
            return None
        logger.added(f"homepage as GitHub repo url '{gh_url}'")
        return UrlftpType(root=str(gh_url))

    # GitHub defines a homepage: reconcile it with the bio.tools value
    # (bt_norm is None here when bio.tools has no homepage).
    return reconcile_gh_over_bt(
        gh_norm=gh_norm,
        bt_norm=bt_norm,
        bt_value=bt_homepage,
        build_bt_from_gh=lambda url: UrlftpType(root=url),
        log_label="homepage",
    )