Source code for bridge.pipelines.gh2bt_for_meta.map_funcs.description
"""
Mapping functions for description metadata.
This module reconciles textual tool descriptions between GitHub and
bio.tools. GitHub descriptions are treated as authoritative when present.
If GitHub provides no description and bio.tools is empty, a short description
is generated from the README using an LLM.
"""
from bridge.logging import get_user_logger
from bridge.pipelines.utils import normalize_text
from bridge.services import ChatMessage, HuggingFaceProvider
# Module-wide logger obtained from bridge.logging.get_user_logger(); the mapping
# function below reports its decisions through it (unchanged / added / exact /
# conflict / note).
logger = get_user_logger()
# Maximum number of characters of the (stripped) README that are embedded in the
# LLM prompt when a description has to be generated.
MAX_README_CHARS = 10000
[docs]
async def map_description(gh_params: dict | None, bt_description: str | None) -> str | None:
    """
    Map and reconcile GitHub description metadata and bio.tools description metadata.

    Policy:

    1. If GitHub provides no metadata, the existing bio.tools description
       is preserved.
    2. If GitHub provides a description:

       - If it is effectively identical to the bio.tools description
         (ignoring trailing periods and whitespace), no change is made.
       - Otherwise, the GitHub description overwrites the bio.tools value.
    3. If GitHub provides no description and bio.tools has none (``None``):

       - If a non-blank README string is available, a short description is
         generated from the README using an LLM and normalized before storage.
       - If no usable README is available, no description is set.
    4. LLM failures (including an empty LLM answer) never overwrite existing
       bio.tools descriptions; they result in ``None``.

    Parameters
    ----------
    gh_params : dict | None
        GitHub metadata dictionary.
        Expected keys include:

        - ``"description"`` : Repository description string (optional)
        - ``"readme"`` : README contents used for LLM-based description generation
    bt_description : str | None
        Existing bio.tools description, or ``None`` if unset.

    Returns
    -------
    str | None
        The reconciled bio.tools description, or ``None`` if no description
        could be determined.
    """
    if gh_params is None:
        logger.unchanged("No GitHub description found, nothing to map.")
        return bt_description

    gh_description = normalize_text(gh_params.get("description"))

    if gh_description is None:
        # Without a GitHub description the only remaining source is the LLM,
        # and generated text must never replace an existing bio.tools value.
        if bt_description is not None:
            return bt_description

        readme = gh_params.get("readme")
        # A missing, non-string or blank README gives the LLM nothing to
        # summarise; calling it anyway would only produce an invented answer.
        if not isinstance(readme, str) or not readme.strip():
            logger.unchanged("No GitHub description and no readme found, nothing to map.")
            return None

        return await _generate_description_from_readme(readme)

    # GitHub provides a description: it wins unless it already matches.
    # normalize_text can yield None, so guard before calling str methods on it.
    bt_normalized = None if bt_description is None else normalize_text(bt_description)
    if bt_normalized is None:
        logger.added("Using GitHub description as no existing bio.tools description.")
        return gh_description

    if _strip_trailing_punctuation(gh_description) == _strip_trailing_punctuation(bt_normalized):
        logger.exact("GitHub description matches existing bio.tools description.")
        return bt_description

    logger.conflict("Using GitHub description to overwrite existing bio.tools description.")
    return gh_description


# Upper bound on the length of an LLM-generated description.
# NOTE(review): presumably chosen to stay within the bio.tools description
# length limit — confirm against the bio.tools schema.
_MAX_GENERATED_DESCRIPTION_CHARS = 999


def _strip_trailing_punctuation(text: str) -> str:
    """Return *text* without trailing periods/spaces and surrounding whitespace, for comparison."""
    return text.rstrip(". ").strip()


async def _generate_description_from_readme(readme: str) -> str | None:
    """
    Ask the LLM for a 1–2 sentence description based on README content.

    Parameters
    ----------
    readme : str
        Non-blank README text. Only the first ``MAX_README_CHARS`` characters
        of the stripped text are sent to the model.

    Returns
    -------
    str | None
        The normalized, length-limited description, or ``None`` if the provider
        call failed or produced no usable text.
    """
    hf_provider = HuggingFaceProvider()
    prompt = (
        "Based on the following README content, generate a description for a bioinformatics tool. "
        "Limit your response to 1–2 sentences. "
        "Do not include any extra commentary or explanation. "
        f"Only output the description itself.\n\n{readme.strip()[:MAX_README_CHARS]}"
    )
    message_sys = ChatMessage(
        role="system",
        content=(
            "You are an expert in bioinformatics tool documentation. "
            "Your task is to generate a concise, clear, and short description for a software tool. "
            "Limit your response to 1–2 sentences. "
            "Do not include any explanation, reasoning, or commentary. "
            "Only output the description itself."
            # NOTE(review): this is appended with no separator and looks like a
            # model-specific "disable reasoning" switch — confirm the exact
            # token expected by the configured model.
            "/nothink"
        ),
    )
    message_user = ChatMessage(
        role="user",
        content=prompt,
    )

    try:
        response = await hf_provider.generate([message_sys, message_user])
        generated = normalize_text(response.content)
    except Exception as e:
        # Deliberately best-effort: any provider failure leaves the description unset.
        logger.note(f"HuggingFaceProvider call failed: {e}. Returning empty description.")
        return None

    # normalize_text can yield None; treat that, or a blank answer, as "no description"
    # instead of failing on a str method or storing an empty string.
    description = (generated or "").strip()[:_MAX_GENERATED_DESCRIPTION_CHARS].rstrip()
    if not description:
        logger.note("LLM returned an empty description. Returning empty description.")
        return None

    logger.added(
        "No GitHub description and no existing bio.tools description; using "
        "readme to generate description."
    )
    return description