# Source code for bridge.services.biotools.biotools_ingestor
"""
Async client for the bio.tools API.
Fetches a Tool entry by ID and returns raw JSON consumed by the bio.tools builder.
"""
import logging
from typing import Any
import httpx
from bridge.config import settings
from bridge.services.protocols import Ingestor
logger = logging.getLogger(__name__)
# [docs] -- link marker left over from the rendered documentation page; not part of the module.
class BiotoolsIngestor(Ingestor):
    """
    Ingest metadata for a bio.tools ID via the bio.tools REST API.

    Parameters
    ----------
    biotools_id : str
        The bio.tools identifier for the tool to fetch.
    """

    def __init__(self, biotools_id: str):
        self.biotools_id = biotools_id

    async def _get(self, endpoint: str = "", params: dict | None = None) -> dict[str, Any]:
        """
        Perform an async GET request against the bio.tools API.

        The request targets ``{settings.biotools_api_base}/tool/{biotools_id}``
        followed by ``endpoint`` and always asks for a JSON response.

        Parameters
        ----------
        endpoint : str
            Suffix appended to the tool URL (default is the root tool endpoint).
        params : dict, optional
            Extra query parameters for the request. Defaults to None.
            ``format=json`` is always added and overrides any caller-supplied
            ``format`` value, because the response is decoded as JSON.

        Returns
        -------
        dict
            JSON response from the bio.tools API.

        Raises
        ------
        httpx.RequestError
            If the request could not be completed (logged, then re-raised).
        httpx.HTTPStatusError
            If the API answered with an error status (logged, then re-raised).
        ValueError
            If the response body is not valid JSON (logged, then re-raised).
        """
        url = f"{settings.biotools_api_base}/tool/{self.biotools_id}{endpoint}"
        # Send ``format=json`` through ``params`` instead of embedding it in the
        # URL string: when both an embedded query and ``params`` are given,
        # httpx may merge or replace the embedded query depending on its
        # version, so the hard-coded ``?format=json`` could silently be lost.
        query = {**(params or {}), "format": "json"}
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.get(url, params=query)
                response.raise_for_status()
                data = response.json()
        except httpx.RequestError as e:
            logger.error("Network error while fetching %s: %s", url, e)
            raise
        except httpx.HTTPStatusError as e:
            logger.warning("HTTP error from bio.tools: %s for %s", e.response.status_code, url)
            raise
        except ValueError as e:
            # ``response.json()`` signals an undecodable body with a ValueError
            # subclass; log it like the other failures and let it propagate.
            logger.error("Invalid JSON in bio.tools response for %s: %s", url, e)
            raise
        logger.debug("Fetched data from bio.tools for ID %s successfully", self.biotools_id)
        return data

    async def fetch(self) -> dict[str, Any]:
        """
        Fetch the bio.tools entry for the specified tool ID.

        A missing or empty ``name`` field is reported with a warning, but the
        payload is still returned unchanged.

        Returns
        -------
        dict
            JSON metadata for the bio.tools entry.
        """
        logger.debug("Fetching bio.tools entry for %s", self.biotools_id)
        data = await self._get()
        if not data.get("name"):
            logger.warning("bio.tools entry %s missing 'name' field", self.biotools_id)
        logger.info("Ingested bio.tools entry for %s successfully", self.biotools_id)
        return data