mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-20 13:32:30 +00:00
fix(browse-sh): fetch SKILL.md via /api/skills/{slug}+skillMdUrl
The catalog's sourceUrl points at github.com/browserbase/browse.sh,
whose underlying repository is not always public — most raw URLs derived
from it 404. Use the per-skill detail endpoint instead, which returns a
skillMdUrl CDN blob that reliably resolves to the SKILL.md text. Fall
back to a raw.githubusercontent.com sourceUrl if the detail call fails.
- tools/skills_hub.py: rewrite BrowseShSource.fetch() to resolve via
/api/skills/{slug} -> skillMdUrl; drop the unreachable _to_raw_url
helper; expose the resolved URL in bundle.metadata.skill_md_url.
- tests/tools/test_skills_hub_browse_sh.py: match the real catalog
shape (name = task name, slug = host/task-id), exercise the
detail-endpoint -> blob two-call flow, and add a fallback test.
- scripts/release.py: map kylejeong21@gmail.com -> Kylejeong2.
This commit is contained in:
@@ -104,6 +104,7 @@ AUTHOR_MAP = {
|
||||
"147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0",
|
||||
"97489706+purzbeats@users.noreply.github.com": "purzbeats",
|
||||
"hugosequier@gmail.com": "Hugo-SEQUIER",
|
||||
"kylejeong21@gmail.com": "Kylejeong2",
|
||||
"128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
|
||||
"50326054+nocturnum91@users.noreply.github.com": "nocturnum91",
|
||||
"52470719+gianfrancopiana@users.noreply.github.com": "gianfrancopiana",
|
||||
|
||||
@@ -6,29 +6,31 @@ from unittest.mock import patch
|
||||
from tools.skills_hub import BrowseShSource, SkillMeta, SkillBundle
|
||||
|
||||
|
||||
# Catalog shape mirrors the real ``GET https://browse.sh/api/skills`` response:
|
||||
# ``slug`` is ``<hostname>/<task-id>`` and ``name`` is the task name.
|
||||
SAMPLE_CATALOG = [
|
||||
{
|
||||
"slug": "airbnb.com/search-listings-ddgioa",
|
||||
"name": "airbnb.com",
|
||||
"name": "search-listings",
|
||||
"title": "Airbnb Search Listings",
|
||||
"description": "Search and browse Airbnb listings by location and dates.",
|
||||
"hostname": "airbnb.com",
|
||||
"category": "travel",
|
||||
"tags": ["travel", "accommodation"],
|
||||
"sourceUrl": "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md",
|
||||
"sourceUrl": "https://github.com/browserbase/browse.sh/blob/main/skills/airbnb.com/search-listings-ddgioa/SKILL.md",
|
||||
"recommendedMethod": "stagehand",
|
||||
"proxies": False,
|
||||
"installCount": 42,
|
||||
},
|
||||
{
|
||||
"slug": "amazon.com/search-products-xyz",
|
||||
"name": "amazon.com",
|
||||
"name": "search-products",
|
||||
"title": "Amazon Product Search",
|
||||
"description": "Search for products on Amazon.",
|
||||
"hostname": "amazon.com",
|
||||
"category": "shopping",
|
||||
"tags": ["shopping", "ecommerce"],
|
||||
"sourceUrl": "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md",
|
||||
"sourceUrl": "https://github.com/browserbase/browse.sh/blob/main/skills/amazon.com/search-products-xyz/SKILL.md",
|
||||
"recommendedMethod": "stagehand",
|
||||
"proxies": False,
|
||||
"installCount": 99,
|
||||
@@ -60,7 +62,7 @@ class TestBrowseShSource(unittest.TestCase):
|
||||
self.assertGreaterEqual(len(results), 1)
|
||||
meta = results[0]
|
||||
self.assertIsInstance(meta, SkillMeta)
|
||||
self.assertEqual(meta.name, "airbnb.com")
|
||||
self.assertEqual(meta.name, "search-listings")
|
||||
self.assertEqual(meta.source, "browse-sh")
|
||||
self.assertEqual(meta.trust_level, "community")
|
||||
self.assertEqual(meta.identifier, "browse-sh/airbnb.com/search-listings-ddgioa")
|
||||
@@ -70,7 +72,7 @@ class TestBrowseShSource(unittest.TestCase):
|
||||
def test_search_filters_by_query(self, _mock_catalog):
|
||||
results = self.src.search("amazon", limit=10)
|
||||
self.assertEqual(len(results), 1)
|
||||
self.assertEqual(results[0].name, "amazon.com")
|
||||
self.assertEqual(results[0].extra["hostname"], "amazon.com")
|
||||
|
||||
results_all = self.src.search("", limit=10)
|
||||
self.assertEqual(len(results_all), 2)
|
||||
@@ -78,22 +80,50 @@ class TestBrowseShSource(unittest.TestCase):
|
||||
@patch("tools.skills_hub.httpx.get")
|
||||
@patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
|
||||
def test_fetch_returns_bundle(self, _mock_catalog, mock_get):
|
||||
mock_get.return_value = _MockResponse(
|
||||
status_code=200,
|
||||
text="# Airbnb Skill\n\nSearch and book Airbnb listings.",
|
||||
# First call: GET /api/skills/{slug} returns the detail object with skillMdUrl.
|
||||
# Second call: GET the CDN blob URL returns the SKILL.md text.
|
||||
blob_url = (
|
||||
"https://gh0lfhlmyzhg6tww.public.blob.vercel-storage.com"
|
||||
"/skills/airbnb.com/search-listings-ddgioa/SKILL.md"
|
||||
)
|
||||
mock_get.side_effect = [
|
||||
_MockResponse(status_code=200, json_data={"skillMdUrl": blob_url}),
|
||||
_MockResponse(status_code=200, text="# Airbnb Skill\n\nSearch and book Airbnb listings."),
|
||||
]
|
||||
bundle = self.src.fetch("browse-sh/airbnb.com/search-listings-ddgioa")
|
||||
self.assertIsNotNone(bundle)
|
||||
self.assertIsInstance(bundle, SkillBundle)
|
||||
self.assertEqual(bundle.name, "airbnb.com")
|
||||
self.assertEqual(bundle.name, "search-listings")
|
||||
self.assertIn("SKILL.md", bundle.files)
|
||||
self.assertIn("Airbnb", bundle.files["SKILL.md"])
|
||||
self.assertEqual(bundle.source, "browse-sh")
|
||||
self.assertEqual(bundle.trust_level, "community")
|
||||
self.assertEqual(bundle.identifier, "browse-sh/airbnb.com/search-listings-ddgioa")
|
||||
mock_get.assert_called_once()
|
||||
call_url = mock_get.call_args.args[0]
|
||||
self.assertIn("raw.githubusercontent.com", call_url)
|
||||
self.assertEqual(bundle.metadata["skill_md_url"], blob_url)
|
||||
# Two HTTP calls: detail endpoint + blob.
|
||||
self.assertEqual(mock_get.call_count, 2)
|
||||
first_url = mock_get.call_args_list[0].args[0]
|
||||
second_url = mock_get.call_args_list[1].args[0]
|
||||
self.assertIn("/api/skills/airbnb.com/search-listings-ddgioa", first_url)
|
||||
self.assertEqual(second_url, blob_url)
|
||||
|
||||
@patch("tools.skills_hub.httpx.get")
@patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
def test_fetch_falls_back_to_raw_github_url(self, mock_catalog, mock_get):
    """fetch() uses a raw.githubusercontent.com sourceUrl when the detail call fails.

    The detail endpoint answers 500, so the SKILL.md must be downloaded from
    the catalog entry's raw ``sourceUrl``. Besides the returned content, the
    test pins *which* URLs were requested so a regression that fetches some
    other URL cannot pass unnoticed.
    """
    raw_url = (
        "https://raw.githubusercontent.com/example/repo/main/skills/"
        "airbnb.com/search-listings-ddgioa/SKILL.md"
    )
    # Build a modified copy so the shared SAMPLE_CATALOG fixture stays untouched.
    entry = dict(SAMPLE_CATALOG[0])
    entry["sourceUrl"] = raw_url
    # Reconfigure the mock injected by the decorator instead of patching
    # ``_fetch_catalog`` a second time.
    mock_catalog.return_value = [entry]
    mock_get.side_effect = [
        _MockResponse(status_code=500, json_data=None),  # detail endpoint fails
        _MockResponse(status_code=200, text="# Fallback content"),
    ]

    bundle = self.src.fetch("browse-sh/airbnb.com/search-listings-ddgioa")

    self.assertIsNotNone(bundle)
    self.assertEqual(bundle.files["SKILL.md"], "# Fallback content")
    self.assertEqual(bundle.metadata["skill_md_url"], raw_url)

    # Two HTTP calls: the failed detail lookup, then the raw fallback URL.
    self.assertEqual(mock_get.call_count, 2)
    detail_call_url = mock_get.call_args_list[0].args[0]
    content_call_url = mock_get.call_args_list[1].args[0]
    self.assertIn("/api/skills/airbnb.com/search-listings-ddgioa", detail_call_url)
    self.assertEqual(content_call_url, raw_url)
|
||||
|
||||
@patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
|
||||
def test_fetch_missing_slug_returns_none(self, _mock_catalog):
|
||||
@@ -105,28 +135,12 @@ class TestBrowseShSource(unittest.TestCase):
|
||||
meta = self.src.inspect("browse-sh/airbnb.com/search-listings-ddgioa")
|
||||
self.assertIsNotNone(meta)
|
||||
self.assertIsInstance(meta, SkillMeta)
|
||||
self.assertEqual(meta.name, "airbnb.com")
|
||||
self.assertEqual(meta.name, "search-listings")
|
||||
self.assertEqual(meta.identifier, "browse-sh/airbnb.com/search-listings-ddgioa")
|
||||
self.assertEqual(meta.extra["hostname"], "airbnb.com")
|
||||
self.assertEqual(meta.extra["category"], "travel")
|
||||
self.assertEqual(meta.extra["install_count"], 42)
|
||||
|
||||
def test_to_raw_url_conversion(self):
    """_to_raw_url rewrites GitHub blob URLs, passes raw URLs through, rejects others."""
    convert = self.src._to_raw_url

    # A GitHub HTML (blob) URL is rewritten to its raw equivalent.
    self.assertEqual(
        convert("https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md"),
        "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/airbnb.com/SKILL.md",
    )

    # A URL that is already raw comes back untouched.
    raw_input_url = "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md"
    self.assertEqual(convert(raw_input_url), raw_input_url)

    # Anything that is neither form cannot be converted.
    self.assertIsNone(convert("https://example.com/something"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
+45
-25
@@ -2358,12 +2358,17 @@ class LobeHubSource(SkillSource):
|
||||
class BrowseShSource(SkillSource):
|
||||
"""Discover and install site-specific browser automation skills from browse.sh.
|
||||
|
||||
browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
|
||||
browse.sh (https://browse.sh) is Browserbase's catalog of 200+ SKILL.md files
|
||||
that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
|
||||
Each skill has a sourceUrl pointing to the raw SKILL.md on GitHub.
|
||||
The catalog lives at ``/api/skills`` and each skill's actual SKILL.md content
|
||||
is fetched via ``/api/skills/{slug}`` which returns a ``skillMdUrl`` field
|
||||
pointing at a CDN-hosted blob — the catalog's ``sourceUrl`` field is a GitHub
|
||||
HTML URL whose underlying repository is not always public, so it cannot be
|
||||
relied on for content fetch.
|
||||
"""
|
||||
|
||||
CATALOG_URL = "https://browse.sh/api/skills"
|
||||
SKILL_DETAIL_URL = "https://browse.sh/api/skills/{slug}"
|
||||
_CACHE_KEY = "browse_sh_catalog"
|
||||
|
||||
def source_id(self) -> str:
|
||||
@@ -2454,20 +2459,22 @@ class BrowseShSource(SkillSource):
|
||||
item = next((i for i in catalog if i.get("slug") == slug), None)
|
||||
if not item:
|
||||
return None
|
||||
source_url = item.get("sourceUrl", "")
|
||||
if not source_url:
|
||||
return None
|
||||
# Convert GitHub HTML URL to raw URL if needed
|
||||
raw_url = self._to_raw_url(source_url)
|
||||
if not raw_url:
|
||||
|
||||
# Resolve the actual SKILL.md content URL via the per-skill detail
|
||||
# endpoint, which returns a ``skillMdUrl`` (CDN blob). The catalog's
|
||||
# ``sourceUrl`` is a GitHub HTML link whose underlying repo is not
|
||||
# reliably public, so we don't use it for content.
|
||||
md_url = self._resolve_skill_md_url(slug, item)
|
||||
if not md_url:
|
||||
return None
|
||||
try:
|
||||
resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
|
||||
resp = httpx.get(md_url, timeout=20, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
content = resp.text
|
||||
except httpx.HTTPError:
|
||||
return None
|
||||
|
||||
meta = self._item_to_meta(item)
|
||||
name = meta.name if meta else slug.split("/")[-1]
|
||||
return SkillBundle(
|
||||
@@ -2479,31 +2486,44 @@ class BrowseShSource(SkillSource):
|
||||
metadata={
|
||||
"slug": slug,
|
||||
"hostname": item.get("hostname", ""),
|
||||
"source_url": source_url,
|
||||
"source_url": item.get("sourceUrl", ""),
|
||||
"skill_md_url": md_url,
|
||||
},
|
||||
)
|
||||
|
||||
def _resolve_skill_md_url(self, slug: str, item: Dict) -> Optional[str]:
    """Resolve the URL that serves the SKILL.md content for a slug.

    Primary path: GET ``SKILL_DETAIL_URL`` formatted with ``slug`` and read
    ``skillMdUrl`` from the JSON object in the response.
    Fallback: if that yields no usable URL, return the catalog item's
    ``sourceUrl`` when it is hosted on ``raw.githubusercontent.com``.

    Args:
        slug: Catalog slug, e.g. ``airbnb.com/search-listings-ddgioa``.
        item: Catalog entry for ``slug``. A non-dict value is tolerated and
            simply disables the fallback.

    Returns:
        An ``http``/``https`` URL string, or ``None`` when neither path
        produces one.
    """
    from urllib.parse import urlsplit

    def _http_host(candidate) -> Optional[str]:
        """Return the lower-cased host of an http(s) URL, or None if it is not one."""
        if not isinstance(candidate, str):
            return None
        try:
            parts = urlsplit(candidate)
        except ValueError:
            # urlsplit rejects some malformed netlocs (e.g. broken IPv6 literals).
            return None
        if parts.scheme not in ("http", "https"):
            return None
        return (parts.hostname or "").lower() or None

    try:
        detail = httpx.get(
            self.SKILL_DETAIL_URL.format(slug=slug),
            timeout=20,
            follow_redirects=True,
        )
        if detail.status_code == 200:
            data = detail.json()
            md_url = data.get("skillMdUrl") if isinstance(data, dict) else None
            # Require a real http(s) URL with a host, not just an "http" prefix.
            if _http_host(md_url):
                return md_url
    except (httpx.HTTPError, ValueError):
        # Deliberate best effort: a transport error or an undecodable body
        # (JSON decode errors are ValueError subclasses) falls through to
        # the sourceUrl fallback below.
        pass

    source_url = item.get("sourceUrl") if isinstance(item, dict) else None
    # Compare the parsed host; a substring test would also accept URLs that
    # merely mention raw.githubusercontent.com in their path or query.
    if _http_host(source_url) == "raw.githubusercontent.com":
        return source_url
    return None
|
||||
|
||||
def _slug_from_identifier(self, identifier: str) -> str:
|
||||
"""Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'."""
|
||||
if identifier.startswith("browse-sh/"):
|
||||
return identifier[len("browse-sh/"):]
|
||||
return identifier
|
||||
|
||||
def _to_raw_url(self, url: str) -> Optional[str]:
|
||||
"""Convert a GitHub HTML URL to a raw.githubusercontent.com URL."""
|
||||
if "raw.githubusercontent.com" in url:
|
||||
return url
|
||||
# https://github.com/owner/repo/blob/branch/path -> raw URL
|
||||
import re
|
||||
m = re.match(
|
||||
r"https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/(.+)",
|
||||
url,
|
||||
)
|
||||
if m:
|
||||
owner, repo, branch, path = m.groups()
|
||||
return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Official optional skills source adapter
|
||||
|
||||
Reference in New Issue
Block a user