From 545dbfdf63fadc5fc5fbf32e9256bf15ffc4e3e3 Mon Sep 17 00:00:00 2001 From: Lauren Tan Date: Wed, 16 Apr 2025 15:59:26 -0400 Subject: [PATCH] [mcp] Dedupe docs Previously the resource would return a bunch of dupes because the algolia results would return multiple hashes (headings) for the same url. --- .../packages/react-mcp-server/src/index.ts | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/compiler/packages/react-mcp-server/src/index.ts b/compiler/packages/react-mcp-server/src/index.ts index 9f81de88ce..fbe5f58f66 100644 --- a/compiler/packages/react-mcp-server/src/index.ts +++ b/compiler/packages/react-mcp-server/src/index.ts @@ -40,14 +40,30 @@ const server = new McpServer({ version: '0.0.0', }); +function slugify(heading: string): string { + return heading + .split(' ') + .map(w => w.toLowerCase()) + .join('-'); +} + // TODO: how to verify this works? server.resource( 'docs', new ResourceTemplate('docs://{message}', {list: undefined}), - async (uri, {message}) => { + async (_uri, {message}) => { const hits = await queryAlgolia(message); + const deduped = new Map(); + for (const hit of hits) { + // drop hashes to dedupe properly + const u = new URL(hit.url); + if (deduped.has(u.pathname)) { + continue; + } + deduped.set(u.pathname, hit); + } const pages: Array = await Promise.all( - hits.map(hit => { + Array.from(deduped.values()).map(hit => { return fetch(hit.url, { headers: { 'User-Agent': @@ -70,16 +86,17 @@ server.resource( .filter(html => html !== null) .map(html => { const $ = cheerio.load(html); + const title = encodeURIComponent(slugify($('h1').text())); // react.dev should always have at least one <article> with the main content const article = $('article').html(); if (article != null) { return { - uri: uri.href, + uri: `docs://${title}`, text: turndownService.turndown(article), }; } else { return { - uri: uri.href, + uri: `docs://${title}`, // Fallback to converting the whole page to markdown text: turndownService.turndown($.html()), };