Duplicates bot v2: focus on root cause, larger issue body preview (#50209)

Release Notes: - N/A
2026-04-18 07:47:53 +00:00 · 2026-02-26 15:09:29 +01:00
parent 2434a77dbf
commit ceb7c0e1b2
2 changed files with 43 additions and 9 deletions
@@ -89,7 +89,12 @@ def post_comment(issue_number: int, body):
 def build_duplicate_comment(matches):
    """Build the comment body for potential duplicates."""
    match_list = "\n".join(f"- #{m['number']}" for m in matches)
-    explanations = "\n\n".join(f"**#{m['number']}:** {m['explanation']}" for m in matches)
+    explanations = "\n\n".join(
+        f"**#{m['number']}:** {m['explanation']}\n\n**Shared root cause:** {m['shared_root_cause']}"
+        if m.get('shared_root_cause')
+        else f"**#{m['number']}:** {m['explanation']}"
+        for m in matches
+    )

    return f"""This issue appears to be a duplicate of:

@@ -307,7 +312,7 @@ def enrich_magnets(magnets):
    for magnet in magnets:
        data = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{magnet['number']}")
        magnet["title"] = data["title"]
-        magnet["body_preview"] = (data.get("body") or "")[:500]
+        magnet["body_preview"] = (data.get("body") or "")[:1000]


 def areas_match(detected, magnet_area):
@@ -381,7 +386,7 @@ def search_for_similar_issues(issue, detected_areas, max_searches=6):
                        "title": item["title"],
                        "state": item.get("state", ""),
                        "created_at": item.get("created_at", ""),
-                        "body_preview": body[:500],
+                        "body_preview": body[:1000],
                        "source": search_type,
                    }
        except requests.RequestException as e:
@@ -414,12 +419,30 @@ def analyze_duplicates(anthropic_key, issue, magnets, search_results):

    system_prompt = """You analyze GitHub issues to identify potential duplicates.

-Given a new issue and a list of existing issues, identify which existing issues might be duplicates.
+Given a new issue and a list of existing issues, identify which existing issues are duplicates — meaning
+they are caused by the SAME BUG in the code, not just similar symptoms.
+
+CRITICAL DISTINCTION — shared symptoms vs shared root cause:
+- "models missing", "can't sign in", "editor hangs", "venv not detected" are SYMPTOMS that many
+  different bugs can produce. Two reports of the same symptom are NOT duplicates unless you can
+  identify a specific shared root cause.
+- A duplicate means: if a developer fixed the existing issue, the new issue would also be fixed.
+- If the issues just happen to be in the same feature area, or describe similar-sounding problems
+  with different specifics (different error messages, different triggers, different platforms, different
+  configurations), they are NOT duplicates.

 For each potential duplicate, assess confidence:
-- "high": Very likely the same issue (same root cause, same symptoms)
-- "medium": Possibly related (likely to be the same root cause)
-- Do NOT include tangentially related issues (same general area but probably different issues)
+- "high": Almost certainly the same bug. You can name a specific shared root cause, and the
+  reproduction steps / error messages / triggers are consistent.
+- "medium": Likely the same bug based on specific technical details, but some uncertainty remains.
+- Do NOT include issues that merely share symptoms, affect the same feature area, or sound similar
+  at a surface level.
+
+Examples of things that are NOT duplicates:
+- Two issues about "Copilot models not showing" — one caused by a Zed update breaking the model list,
+  the other caused by the user's plan not including those models.
+- Two issues about "Zed hangs" — one triggered by network drives, the other by large projects.
+- Two issues about "can't sign in" — one caused by a missing system package, the other by a server-side error.

 Output only valid JSON (no markdown code blocks) with this structure:
 {
@@ -427,13 +450,18 @@ Output only valid JSON (no markdown code blocks) with this structure:
    {
      "number": 12345,
      "confidence": "high|medium",
-      "explanation": "Brief explanation of why this might be a duplicate"
+      "shared_root_cause": "The specific bug/root cause shared by both issues",
+      "explanation": "Brief explanation with concrete evidence from both issues"
    }
  ],
  "summary": "One sentence summary of findings"
 }

-Only include matches with "high" or "medium" confidence. Return empty matches array if none found."""
+When in doubt, return an empty matches array. A false positive (flagging a non-duplicate) is much
+worse than a false negative (missing a real duplicate), because it wastes the time of both the
+issue author and the maintainers.
+
+Return empty matches array if none found or if you can only identify shared symptoms."""

    user_content = f"""## New Issue #{issue['number']}
 **Title:** {issue['title']}
@@ -39,6 +39,10 @@ BOT_START_DATE = "2026-02-18"
 NEEDS_TRIAGE_LABEL = "state:needs triage"
 DEFAULT_PROJECT_NUMBER = 76
 VALID_CLOSED_AS_VALUES = {"duplicate", "not_planned", "completed"}
+# Bump this when the duplicate-detection bot's behavior changes in a way that
+# could affect outcome rates (e.g. prompt rewrites, model swaps, candidate
+# filtering changes). Don't bump for unrelated changes like comment formatting.
+BOT_VERSION = "v2"


 def github_api_get(path, params=None):
@@ -279,6 +283,8 @@ def add_or_update_project_item(issue_node_id, outcome, closed_as=None, status="A
    if notes:
        set_field_value(item_id, "Notes", notes)

+    set_field_value(item_id, "Bot version", BOT_VERSION)
+
    return item_id