fix(twilio): deduplicate matches to prevent O(N×M) result explosion (#4954)

This commit is contained in:
Kashif Khan
2026-05-15 09:34:01 +05:00
committed by GitHub
parent 6c542a5ae6
commit cd6b46a502
3 changed files with 40 additions and 18 deletions
+14 -9
View File
@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"maps"
"net/http"
regexp "github.com/wasilibs/go-re2"
@@ -23,7 +24,7 @@ type Scanner struct {
var _ detectors.Detector = (*Scanner)(nil)
var (
defaultClient = common.RetryableHTTPClient()
defaultClient = detectors.NewClientWithDedup(common.RetryableHTTPClient())
sidPat = regexp.MustCompile(`\bAC[0-9a-f]{32}\b`)
keyPat = regexp.MustCompile(`\b[0-9a-f]{32}\b`)
)
@@ -56,11 +57,17 @@ func (s Scanner) Keywords() []string {
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
keyMatches := keyPat.FindAllString(dataStr, -1)
sidMatches := sidPat.FindAllString(dataStr, -1)
uniqueKeys := make(map[string]struct{})
for _, k := range keyPat.FindAllString(dataStr, -1) {
uniqueKeys[k] = struct{}{}
}
uniqueSIDs := make(map[string]struct{})
for _, s := range sidPat.FindAllString(dataStr, -1) {
uniqueSIDs[s] = struct{}{}
}
for _, sid := range sidMatches {
for _, key := range keyMatches {
for sid := range uniqueSIDs {
for key := range uniqueKeys {
s1 := detectors.Result{
DetectorType: detector_typepb.DetectorType_Twilio,
Raw: []byte(sid),
@@ -78,9 +85,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.Verified = isVerified
s1.SetVerificationError(verificationErr)
for key, value := range extraData {
s1.ExtraData[key] = value
}
maps.Copy(s1.ExtraData, extraData)
}
results = append(results, s1)
@@ -107,7 +112,7 @@ func verifyTwilio(ctx context.Context, client *http.Client, key, sid string) (ma
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
req.Header.Add("Accept", "*/*")
req.SetBasicAuth(sid, key)
resp, err := client.Do(req)
resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_Twilio, sid+key, req)
if err != nil {
return nil, false, nil
}
+14 -9
View File
@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"maps"
"net/http"
regexp "github.com/wasilibs/go-re2"
@@ -23,7 +24,7 @@ type Scanner struct {
var _ detectors.Detector = (*Scanner)(nil)
var (
defaultClient = common.SaneHttpClient()
defaultClient = detectors.NewClientWithDedup(common.SaneHttpClient())
apiKeyPat = regexp.MustCompile(`\bSK[a-zA-Z0-9]{32}\b`)
secretPat = regexp.MustCompile(`\b[0-9a-zA-Z]{32}\b`)
)
@@ -54,11 +55,17 @@ func (s Scanner) Keywords() []string {
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
apiKeyMatches := apiKeyPat.FindAllString(dataStr, -1)
secretMatches := secretPat.FindAllString(dataStr, -1)
uniqueAPIKeys := make(map[string]struct{})
for _, k := range apiKeyPat.FindAllString(dataStr, -1) {
uniqueAPIKeys[k] = struct{}{}
}
uniqueSecrets := make(map[string]struct{})
for _, s := range secretPat.FindAllString(dataStr, -1) {
uniqueSecrets[s] = struct{}{}
}
for _, apiKey := range apiKeyMatches {
for _, secret := range secretMatches {
for apiKey := range uniqueAPIKeys {
for secret := range uniqueSecrets {
s1 := detectors.Result{
DetectorType: detector_typepb.DetectorType_Twilio,
Raw: []byte(apiKey),
@@ -73,9 +80,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.Verified = isVerified
s1.SetVerificationError(verificationErr)
for key, value := range extraData {
s1.ExtraData[key] = value
}
maps.Copy(s1.ExtraData, extraData)
}
results = append(results, s1)
@@ -103,7 +108,7 @@ func verifyTwilioAPIKey(ctx context.Context, client *http.Client, apiKey, secret
req.Header.Add("Accept", "*/*")
req.SetBasicAuth(apiKey, secret)
resp, err := client.Do(req)
resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_TwilioApiKey, apiKey+secret, req)
if err != nil {
return nil, false, nil
}
+12
View File
@@ -504,6 +504,10 @@ func filterDetectors(filterFunc func(detectors.Detector) bool, input []detectors
// been processed before, thereby saving computational overhead.
func (e *Engine) initialize(ctx context.Context) error {
// TODO (ahrav): Determine the optimal cache size.
// KNOWN ISSUE: 512 entries is likely too small for large scans. With concurrent notifier
// workers, a single burst of unique findings can evict previously seen keys, allowing
// the same secret to be re-emitted on a subsequent pass. Raise to at least 10000
// (or make configurable via Config).
const cacheSize = 512 // number of entries in the LRU cache
cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
@@ -1287,6 +1291,14 @@ func (e *Engine) notifierWorker(ctx context.Context) {
// Duplicate results with the same decoder type SHOULD have their own entry in the
// results list, this would happen if the same secret is found multiple times.
// Note: If the source type is postman, we dedupe the results regardless of decoder type.
//
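// KNOWN ISSUE: The condition below only suppresses a duplicate when the cached decoder
// type differs from the result's decoder type (cross-decoder dedup) or when the source
// type is Postman. For the same decoder type on any other source the condition evaluates
// to false and EVERY occurrence passes through, even when key is already in the cache.
// The LRU cache size of 512 entries further compounds this under concurrent notifier
// workers: entries are evicted quickly, re-admitting the same finding on every pass.
// Proposed fix: change the condition to `if _, ok := e.dedupeCache.Get(key); ok`
// and raise the cache size (see cacheSize const in initialize()).
// NOTE(review): the proposed condition would also drop same-decoder duplicates, which
// the comment above says SHOULD have their own entry in the results list. Confirm the
// intended behavior before changing it.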
key := fmt.Sprintf("%s%s%s%+v", result.DetectorType.String(), result.Raw, result.RawV2, result.SourceMetadata)
if val, ok := e.dedupeCache.Get(key); ok && (val != result.DecoderType ||
result.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {