diff --git a/pkg/detectors/twilio/twilio.go b/pkg/detectors/twilio/twilio.go index c25d68f58..3052071c5 100644 --- a/pkg/detectors/twilio/twilio.go +++ b/pkg/detectors/twilio/twilio.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "net/http" regexp "github.com/wasilibs/go-re2" @@ -23,7 +24,7 @@ type Scanner struct { var _ detectors.Detector = (*Scanner)(nil) var ( - defaultClient = common.RetryableHTTPClient() + defaultClient = detectors.NewClientWithDedup(common.RetryableHTTPClient()) sidPat = regexp.MustCompile(`\bAC[0-9a-f]{32}\b`) keyPat = regexp.MustCompile(`\b[0-9a-f]{32}\b`) ) @@ -56,11 +57,17 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - keyMatches := keyPat.FindAllString(dataStr, -1) - sidMatches := sidPat.FindAllString(dataStr, -1) + uniqueKeys := make(map[string]struct{}) + for _, k := range keyPat.FindAllString(dataStr, -1) { + uniqueKeys[k] = struct{}{} + } + uniqueSIDs := make(map[string]struct{}) + for _, s := range sidPat.FindAllString(dataStr, -1) { + uniqueSIDs[s] = struct{}{} + } - for _, sid := range sidMatches { - for _, key := range keyMatches { + for sid := range uniqueSIDs { + for key := range uniqueKeys { s1 := detectors.Result{ DetectorType: detector_typepb.DetectorType_Twilio, Raw: []byte(sid), @@ -78,9 +85,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1.Verified = isVerified s1.SetVerificationError(verificationErr) - for key, value := range extraData { - s1.ExtraData[key] = value - } + maps.Copy(s1.ExtraData, extraData) } results = append(results, s1) @@ -107,7 +112,7 @@ func verifyTwilio(ctx context.Context, client *http.Client, key, sid string) (ma req.Header.Add("Content-Type", "application/x-www-form-urlencoded") req.Header.Add("Accept", "*/*") req.SetBasicAuth(sid, key) - resp, err := client.Do(req) + resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_Twilio, sid+key, req) if err != nil { return nil, false, nil } diff --git a/pkg/detectors/twilioapikey/twilioapikey.go b/pkg/detectors/twilioapikey/twilioapikey.go index 9b6c54141..653452699 100644 --- a/pkg/detectors/twilioapikey/twilioapikey.go +++ b/pkg/detectors/twilioapikey/twilioapikey.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "net/http" regexp "github.com/wasilibs/go-re2" @@ -23,7 +24,7 @@ type Scanner struct { var _ detectors.Detector = (*Scanner)(nil) var ( - defaultClient = common.SaneHttpClient() + defaultClient = detectors.NewClientWithDedup(common.SaneHttpClient()) apiKeyPat = regexp.MustCompile(`\bSK[a-zA-Z0-9]{32}\b`) secretPat = regexp.MustCompile(`\b[0-9a-zA-Z]{32}\b`) ) @@ -54,11 +55,17 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - apiKeyMatches := apiKeyPat.FindAllString(dataStr, -1) - secretMatches := secretPat.FindAllString(dataStr, -1) + uniqueAPIKeys := make(map[string]struct{}) + for _, k := range apiKeyPat.FindAllString(dataStr, -1) { + uniqueAPIKeys[k] = struct{}{} + } + uniqueSecrets := make(map[string]struct{}) + for _, s := range secretPat.FindAllString(dataStr, -1) { + uniqueSecrets[s] = struct{}{} + } - for _, apiKey := range apiKeyMatches { - for _, secret := range secretMatches { + for apiKey := range uniqueAPIKeys { + for secret := range uniqueSecrets { s1 := detectors.Result{ DetectorType: detector_typepb.DetectorType_Twilio, Raw: []byte(apiKey), @@ -73,9 +80,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1.Verified = isVerified s1.SetVerificationError(verificationErr) - for key, value := range extraData { - s1.ExtraData[key] = value - } + maps.Copy(s1.ExtraData, extraData) } results = append(results, s1) @@ -103,7 +108,7 @@ func verifyTwilioAPIKey(ctx context.Context, client *http.Client, apiKey, secret req.Header.Add("Accept", "*/*") req.SetBasicAuth(apiKey, secret) - resp, err := client.Do(req) + resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_TwilioApiKey, apiKey+secret, req) if err != nil { return nil, false, nil } diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 180e9e189..36cf3dce3 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -504,6 +504,10 @@ func filterDetectors(filterFunc func(detectors.Detector) bool, input []detectors // been processed before, thereby saving computational overhead. func (e *Engine) initialize(ctx context.Context) error { // TODO (ahrav): Determine the optimal cache size. + // KNOWN ISSUE: 512 entries is far too small for large scans. Under concurrent notifier + // workers a single burst of unique findings easily evicts previously seen keys, allowing + // the same secret to be re-emitted on every subsequent pass. Raise to at least 10000 + // (or make configurable via Config). const cacheSize = 512 // number of entries in the LRU cache cache, err := lru.New[string, detectorspb.DecoderType](cacheSize) @@ -1287,6 +1291,14 @@ func (e *Engine) notifierWorker(ctx context.Context) { // Duplicate results with the same decoder type SHOULD have their own entry in the // results list, this would happen if the same secret is found multiple times. // Note: If the source type is postman, we dedupe the results regardless of decoder type. + // + // KNOWN ISSUE: The condition below only suppresses duplicates when the decoder type + // differs (cross-decoder dedup). For the same decoder type the condition evaluates to + // false and EVERY occurrence passes through, even when key is already in the cache. + // The LRU cache size of 512 entries further compounds this under concurrent notifier + // workers: entries are evicted quickly, re-admitting the same finding on every pass. + // Proposed fix: change the condition to `if _, ok := e.dedupeCache.Get(key); ok` + // and raise the cache size (see cacheSize const in initialize()). key := fmt.Sprintf("%s%s%s%+v", result.DetectorType.String(), result.Raw, result.RawV2, result.SourceMetadata) if val, ok := e.dedupeCache.Get(key); ok && (val != result.DecoderType || result.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {