mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2026-05-16 13:20:35 +00:00
fix(twilio): deduplicate matches to prevent O(N×M) result explosion (#4954)
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
|
||||
regexp "github.com/wasilibs/go-re2"
|
||||
@@ -23,7 +24,7 @@ type Scanner struct {
|
||||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
defaultClient = common.RetryableHTTPClient()
|
||||
defaultClient = detectors.NewClientWithDedup(common.RetryableHTTPClient())
|
||||
sidPat = regexp.MustCompile(`\bAC[0-9a-f]{32}\b`)
|
||||
keyPat = regexp.MustCompile(`\b[0-9a-f]{32}\b`)
|
||||
)
|
||||
@@ -56,11 +57,17 @@ func (s Scanner) Keywords() []string {
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
dataStr := string(data)
|
||||
|
||||
keyMatches := keyPat.FindAllString(dataStr, -1)
|
||||
sidMatches := sidPat.FindAllString(dataStr, -1)
|
||||
uniqueKeys := make(map[string]struct{})
|
||||
for _, k := range keyPat.FindAllString(dataStr, -1) {
|
||||
uniqueKeys[k] = struct{}{}
|
||||
}
|
||||
uniqueSIDs := make(map[string]struct{})
|
||||
for _, s := range sidPat.FindAllString(dataStr, -1) {
|
||||
uniqueSIDs[s] = struct{}{}
|
||||
}
|
||||
|
||||
for _, sid := range sidMatches {
|
||||
for _, key := range keyMatches {
|
||||
for sid := range uniqueSIDs {
|
||||
for key := range uniqueKeys {
|
||||
s1 := detectors.Result{
|
||||
DetectorType: detector_typepb.DetectorType_Twilio,
|
||||
Raw: []byte(sid),
|
||||
@@ -78,9 +85,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
|
||||
s1.Verified = isVerified
|
||||
s1.SetVerificationError(verificationErr)
|
||||
|
||||
for key, value := range extraData {
|
||||
s1.ExtraData[key] = value
|
||||
}
|
||||
maps.Copy(s1.ExtraData, extraData)
|
||||
}
|
||||
|
||||
results = append(results, s1)
|
||||
@@ -107,7 +112,7 @@ func verifyTwilio(ctx context.Context, client *http.Client, key, sid string) (ma
|
||||
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.Header.Add("Accept", "*/*")
|
||||
req.SetBasicAuth(sid, key)
|
||||
resp, err := client.Do(req)
|
||||
resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_Twilio, sid+key, req)
|
||||
if err != nil {
|
||||
return nil, false, nil
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
|
||||
regexp "github.com/wasilibs/go-re2"
|
||||
@@ -23,7 +24,7 @@ type Scanner struct {
|
||||
var _ detectors.Detector = (*Scanner)(nil)
|
||||
|
||||
var (
|
||||
defaultClient = common.SaneHttpClient()
|
||||
defaultClient = detectors.NewClientWithDedup(common.SaneHttpClient())
|
||||
apiKeyPat = regexp.MustCompile(`\bSK[a-zA-Z0-9]{32}\b`)
|
||||
secretPat = regexp.MustCompile(`\b[0-9a-zA-Z]{32}\b`)
|
||||
)
|
||||
@@ -54,11 +55,17 @@ func (s Scanner) Keywords() []string {
|
||||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
|
||||
dataStr := string(data)
|
||||
|
||||
apiKeyMatches := apiKeyPat.FindAllString(dataStr, -1)
|
||||
secretMatches := secretPat.FindAllString(dataStr, -1)
|
||||
uniqueAPIKeys := make(map[string]struct{})
|
||||
for _, k := range apiKeyPat.FindAllString(dataStr, -1) {
|
||||
uniqueAPIKeys[k] = struct{}{}
|
||||
}
|
||||
uniqueSecrets := make(map[string]struct{})
|
||||
for _, s := range secretPat.FindAllString(dataStr, -1) {
|
||||
uniqueSecrets[s] = struct{}{}
|
||||
}
|
||||
|
||||
for _, apiKey := range apiKeyMatches {
|
||||
for _, secret := range secretMatches {
|
||||
for apiKey := range uniqueAPIKeys {
|
||||
for secret := range uniqueSecrets {
|
||||
s1 := detectors.Result{
|
||||
DetectorType: detector_typepb.DetectorType_Twilio,
|
||||
Raw: []byte(apiKey),
|
||||
@@ -73,9 +80,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
|
||||
s1.Verified = isVerified
|
||||
s1.SetVerificationError(verificationErr)
|
||||
|
||||
for key, value := range extraData {
|
||||
s1.ExtraData[key] = value
|
||||
}
|
||||
maps.Copy(s1.ExtraData, extraData)
|
||||
}
|
||||
|
||||
results = append(results, s1)
|
||||
@@ -103,7 +108,7 @@ func verifyTwilioAPIKey(ctx context.Context, client *http.Client, apiKey, secret
|
||||
req.Header.Add("Accept", "*/*")
|
||||
req.SetBasicAuth(apiKey, secret)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
resp, err := detectors.DoWithDedup(client, detector_typepb.DetectorType_TwilioApiKey, apiKey+secret, req)
|
||||
if err != nil {
|
||||
return nil, false, nil
|
||||
}
|
||||
|
||||
@@ -504,6 +504,10 @@ func filterDetectors(filterFunc func(detectors.Detector) bool, input []detectors
|
||||
// been processed before, thereby saving computational overhead.
|
||||
func (e *Engine) initialize(ctx context.Context) error {
|
||||
// TODO (ahrav): Determine the optimal cache size.
|
||||
// KNOWN ISSUE: 512 entries is far too small for large scans. Under concurrent notifier
|
||||
// workers a single burst of unique findings easily evicts previously seen keys, allowing
|
||||
// the same secret to be re-emitted on every subsequent pass. Raise to at least 10000
|
||||
// (or make configurable via Config).
|
||||
const cacheSize = 512 // number of entries in the LRU cache
|
||||
|
||||
cache, err := lru.New[string, detectorspb.DecoderType](cacheSize)
|
||||
@@ -1287,6 +1291,14 @@ func (e *Engine) notifierWorker(ctx context.Context) {
|
||||
// Duplicate results with the same decoder type SHOULD have their own entry in the
|
||||
// results list, this would happen if the same secret is found multiple times.
|
||||
// Note: If the source type is postman, we dedupe the results regardless of decoder type.
|
||||
//
|
||||
// KNOWN ISSUE: The condition below only suppresses duplicates when the decoder type
|
||||
// differs (cross-decoder dedup). For the same decoder type the condition evaluates to
|
||||
// false and EVERY occurrence passes through, even when key is already in the cache.
|
||||
// The LRU cache size of 512 entries further compounds this under concurrent notifier
|
||||
// workers: entries are evicted quickly, re-admitting the same finding on every pass.
|
||||
// Proposed fix: change the condition to `if _, ok := e.dedupeCache.Get(key); ok`
|
||||
// and raise the cache size (see cacheSize const in initialize()).
|
||||
key := fmt.Sprintf("%s%s%s%+v", result.DetectorType.String(), result.Raw, result.RawV2, result.SourceMetadata)
|
||||
if val, ok := e.dedupeCache.Get(key); ok && (val != result.DecoderType ||
|
||||
result.SourceType == sourcespb.SourceType_SOURCE_TYPE_POSTMAN) {
|
||||
|
||||
Reference in New Issue
Block a user