Github and Gitlab Authentication via http.extraHeader for cloning Repositories (#4139)

* initial code changes

* move args before clone command

* add comments

* added flag for github source to support backward compatibility

* added flag for gitlab source to support backward compatibility

* inverse the logic for enterprise

* remove print statement

* remove flag defaults

* updated comments and removed CloneRepoUsingTokenInHeader function

* false->true
This commit is contained in:
Kashif Khan
2025-05-27 11:24:43 +05:00
committed by GitHub
parent acb9826e00
commit 92e9157267
15 changed files with 613 additions and 532 deletions
+4
View File
@@ -119,6 +119,7 @@ var (
githubScanPRComments = githubScan.Flag("pr-comments", "Include pull request descriptions and comments in scan.").Bool()
githubScanGistComments = githubScan.Flag("gist-comments", "Include gist comments in scan.").Bool()
githubCommentsTimeframeDays = githubScan.Flag("comments-timeframe", "Number of days in the past to review when scanning issue, PR, and gist comments.").Uint32()
githubAuthInUrl = githubScan.Flag("auth-in-url", "Embed authentication credentials in repository URLs instead of using secure HTTP headers").Bool()
// GitHub Cross Fork Object Reference Experimental Feature
githubExperimentalScan = cli.Command("github-experimental", "Run an experimental GitHub scan. Must specify at least one experimental sub-module to run: object-discovery.")
@@ -139,6 +140,7 @@ var (
gitlabScanExcludePaths = gitlabScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
gitlabScanIncludeRepos = gitlabScan.Flag("include-repos", `Repositories to include in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/trufflehog", "trufflesecurity/t*"`).Strings()
gitlabScanExcludeRepos = gitlabScan.Flag("exclude-repos", `Repositories to exclude in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/driftwood", "trufflesecurity/d*"`).Strings()
gitlabAuthInUrl = gitlabScan.Flag("auth-in-url", "Embed authentication credentials in repository URLs instead of using secure HTTP headers").Bool()
filesystemScan = cli.Command("filesystem", "Find credentials in a filesystem.")
filesystemPaths = filesystemScan.Arg("path", "Path to file or directory to scan.").Strings()
@@ -741,6 +743,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
IncludeGistComments: *githubScanGistComments,
CommentsTimeframeDays: *githubCommentsTimeframeDays,
Filter: filter,
AuthInUrl: *githubAuthInUrl,
}
if ref, err = eng.ScanGitHub(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan Github: %v", err)
@@ -769,6 +772,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
IncludeRepos: *gitlabScanIncludeRepos,
ExcludeRepos: *gitlabScanExcludeRepos,
Filter: filter,
AuthInUrl: *gitlabAuthInUrl,
}
if ref, err = eng.ScanGitLab(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan GitLab: %v", err)
+1
View File
@@ -28,6 +28,7 @@ func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) (source
IncludeWikis: c.IncludeWikis,
SkipBinaries: c.SkipBinaries,
CommentsTimeframeDays: c.CommentsTimeframeDays,
RemoveAuthInUrl: !c.AuthInUrl, // configuration uses the opposite field in proto to keep credentials in the URL by default.
}
if len(c.Token) > 0 {
connection.Credential = &sourcespb.GitHub_Token{
+4 -1
View File
@@ -24,7 +24,10 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) (source
}
scanOptions := git.NewScanOptions(opts...)
connection := &sourcespb.GitLab{SkipBinaries: c.SkipBinaries}
connection := &sourcespb.GitLab{
SkipBinaries: c.SkipBinaries,
RemoveAuthInUrl: !c.AuthInUrl, // configuration uses the opposite field in proto to keep credentials in the URL by default.
}
switch {
case len(c.Token) > 0:
File diff suppressed because it is too large Load Diff
+4
View File
@@ -2317,6 +2317,8 @@ func (m *GitLab) validate(all bool) error {
// no validation rules for ExcludeProjectsSharedIntoGroups
// no validation rules for RemoveAuthInUrl
switch v := m.Credential.(type) {
case *GitLab_Token:
if v == nil {
@@ -2548,6 +2550,8 @@ func (m *GitHub) validate(all bool) error {
// no validation rules for CommentsTimeframeDays
// no validation rules for RemoveAuthInUrl
switch v := m.Credential.(type) {
case *GitHub_GithubApp:
if v == nil {
+42 -18
View File
@@ -3,6 +3,7 @@ package git
import (
"bufio"
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
@@ -92,6 +93,8 @@ type Config struct {
// When set to true, the parser will use a custom contentWriter provided through the WithContentWriter option.
// When false, the parser will use the default buffer (in-memory) contentWriter.
UseCustomContentWriter bool
// AuthInUrl indicates whether authentication credentials are passed embedded in the repository URLs.
AuthInUrl bool
}
// NewGit creates a new Git instance with the provided configuration. The Git instance is used to interact with
@@ -276,7 +279,7 @@ func (s *Source) scanRepo(ctx context.Context, repoURI string, reporter sources.
cloneFunc = func() (string, *git.Repository, error) {
user := cred.BasicAuth.Username
token := cred.BasicAuth.Password
return CloneRepoUsingToken(ctx, token, repoURI, user)
return CloneRepoUsingToken(ctx, token, repoURI, user, true)
}
case *sourcespb.Git_Unauthenticated:
cloneFunc = func() (string, *git.Repository, error) {
@@ -373,6 +376,7 @@ func GitURLParse(gitURL string) (*url.URL, error) {
return nil, originalError
}
}
return parsedURL, nil
}
@@ -381,6 +385,7 @@ type cloneParams struct {
gitURL string
args []string
clonePath string
authInUrl bool
}
// CloneRepo orchestrates the cloning of a given Git repository, returning its local path
@@ -388,13 +393,13 @@ type cloneParams struct {
// infrastructure, ensuring that any encountered errors trigger a cleanup of resources.
// The core cloning logic is delegated to a nested function, which returns errors to the
// outer function for centralized error handling and cleanup.
func CloneRepo(ctx context.Context, userInfo *url.Userinfo, gitURL string, args ...string) (string, *git.Repository, error) {
func CloneRepo(ctx context.Context, userInfo *url.Userinfo, gitURL string, authInUrl bool, args ...string) (string, *git.Repository, error) {
clonePath, err := cleantemp.MkdirTemp()
if err != nil {
return "", nil, err
}
repo, err := executeClone(ctx, cloneParams{userInfo, gitURL, args, clonePath})
repo, err := executeClone(ctx, cloneParams{userInfo, gitURL, args, clonePath, authInUrl})
if err != nil {
// DO NOT FORGET TO CLEAN UP THE CLONE PATH HERE!!
// If we don't, we'll end up with a bunch of orphaned directories in the temp dir.
@@ -412,21 +417,40 @@ func executeClone(ctx context.Context, params cloneParams) (*git.Repository, err
if err != nil {
return nil, err
}
if cloneURL.User == nil {
cloneURL.User = params.userInfo
var gitArgs []string
if params.authInUrl {
if cloneURL.User == nil {
cloneURL.User = params.userInfo
}
} else { // default
cloneURL.User = nil // remove user information from the url
pass, ok := params.userInfo.Password()
if ok {
/*
Sources:
- https://medium.com/%40szpytfire/authenticating-with-github-via-a-personal-access-token-7c639a979eb3
- https://trinhngocthuyen.com/posts/tech/50-shades-of-git-remotes-and-authentication/#using-httpextraheader-config
*/
authHeader := base64.StdEncoding.EncodeToString(fmt.Appendf([]byte(""), "%s:%s", params.userInfo.Username(), pass))
gitArgs = append(gitArgs, "-c", fmt.Sprintf("http.extraHeader=Authorization: Basic %s", authHeader))
}
}
gitArgs := []string{
"clone",
cloneURL.String(),
params.clonePath,
"--quiet", // https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode
}
if !feature.SkipAdditionalRefs.Load() {
gitArgs = append(gitArgs,
"-c",
"remote.origin.fetch=+refs/*:refs/remotes/origin/*")
}
gitArgs = append(gitArgs, "clone",
cloneURL.String(),
params.clonePath,
"--quiet", // https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--quietcode
)
gitArgs = append(gitArgs, params.args...)
cloneCmd := exec.Command("git", gitArgs...)
@@ -499,23 +523,23 @@ func PingRepoUsingToken(ctx context.Context, token, gitUrl, user string) error {
}
// CloneRepoUsingToken clones a repo using a provided token.
func CloneRepoUsingToken(ctx context.Context, token, gitUrl, user string, args ...string) (string, *git.Repository, error) {
func CloneRepoUsingToken(ctx context.Context, token, gitUrl, user string, authInUrl bool, args ...string) (string, *git.Repository, error) {
userInfo := url.UserPassword(user, token)
return CloneRepo(ctx, userInfo, gitUrl, args...)
return CloneRepo(ctx, userInfo, gitUrl, authInUrl, args...)
}
// CloneRepoUsingUnauthenticated clones a repo with no authentication required.
func CloneRepoUsingUnauthenticated(ctx context.Context, url string, args ...string) (string, *git.Repository, error) {
return CloneRepo(ctx, nil, url, args...)
return CloneRepo(ctx, nil, url, false, args...)
}
// CloneRepoUsingSSH clones a repo using SSH.
func CloneRepoUsingSSH(ctx context.Context, gitURL string, args ...string) (string, *git.Repository, error) {
if isCodeCommitURL(gitURL) {
return CloneRepo(ctx, nil, gitURL, args...)
return CloneRepo(ctx, nil, gitURL, false, args...)
}
userInfo := url.User("git")
return CloneRepo(ctx, userInfo, gitURL, args...)
return CloneRepo(ctx, userInfo, gitURL, false, args...)
}
var codeCommitRE = regexp.MustCompile(`ssh://git-codecommit\.[\w-]+\.amazonaws\.com`)
@@ -1142,7 +1166,7 @@ func prepareRepoSinceCommit(ctx context.Context, uriString, commitHash string) (
if !ok {
return "", true, fmt.Errorf("password must be included in Git repo URL when username is provided")
}
path, _, err = CloneRepoUsingToken(ctx, password, remotePath, uri.User.Username(), "--shallow-since", timestamp)
path, _, err = CloneRepoUsingToken(ctx, password, remotePath, uri.User.Username(), true, "--shallow-since", timestamp)
if err != nil {
return path, true, fmt.Errorf("failed to clone authenticated Git repo (%s): %s", uri.Redacted(), err)
}
@@ -1180,7 +1204,7 @@ func PrepareRepo(ctx context.Context, uriString string) (string, bool, error) {
if !ok {
return "", remote, fmt.Errorf("password must be included in Git repo URL when username is provided")
}
path, _, err = CloneRepoUsingToken(ctx, password, remotePath, uri.User.Username())
path, _, err = CloneRepoUsingToken(ctx, password, remotePath, uri.User.Username(), true)
if err != nil {
return path, remote, fmt.Errorf("failed to clone authenticated Git repo (%s): %s", uri.Redacted(), err)
}
+1 -1
View File
@@ -88,7 +88,7 @@ func (c *appConnector) Clone(ctx context.Context, repoURL string, args ...string
return "", nil, fmt.Errorf("could not create installation token: %w", err)
}
return git.CloneRepoUsingToken(ctx, token.GetToken(), repoURL, "x-access-token", args...)
return git.CloneRepoUsingToken(ctx, token.GetToken(), repoURL, "x-access-token", true, args...)
}
func (c *appConnector) InstallationClient() *github.Client {
+1 -1
View File
@@ -44,5 +44,5 @@ func (c *basicAuthConnector) APIClient() *github.Client {
}
func (c *basicAuthConnector) Clone(ctx context.Context, repoURL string, args ...string) (string, *gogit.Repository, error) {
return git.CloneRepoUsingToken(ctx, c.password, repoURL, c.username, args...)
return git.CloneRepoUsingToken(ctx, c.password, repoURL, c.username, true, args...)
}
+5 -2
View File
@@ -20,11 +20,12 @@ type tokenConnector struct {
handleRateLimit func(context.Context, error) bool
user string
userMu sync.Mutex
authInUrl bool
}
var _ Connector = (*tokenConnector)(nil)
func NewTokenConnector(apiEndpoint string, token string, handleRateLimit func(context.Context, error) bool) (Connector, error) {
func NewTokenConnector(apiEndpoint string, token string, authInUrl bool, handleRateLimit func(context.Context, error) bool) (Connector, error) {
const httpTimeoutSeconds = 60
httpClient := common.RetryableHTTPClientTimeout(int64(httpTimeoutSeconds))
tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
@@ -43,6 +44,7 @@ func NewTokenConnector(apiEndpoint string, token string, handleRateLimit func(co
token: token,
isGitHubEnterprise: !strings.EqualFold(apiEndpoint, cloudEndpoint),
handleRateLimit: handleRateLimit,
authInUrl: authInUrl,
}, nil
}
@@ -54,7 +56,8 @@ func (c *tokenConnector) Clone(ctx context.Context, repoURL string, args ...stri
if err := c.setUserIfUnset(ctx); err != nil {
return "", nil, err
}
return git.CloneRepoUsingToken(ctx, c.token, repoURL, c.user, args...)
return git.CloneRepoUsingToken(ctx, c.token, repoURL, c.user, c.authInUrl, args...)
}
func (c *tokenConnector) IsGithubEnterprise() bool {
+7 -1
View File
@@ -74,6 +74,8 @@ type Source struct {
sources.Progress
sources.CommonSourceUnitUnmarshaller
useAuthInUrl bool // pass credentials in the repository urls for cloning
}
// --------------------------------------------------------------------------------
@@ -226,6 +228,9 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
}
s.conn = &conn
// configuration uses the inverse logic of the `useAuthInUrl` flag.
s.useAuthInUrl = !s.conn.RemoveAuthInUrl
connector, err := newConnector(s)
if err != nil {
return fmt.Errorf("could not create connector: %w", err)
@@ -290,6 +295,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
}
},
UseCustomContentWriter: s.useCustomContentWriter,
AuthInUrl: s.useAuthInUrl,
}
s.git = git.NewGit(cfg)
@@ -1597,7 +1603,7 @@ func newConnector(source *Source) (Connector, error) {
return NewBasicAuthConnector(apiEndpoint, cred.BasicAuth)
case *sourcespb.GitHub_Token:
log.RedactGlobally(cred.Token)
return NewTokenConnector(apiEndpoint, cred.Token, func(c context.Context, err error) bool {
return NewTokenConnector(apiEndpoint, cred.Token, source.useAuthInUrl, func(c context.Context, err error) bool {
return source.handleRateLimit(c, err)
})
case *sourcespb.GitHub_Unauthenticated:
@@ -595,7 +595,7 @@ func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan
}
// download the repo
path, repo, err := git.CloneRepoUsingToken(ctx, ghToken, repoURL, ghUser)
path, repo, err := git.CloneRepoUsingToken(ctx, ghToken, repoURL, ghUser, true)
if err != nil {
return fmt.Errorf("failed to clone the repository: %w", err)
}
+10 -2
View File
@@ -60,6 +60,8 @@ type Source struct {
jobPool *errgroup.Group
sources.CommonSourceUnitUnmarshaller
useAuthInUrl bool
}
// WithCustomContentWriter sets the useCustomContentWriter flag on the source.
@@ -159,6 +161,9 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
s.includeRepos = conn.GetIncludeRepos()
s.enumerateSharedProjects = !conn.ExcludeProjectsSharedIntoGroups
// configuration uses the inverse logic of the `useAuthInUrl` flag.
s.useAuthInUrl = !conn.RemoveAuthInUrl
ctx.Logger().V(3).Info("setting ignore repos patterns", "patterns", s.ignoreRepos)
ctx.Logger().V(3).Info("setting include repos patterns", "patterns", s.includeRepos)
@@ -213,6 +218,7 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
}
},
UseCustomContentWriter: s.useCustomContentWriter,
AuthInUrl: s.useAuthInUrl,
}
s.git = git.NewGit(cfg)
@@ -653,7 +659,8 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk)
if user == "" {
user = "placeholder"
}
path, repo, err = git.CloneRepoUsingToken(ctx, s.token, repoURL, user)
path, repo, err = git.CloneRepoUsingToken(ctx, s.token, repoURL, user, s.useAuthInUrl)
}
if err != nil {
scanErrs.Add(err)
@@ -836,7 +843,8 @@ func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporte
if user == "" {
user = "placeholder"
}
path, repo, err = git.CloneRepoUsingToken(ctx, s.token, repoURL, user)
path, repo, err = git.CloneRepoUsingToken(ctx, s.token, repoURL, user, s.useAuthInUrl)
}
if err != nil {
return err
+1 -1
View File
@@ -61,7 +61,7 @@ func (s *Source) cloneRepo(
return "", nil, err
}
case *sourcespb.Huggingface_Token:
path, repo, err = git.CloneRepoUsingToken(ctx, s.huggingfaceToken, repoURL, "")
path, repo, err = git.CloneRepoUsingToken(ctx, s.huggingfaceToken, repoURL, "", true)
if err != nil {
return "", nil, err
}
+4
View File
@@ -261,6 +261,8 @@ type GithubConfig struct {
IncludeWikis bool
// CommentsTimeframeDays indicates how many days of comments to include in the scan.
CommentsTimeframeDays uint32
// AuthInUrl determines whether to use the authentication token in the repository URL or in an HTTP header.
AuthInUrl bool
}
// GitHubExperimentalConfig defines the optional configuration for an experimental GitHub source.
@@ -293,6 +295,8 @@ type GitlabConfig struct {
IncludeRepos []string
// ExcludeRepos is a list of repositories to exclude from the scan.
ExcludeRepos []string
// AuthInUrl determines whether to use the authentication token in the repository URL or in an HTTP header.
AuthInUrl bool
}
// FilesystemConfig defines the optional configuration for a filesystem source.
+2
View File
@@ -230,6 +230,7 @@ message GitLab {
bool skip_archives = 8;
repeated string include_repos = 9;
bool exclude_projects_shared_into_groups = 10;
bool remove_auth_in_url = 11;
}
message GitHub {
@@ -255,6 +256,7 @@ message GitHub {
bool skip_archives = 18;
bool include_wikis = 19;
uint32 comments_timeframe_days = 20;
bool remove_auth_in_url = 21;
}
message GitHubExperimental {