[llvm] Avoid exposing unknown git repositories (PR #105220)

Tulio Magno Quites Machado Filho via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 14:40:50 PDT 2024


https://github.com/tuliom updated https://github.com/llvm/llvm-project/pull/105220

>From 0b7370329f0dc9da9f89287644c493f3fa4c9c60 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom at redhat.com>
Date: Mon, 5 Aug 2024 14:29:11 -0300
Subject: [PATCH 1/3] Avoid exposing unknown git repositories

Restrict the URL that is exposed to the official LLVM repository at
GitHub in order to avoid exposing usernames, passwords or even private
URLs unintentionally.

Users willing to expose different Git repositories can continue to do so
by setting LLVM_FORCE_VC_REPOSITORY or CLANG_REPOSITORY_STRING.
---
 llvm/cmake/modules/VersionFromVCS.cmake | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 18edbeabe3e4b5..a0068c5f725517 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -39,8 +39,14 @@ function(get_source_info path revision repository)
         OUTPUT_VARIABLE git_output
         ERROR_QUIET)
       if(git_result EQUAL 0)
-        string(STRIP "${git_output}" git_output)
-        set(${repository} ${git_output} PARENT_SCOPE)
+        # Avoid exposing sensitive data, e.g. usernames, passwords and
+        # private URLs.
+        string(FIND "${git_output}" "github.com/llvm/llvm-project" git_upstream)
+        if(git_upstream GREATER_EQUAL 0)
+          set(${repository} "https://github.com/llvm/llvm-project" PARENT_SCOPE)
+        else()
+          set(${repository} "forked repository" PARENT_SCOPE)
+        endif()
       else()
         set(${repository} ${path} PARENT_SCOPE)
       endif()

>From 0d18a3c6515fcee3c785cd5463e9459ed455bdb6 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom at redhat.com>
Date: Thu, 29 Aug 2024 09:28:28 -0300
Subject: [PATCH 2/3] fixup! Avoid exposing unknown git repositories

---
 llvm/cmake/modules/VersionFromVCS.cmake | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index a0068c5f725517..5320960c41b5e2 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -41,11 +41,9 @@ function(get_source_info path revision repository)
       if(git_result EQUAL 0)
         # Avoid exposing sensitive data, e.g. usernames, passwords and
         # private URLs.
-        string(FIND "${git_output}" "github.com/llvm/llvm-project" git_upstream)
-        if(git_upstream GREATER_EQUAL 0)
+        string(REGEX MATCH "github.com[/:]llvm/llvm-project" git_upstream "${git_output}")
+        if(git_upstream)
           set(${repository} "https://github.com/llvm/llvm-project" PARENT_SCOPE)
-        else()
-          set(${repository} "forked repository" PARENT_SCOPE)
         endif()
       else()
         set(${repository} ${path} PARENT_SCOPE)

>From be6a0441c9e07739b7793d05a462586b42b4245d Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom at redhat.com>
Date: Tue, 3 Sep 2024 18:40:18 -0300
Subject: [PATCH 3/3] fixup! Avoid exposing unknown git repositories

---
 llvm/cmake/modules/VersionFromVCS.cmake | 29 ++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 5320960c41b5e2..1dab5427be8585 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -39,12 +39,31 @@ function(get_source_info path revision repository)
         OUTPUT_VARIABLE git_output
         ERROR_QUIET)
       if(git_result EQUAL 0)
-        # Avoid exposing sensitive data, e.g. usernames, passwords and
-        # private URLs.
-        string(REGEX MATCH "github.com[/:]llvm/llvm-project" git_upstream "${git_output}")
-        if(git_upstream)
-          set(${repository} "https://github.com/llvm/llvm-project" PARENT_SCOPE)
+        # Passwords or tokens should not be stored in the remote URL at the
+        # risk of being leaked. In case we find one, error out and teach the
+        # user the best practices.
+        string(REGEX MATCH "https?://[^/]*:[^/]*@.*"
+          http_password "${git_output}")
+        if(http_password)
+          message(SEND_ERROR "The remote URL has an embedded password. \
+Remove the password from the URL or use \
+`-DLLVM_FORCE_VC_REPOSITORY=<URL without password>` in order to avoid \
+leaking your password.")
         endif()
+        # Github token formats are described at:
+        # https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-authentication-to-github#githubs-token-formats
+        string(REGEX MATCH
+          "https?://(gh[pousr]|github_pat)_[^/]+@github.com.*"
+          github_token "${git_output}")
+        if(github_token)
+          message(SEND_ERROR "The remote URL has an embedded Github Token. \
+Remove the token from the URL or use \
+`-DLLVM_FORCE_VC_REPOSITORY=<URL without token>` in order to avoid leaking \
+your token.")
+        endif()
+
+        string(STRIP "${git_output}" git_output)
+        set(${repository} ${git_output} PARENT_SCOPE)
       else()
         set(${repository} ${path} PARENT_SCOPE)
       endif()



More information about the llvm-commits mailing list