Replaced 'git clone' with 'git clone --filter=blob:none' to speed up cloning of the customer repository.
2023-10-18 11:36:13 +02:00 · 2023-10-18 11:36:13 +02:00 · 22f25e5251
commit 22f25e5251
parent 258d883a51
1 changed files with 50 additions and 3 deletions
--- a/git/git_client.cpp
+++ b/git/git_client.cpp
@@ -26,12 +26,54 @@ GitClient::GitClient(QString const &customerNrStr,
 }

 bool GitClient::gitCloneCustomerRepository() {
-    QString gitCommand("git clone ");
+    /*  Blobless clone
+        ==============
+
+        When using the --filter=blob:none option, the initial git clone will
+        download all reachable commits and trees, and only download the blobs
+        for commits when you do a git checkout. This includes the first checkout
+        inside the git clone operation.
+
+        The important thing to notice is that we have a copy of every blob at
+        HEAD but the blobs in the history are not present. If your repository
+        has a deep history full of large blobs, then this option can
+        significantly reduce your git clone times. The commit and tree data is
+        still present, so any subsequent git checkout only needs to download
+        the missing blobs. The Git client knows how to batch these requests to
+        ask the server only for the missing blobs.
+
+        Further, when running git fetch in a blobless clone, the server only
+        sends the new commits and trees. The new blobs are downloaded only
+        after a git checkout. Note that git pull runs git fetch and then git
+        merge, so it will download the necessary blobs during the git merge
+        command.
+
+        When using a blobless clone, you will trigger a blob download whenever
+        you need the contents of a file, but you will not need one if you only
+        need the OID (object-id) of a file. This means that git log can detect
+        which commits changed a given path without needing to download extra
+        data.
+
+        This means that blobless clones can perform commands like git
+        merge-base, git log, or even git log -- <path> with the same performance
+        as a full clone.
+
+        Commands like git diff or git blame <path> require the contents of the
+        paths to compute diffs, so these will trigger blob downloads the first
+        time they are run. However, the good news is that after that you will
+        have those blobs in your repository and do not need to download them a
+        second time. Most developers only need to run git blame on a small
+        number of files, so this tradeoff of a slightly slower git blame command
+        is worth the faster clone and fetch times.
+
+        Note: git v2.18 does not support treeless clones: --filter=tree:0.
+     */
+    QString gitCommand("git clone --filter=blob:none ");
    gitCommand += m_repositoryPath;
    Command c(gitCommand);

    qInfo() << "IN CURRENT WD" << m_workingDirectory
-            << "CLONE" << m_repositoryPath << "...";
+            << "CLONE VIA COMMAND" << gitCommand;

    if (c.execute(m_workingDirectory)) { // execute the command in wd
        QString const result = c.getCommandResult();
@@ -47,8 +89,13 @@ bool GitClient::gitCloneCustomerRepository() {
                    }
                }
            }
+            Utils::printCriticalErrorMsg(
+                QString("ERROR CLONE RESULT HAS WRONG FORMAT. rcc=%1 CLONE_RESULT=%2")
+                    .arg(re.captureCount())
+                    .arg(result));
+            return false;
        }
-        Utils::printCriticalErrorMsg(QString("ERROR CLONE RESULT HAS WRONG FORMAT. CLONE_RESULT=") + result);
+        Utils::printCriticalErrorMsg("ERROR CLONE RESULT IS EMPTY");
    }
    return false;
 }