auto create repo refs when setting KAS_REPO_REF_DIR

This patch reworks the logic when setting KAS_REPO_REF_DIR.
When this variable is set, a two-staged clone is used:
First, a bare clone (or similar) is created in the ref-dir, according to
the naming scheme. This clone is executed in a way that is both
reentrant and race-free across multiple instances of KAS working
on the same dir. Internally, we clone into a tmpdir below the refdir and
rename it on success to guarantee the atomicity of the operation on
POSIX-compliant filesystems.

Second, the clone in the KAS_WORK_DIR is executed against the local
copy. After that, the origin url is redirected to the upstream url.
This way, the KAS_REPO_REF_DIR directory can be cached across builds,
which significantly speeds up clone times against large repos.
In case the requested refspec is already in the cache (very likely in CI
builds), no direct access to the upstream repo is required. This logic
is crucial for CI systems in China, where e.g. access to github is
blocked from time to time.

The clone-from-local logic is currently only implemented for the git plugin,
as Mercurial (hg) lacks the caching logic. Repo implementations that do not
support this logic can simply opt out of the first stage by returning the
no-op command 'true' from clone_cmd when createref is set.

The existing user-facing logic of KAS_REPO_REF_DIR is not affected.
However, internally we no longer clone via --reference as this still
requires access to the remote repo, even if the requested commit is
already in the local copy.

Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
This commit is contained in:
Felix Moessbauer 2023-01-05 08:50:21 +01:00 committed by Jan Kiszka
parent e7896c33d9
commit f2560588bc
2 changed files with 41 additions and 10 deletions

View File

@ -28,6 +28,11 @@ Environment variables
| | "https://github.com/siemens/meta-iot2000.git" |
| | resolves to the name |
| | "github.com.siemens.meta-iot2000.git". |
| | Repositories that are not found will be cloned |
| | below this directory. Multiple instances of kas |
| | can simultaneously work on the same directory, |
| | as long as the underlying filesystem is POSIX |
| | compatible. |
+--------------------------+--------------------------------------------------+
| ``KAS_DISTRO`` | This overwrites the respective setting in the |
| ``KAS_MACHINE`` | configuration file. |

View File

@ -28,6 +28,7 @@ import os
import sys
import logging
from urllib.parse import urlparse
from tempfile import TemporaryDirectory
from .context import get_context
from .libkas import run_cmd_async, run_cmd
@ -178,17 +179,35 @@ class RepoImpl(Repo):
"""
Starts asynchronous repository fetch.
"""
if self.operations_disabled:
return 0
refdir = get_context().kas_repo_ref_dir
sdir = os.path.join(refdir, self.qualified_name) if refdir else None
# fetch to refdir
if refdir and not os.path.exists(sdir):
os.makedirs(refdir, exist_ok=True)
with TemporaryDirectory(prefix=self.qualified_name + '.',
dir=refdir) as tmpdir:
(retc, _) = await run_cmd_async(
self.clone_cmd(tmpdir, createref=True),
cwd=get_context().kas_work_dir)
if retc != 0:
return retc
logging.debug('Created repo ref for %s', self.qualified_name)
try:
os.rename(tmpdir, sdir)
except OSError:
logging.debug('repo %s already cloned by other instance',
self.qualified_name)
if not os.path.exists(self.path):
os.makedirs(os.path.dirname(self.path), exist_ok=True)
sdir = os.path.join(get_context().kas_repo_ref_dir or '',
self.qualified_name)
logging.debug('Looking for repo ref dir in %s', sdir)
(retc, _) = await run_cmd_async(
self.clone_cmd(sdir),
self.clone_cmd(sdir, createref=False),
cwd=get_context().kas_work_dir)
if retc == 0:
logging.info('Repository %s cloned', self.name)
@ -355,10 +374,14 @@ class GitRepo(RepoImpl):
def add_cmd(self):
return ['git', 'add', '-A']
def clone_cmd(self, gitsrcdir):
cmd = ['git', 'clone', '-q', self.effective_url, self.path]
if get_context().kas_repo_ref_dir and os.path.exists(gitsrcdir):
cmd.extend(['--reference', gitsrcdir])
def clone_cmd(self, srcdir, createref):
cmd = ['git', 'clone', '-q']
if createref:
cmd.extend([self.effective_url, '--bare', srcdir])
elif srcdir:
cmd.extend([srcdir, '--reference', srcdir, self.path])
else:
cmd.extend([self.effective_url, self.path])
return cmd
def commit_cmd(self):
@ -414,7 +437,10 @@ class MercurialRepo(RepoImpl):
def add_cmd(self):
return ['hg', 'add']
def clone_cmd(self, srcdir):
def clone_cmd(self, srcdir, createref):
# Mercurial does not support repo references (object caches)
if createref:
return ['true']
return ['hg', 'clone', self.effective_url, self.path]
def commit_cmd(self):