wip: pygit2

This commit is contained in:
Dr.Lt.Data 2024-11-29 19:50:35 +09:00
parent 76b073c366
commit 34a48fbae4
5 changed files with 185 additions and 7 deletions

0
glob/__init__.py Normal file
View File

174
glob/git_wrapper.py Normal file
View File

@ -0,0 +1,174 @@
import pygit2
import os
from tqdm import tqdm
import traceback
class GitProgress(pygit2.RemoteCallbacks):
def __init__(self):
super().__init__()
self.pbar = None
def transfer_progress(self, stats):
if self.pbar is None:
self.pbar = tqdm(total=stats.total_objects, unit="obj", desc="Fetching objects")
self.pbar.n = stats.received_objects
self.pbar.refresh()
if stats.received_objects == stats.total_objects:
self.pbar.close()
self.pbar = None
class Remote:
def __init__(self, repo, remote):
self.repo = repo
self.remote = remote
def get_default_branch(self, remote_name='origin'):
remote = self.repo.remotes[remote_name]
remote.fetch() # Fetch latest data from the remote
# Look for the remote HEAD reference
head_ref = f'refs/remotes/{remote_name}/HEAD'
if head_ref in self.repo.references:
# Resolve the symbolic reference to get the actual branch
target_ref = self.repo.references[head_ref].resolve().name
return target_ref.replace(f'refs/remotes/{remote_name}/', '')
else:
raise ValueError(f"Could not determine the default branch for remote '{remote_name}'")
def pull(self, remote_name='origin'):
try:
# Detect if we are in detached HEAD state
if self.repo.head_is_detached:
# Find the default branch
branch_name = self.get_default_branch(remote_name)
# Checkout the branch if exists, or create it
branch_ref = f"refs/heads/{branch_name}"
if branch_ref in self.repo.references:
self.repo.checkout(branch_ref)
else:
# Create and checkout the branch
target_commit = self.repo.lookup_reference(f"refs/remotes/{remote_name}/{branch_name}").target
self.repo.create_branch(branch_name, self.repo[target_commit])
self.repo.checkout(branch_ref)
# Get the current branch
current_branch = self.repo.head.shorthand
# Fetch from the remote
remote = self.repo.remotes[remote_name]
remote.fetch()
# Merge changes from the remote
remote_branch_ref = f"refs/remotes/{remote_name}/{current_branch}"
remote_branch = self.repo.lookup_reference(remote_branch_ref).target
self.repo.merge(remote_branch)
# Check for merge conflicts
if self.repo.index.conflicts is not None:
print("Merge conflicts detected!")
for conflict in self.repo.index.conflicts:
print(f"Conflict: {conflict}")
return
# Commit the merge
user = self.repo.default_signature
merge_commit = self.repo.create_commit(
'HEAD',
user,
user,
f"Merge branch '{current_branch}' from {remote_name}",
self.repo.index.write_tree(),
[self.repo.head.target, remote_branch]
)
except Exception as e:
traceback.print_exc()
print(f"An error occurred: {e}")
self.repo.state_cleanup() # Clean up the merge state if necessary
class Repo:
def __init__(self, repo_path):
self.repo = pygit2.Repository(repo_path)
def remote(self, name="origin"):
return Remote(self.repo, self.repo.remotes[name])
def update_recursive(self):
update_submodules(self.repo)
def resolve_repository_state(repo):
if repo.is_empty:
raise ValueError("Repository is empty. Cannot proceed with submodule update.")
try:
state = repo.state() # Call the state method
except Exception as e:
print(f"Error retrieving repository state: {e}")
raise
if state != pygit2.GIT_REPOSITORY_STATE_NONE:
if state in (pygit2.GIT_REPOSITORY_STATE_MERGE, pygit2.GIT_REPOSITORY_STATE_REVERT):
print(f"Conflict detected. Cleaning up repository state... {repo.path} / {state}")
repo.state_cleanup()
print("Repository state cleaned up.")
else:
raise RuntimeError(f"Unsupported repository state: {state}")
def update_submodules(repo):
try:
resolve_repository_state(repo)
except Exception as e:
print(f"Error resolving repository state: {e}")
return
gitmodules_path = os.path.join(repo.workdir, ".gitmodules")
if not os.path.exists(gitmodules_path):
return
with open(gitmodules_path, "r") as f:
lines = f.readlines()
submodules = []
submodule_path = None
submodule_url = None
for line in lines:
if line.strip().startswith("[submodule"):
if submodule_path and submodule_url:
submodules.append((submodule_path, submodule_url))
submodule_path = None
submodule_url = None
elif line.strip().startswith("path ="):
submodule_path = line.strip().split("=", 1)[1].strip()
elif line.strip().startswith("url ="):
submodule_url = line.strip().split("=", 1)[1].strip()
if submodule_path and submodule_url:
submodules.append((submodule_path, submodule_url))
for path, url in submodules:
submodule_repo_path = os.path.join(repo.workdir, path)
print(f"submodule_repo_path: {submodule_repo_path}")
if not os.path.exists(submodule_repo_path):
print(f"Cloning submodule {path}...")
pygit2.clone_repository(url, submodule_repo_path, callbacks=GitProgress())
else:
print(f"Updating submodule {path}...")
submodule_repo = Repo(submodule_repo_path)
submodule_repo.remote("origin").pull()
update_submodules(submodule_repo)
def clone_from(git_url, repo_dir, recursive=True):
pygit2.clone_repository(git_url, repo_dir, callbacks=GitProgress())
Repo(repo_dir).update_recursive()

View File

@ -1,3 +1,4 @@
pygit2
GitPython
PyGithub
matrix-client==0.4.0
@ -5,4 +6,4 @@ transformers
huggingface-hub>0.20
typer
rich
typing-extensions
typing-extensions

View File

@ -1,6 +1,6 @@
#!/bin/bash
rm ~/.tmp/default/*.py > /dev/null 2>&1
python scanner.py ~/.tmp/default $*
python -m scanner ~/.tmp/default $*
cp extension-node-map.json node_db/new/.
echo "Integrity check"

View File

@ -2,7 +2,8 @@ import ast
import re
import os
import json
from git import Repo
import sys
from glob import git_wrapper
import concurrent
import datetime
import concurrent.futures
@ -243,25 +244,27 @@ def get_py_urls_from_json(json_file):
return py_files
import traceback
def clone_or_pull_git_repository(git_url):
repo_name = git_url.split("/")[-1].split(".")[0]
repo_dir = os.path.join(temp_dir, repo_name)
if os.path.exists(repo_dir):
try:
repo = Repo(repo_dir)
repo = git_wrapper.Repo(repo_dir)
origin = repo.remote(name="origin")
origin.pull()
repo.git.submodule('update', '--init', '--recursive')
repo.update_recursive()
print(f"Pulling {repo_name}...")
except Exception as e:
traceback.print_exc()
print(f"Pulling {repo_name} failed: {e}")
else:
try:
Repo.clone_from(git_url, repo_dir, recursive=True)
git_wrapper.clone_from(git_url, repo_dir, recursive=True)
print(f"Cloning {repo_name}...")
except Exception as e:
traceback.print_exc()
print(f"Cloning {repo_name} failed: {e}")