Add GitHub stats fetching feature

- Added PyGithub package to requirements.txt for GitHub API interaction
- Updated .gitignore to ignore github-stats-cache.json
- Produced github-stats.json for storing GitHub stats
- Modified scanner.py to include the GitHub stats fetching process
This commit is contained in:
Sida Liu 2024-04-02 14:28:51 +08:00
parent 9f2323d1fb
commit 19135aaa7d
4 changed files with 2714 additions and 1 deletions

1
.gitignore vendored
View File

@@ -11,3 +11,4 @@ startup-scripts/**
matrix_auth
channels.list
comfyworkflows_sharekey
github-stats-cache.json

2666
github-stats.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
GitPython
PyGithub
matrix-client==0.4.0
transformers
huggingface-hub>0.20

View File

@@ -10,6 +10,10 @@ builtin_nodes = set()
import sys
from urllib.parse import urlparse
from github import Github
g = Github(os.environ.get('GITHUB_TOKEN'))
# prepare temp dir
if len(sys.argv) > 1:
@@ -224,7 +228,48 @@ def update_custom_nodes():
if not skip_update:
clone_or_pull_git_repository(url)
with concurrent.futures.ThreadPoolExecutor(10) as executor:
def process_git_stats(git_url_titles_preemptions):
    """Fetch star counts and last-push timestamps for each GitHub repo URL.

    The accumulated mapping is flushed to GITHUB_STATS_CACHE_FILENAME after
    every successful API call, so an interrupted run (e.g. a GitHub API
    failure partway through) can resume without re-fetching completed repos.
    On completion the full mapping is written to GITHUB_STATS_FILENAME and
    the cache file is removed.

    :param git_url_titles_preemptions: iterable of
        (url, title, preemptions, node_pattern) tuples; only `url` is used here.
    """
    GITHUB_STATS_CACHE_FILENAME = 'github-stats-cache.json'
    GITHUB_STATS_FILENAME = 'github-stats.json'

    # Resume from a previous partial run if a cache file is present.
    github_stats = {}
    try:
        with open(GITHUB_STATS_CACHE_FILENAME, 'r', encoding='utf-8') as file:
            github_stats = json.load(file)
    except FileNotFoundError:
        pass

    for url, title, preemptions, node_pattern in git_url_titles_preemptions:
        if url not in github_stats:
            # Extract "owner/repo" from the URL path; only github.com URLs
            # can be resolved through the module-level PyGithub client `g`.
            parsed_url = urlparse(url)
            domain = parsed_url.netloc
            path = parsed_url.path
            path_parts = path.strip("/").split("/")
            if len(path_parts) >= 2 and domain == "github.com":
                owner_repo = "/".join(path_parts[-2:])
                repo = g.get_repo(owner_repo)
                last_update = repo.pushed_at.strftime("%Y-%m-%d %H:%M:%S") if repo.pushed_at else 'N/A'
                github_stats[url] = {
                    "stars": repo.stargazers_count,
                    "last_update": last_update,
                }
                # Persist after every fetch so progress survives an API failure.
                with open(GITHUB_STATS_CACHE_FILENAME, 'w', encoding='utf-8') as file:
                    json.dump(github_stats, file, ensure_ascii=False, indent=4)
            else:
                print(f"Invalid URL format for GitHub repository: {url}")

    with open(GITHUB_STATS_FILENAME, 'w', encoding='utf-8') as file:
        json.dump(github_stats, file, ensure_ascii=False, indent=4)
    print(f"Successfully written to {GITHUB_STATS_FILENAME}, removing {GITHUB_STATS_CACHE_FILENAME}.")
    try:
        os.remove(GITHUB_STATS_CACHE_FILENAME)  # This cache file is just for avoiding failure of GitHub API fetch, so it is safe to remove.
    except OSError:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed; a leftover cache file is harmless either way.
        pass
# 11 workers = 1 dedicated to the long-running process_git_stats task plus
# 10 for the per-repo process_git_url_title tasks submitted below (the prior
# pool used 10 — presumably bumped by one to keep the same fan-out; confirm).
with concurrent.futures.ThreadPoolExecutor(11) as executor:
# process_git_stats is submitted first and iterates the same sequence the
# loop below consumes; both run concurrently in the shared pool.
executor.submit(process_git_stats, git_url_titles_preemptions) # One single thread for `process_git_stats()`. Runs concurrently with `process_git_url_title()`.
for url, title, preemptions, node_pattern in git_url_titles_preemptions:
executor.submit(process_git_url_title, url, title, preemptions, node_pattern)