From 4120138e06e0ccd48a5b49f3a93926630c05a5a7 Mon Sep 17 00:00:00 2001
From: "Dr.Lt.Data"
Date: Tue, 2 Apr 2024 20:52:59 +0900
Subject: [PATCH] fix: scanner - prevent stuck when exceed rate limit

---
 scanner.py | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/scanner.py b/scanner.py
index 16d0a60b..d06ad71f 100644
--- a/scanner.py
+++ b/scanner.py
@@ -5,6 +5,7 @@ import json
 from git import Repo
 from torchvision.datasets.utils import download_url
 import concurrent
+import datetime
 
 builtin_nodes = set()
 
@@ -217,9 +218,6 @@ def update_custom_nodes():
     git_url_titles_preemptions = get_git_urls_from_json('custom-node-list.json')
 
     def process_git_url_title(url, title, preemptions, node_pattern):
-        if 'Jovimetrix' in title:
-            pass
-
         name = os.path.basename(url)
         if name.endswith(".git"):
             name = name[:-4]
@@ -238,30 +236,33 @@ def update_custom_nodes():
             github_stats = json.load(file)
     except FileNotFoundError:
         pass
 
-    for url, title, preemptions, node_pattern in git_url_titles_preemptions:
-        if url not in github_stats:
-            # Parsing the URL
-            parsed_url = urlparse(url)
-            domain = parsed_url.netloc
-            path = parsed_url.path
-            path_parts = path.strip("/").split("/")
-            if len(path_parts) >= 2 and domain == "github.com":
-                owner_repo = "/".join(path_parts[-2:])
-                repo = g.get_repo(owner_repo)
-                last_update = repo.pushed_at.strftime("%Y-%m-%d %H:%M:%S") if repo.pushed_at else 'N/A'
-                github_stats[url] = {
-                    "stars": repo.stargazers_count,
-                    "last_update": last_update,
-                }
-                with open(GITHUB_STATS_CACHE_FILENAME, 'w', encoding='utf-8') as file:
-                    json.dump(github_stats, file, ensure_ascii=False, indent=4)
-                # print(f"Title: {title}, Stars: {repo.stargazers_count}, Last Update: {last_update}")
-            else:
-                print(f"Invalid URL format for GitHub repository: {url}")
+    if g.rate_limiting_resettime-datetime.datetime.now().timestamp() <= 0:
+        for url, title, preemptions, node_pattern in git_url_titles_preemptions:
+            if url not in github_stats:
+                # Parsing the URL
+                parsed_url = urlparse(url)
+                domain = parsed_url.netloc
+                path = parsed_url.path
+                path_parts = path.strip("/").split("/")
+                if len(path_parts) >= 2 and domain == "github.com":
+                    owner_repo = "/".join(path_parts[-2:])
+                    repo = g.get_repo(owner_repo)
+
+                    last_update = repo.pushed_at.strftime("%Y-%m-%d %H:%M:%S") if repo.pushed_at else 'N/A'
+                    github_stats[url] = {
+                        "stars": repo.stargazers_count,
+                        "last_update": last_update,
+                    }
+                    with open(GITHUB_STATS_CACHE_FILENAME, 'w', encoding='utf-8') as file:
+                        json.dump(github_stats, file, ensure_ascii=False, indent=4)
+                    # print(f"Title: {title}, Stars: {repo.stargazers_count}, Last Update: {last_update}")
+                else:
+                    print(f"Invalid URL format for GitHub repository: {url}")
 
     with open(GITHUB_STATS_FILENAME, 'w', encoding='utf-8') as file:
         json.dump(github_stats, file, ensure_ascii=False, indent=4)
+        print(f"Successfully written to {GITHUB_STATS_FILENAME}, removing {GITHUB_STATS_CACHE_FILENAME}.")
 
     try:
         os.remove(GITHUB_STATS_CACHE_FILENAME)  # This cache file is just for avoiding failure of GitHub API fetch, so it is safe to remove.
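
Note on the fix: the new guard compares PyGithub's rate-limit reset time (a Unix epoch timestamp exposed as Github.rate_limiting_resettime) against the current time, and only runs the per-repository stats loop once that reset moment has passed. A scanner that has exhausted its API quota therefore skips the GitHub fetch instead of hanging on it, and because partial results are persisted to GITHUB_STATS_CACHE_FILENAME inside the loop, a skipped run resumes from the cached entries next time. Below is a minimal sketch of the same guard in isolation; the token placeholder, helper name rate_limit_cleared, and example repository are illustrative and not part of the patch:

    import datetime
    from github import Github

    g = Github("<token>")  # placeholder; supply a real personal access token

    def rate_limit_cleared(g: Github) -> bool:
        # g.rate_limiting_resettime is the epoch second at which the GitHub
        # API quota resets; once it lies in the past, calls are safe again.
        return g.rate_limiting_resettime - datetime.datetime.now().timestamp() <= 0

    if rate_limit_cleared(g):
        repo = g.get_repo("comfyanonymous/ComfyUI")  # illustrative repo
        print(repo.stargazers_count, repo.pushed_at)
    else:
        print("GitHub rate limit not yet reset; skipping stats fetch.")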