fix(api-nodes): random issues on Windows by capturing general OSError for retries (#10486)

Alexander Piskun 2025-10-26 08:51:06 +02:00 committed by GitHub
parent f6bbc1ac84
commit 9d529e5308
3 changed files with 10 additions and 15 deletions
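The retry paths below previously caught only ClientError, asyncio.TimeoutError and socket.gaierror, so connection failures that surface as other OSError subclasses (which, per the title, is what shows up intermittently on Windows) bypassed the retry logic entirely. Catching OSError subsumes those cases: socket.gaierror, the builtin TimeoutError and errors such as ConnectionResetError are all OSError subclasses, and on Python 3.11+ asyncio.TimeoutError is an alias of the builtin TimeoutError. A minimal illustration of that hierarchy (not part of the change itself):

import asyncio
import socket

# The non-aiohttp exception types that used to be listed individually are
# all covered by OSError; the asyncio alias holds on Python 3.11 and later.
assert issubclass(socket.gaierror, OSError)       # DNS resolution failures
assert issubclass(TimeoutError, OSError)          # builtin timeout
assert asyncio.TimeoutError is TimeoutError       # alias since Python 3.11
assert issubclass(ConnectionResetError, OSError)  # abruptly dropped connections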

View File

@@ -2,7 +2,6 @@ import asyncio
 import contextlib
 import json
 import logging
-import socket
 import time
 import uuid
 from dataclasses import dataclass
@@ -456,24 +455,20 @@ async def _diagnose_connectivity() -> dict[str, bool]:
     results = {
         "internet_accessible": False,
         "api_accessible": False,
-        "is_local_issue": False,
-        "is_api_issue": False,
     }
     timeout = aiohttp.ClientTimeout(total=5.0)
     async with aiohttp.ClientSession(timeout=timeout) as session:
-        try:
+        with contextlib.suppress(ClientError, OSError):
             async with session.get("https://www.google.com") as resp:
                 results["internet_accessible"] = resp.status < 500
-        except (ClientError, asyncio.TimeoutError, socket.gaierror):
-            results["is_local_issue"] = True
+        if not results["internet_accessible"]:
             return results
         parsed = urlparse(default_base_url())
         health_url = f"{parsed.scheme}://{parsed.netloc}/health"
-        with contextlib.suppress(ClientError, asyncio.TimeoutError):
+        with contextlib.suppress(ClientError, OSError):
             async with session.get(health_url) as resp:
                 results["api_accessible"] = resp.status < 500
-    results["is_api_issue"] = results["internet_accessible"] and not results["api_accessible"]
     return results
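Pieced together, the rewritten helper now reads roughly as follows (reconstructed from the hunk above; aiohttp, contextlib, ClientError, urlparse and default_base_url are the module's existing imports and helpers):

async def _diagnose_connectivity() -> dict[str, bool]:
    results = {
        "internet_accessible": False,
        "api_accessible": False,
    }
    timeout = aiohttp.ClientTimeout(total=5.0)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        # First probe: general internet access. Connection-level failures
        # (ClientError or OSError) are suppressed and leave the flag False.
        with contextlib.suppress(ClientError, OSError):
            async with session.get("https://www.google.com") as resp:
                results["internet_accessible"] = resp.status < 500
        if not results["internet_accessible"]:
            return results
        # Second probe: the API health endpoint, only if the internet is reachable.
        parsed = urlparse(default_base_url())
        health_url = f"{parsed.scheme}://{parsed.netloc}/health"
        with contextlib.suppress(ClientError, OSError):
            async with session.get(health_url) as resp:
                results["api_accessible"] = resp.status < 500
    return results

Callers that previously read the derived is_local_issue / is_api_issue flags now derive the same information from the two remaining keys, as the hunks below show.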
@@ -790,7 +785,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
         except ProcessingInterrupted:
             logging.debug("Polling was interrupted by user")
             raise
-        except (ClientError, asyncio.TimeoutError, socket.gaierror) as e:
+        except (ClientError, OSError) as e:
             if attempt <= cfg.max_retries:
                 logging.warning(
                     "Connection error calling %s %s. Retrying in %.2fs (%d/%d): %s",
@@ -824,7 +819,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                 delay *= cfg.retry_backoff
                 continue
             diag = await _diagnose_connectivity()
-            if diag.get("is_local_issue"):
+            if not diag["internet_accessible"]:
                 try:
                     request_logger.log_request_response(
                         operation_id=operation_id,
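For context, the hunks above sit inside a retry loop that backs off exponentially before falling back to the connectivity diagnosis. A simplified sketch of that pattern (not the module's actual code; LocalNetworkError and _diagnose_connectivity are the helpers shown elsewhere in this diff, and do_request stands in for the real request call):

import asyncio
import logging

from aiohttp import ClientError

async def request_with_retries(do_request, max_retries=5, retry_delay=1.0, retry_backoff=2.0):
    delay = retry_delay
    attempt = 0
    while True:
        attempt += 1
        try:
            return await do_request()
        except (ClientError, OSError) as e:
            # Broad catch: DNS errors, resets, timeouts and aiohttp client errors.
            if attempt <= max_retries:
                logging.warning("Connection error. Retrying in %.2fs (%d/%d): %s",
                                delay, attempt, max_retries, e)
                await asyncio.sleep(delay)
                delay *= retry_backoff
                continue
            # Retries exhausted: decide whether the problem is local or remote.
            diag = await _diagnose_connectivity()
            if not diag["internet_accessible"]:
                raise LocalNetworkError(
                    "Unable to connect to the network. Please check your internet connection and try again."
                ) from e
            raise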

View File

@@ -32,7 +32,7 @@ async def download_url_to_bytesio(
     dest: Optional[Union[BytesIO, IO[bytes], str, Path]],
     *,
     timeout: Optional[float] = None,
-    max_retries: int = 3,
+    max_retries: int = 5,
     retry_delay: float = 1.0,
     retry_backoff: float = 2.0,
     cls: type[COMFY_IO.ComfyNode] = None,
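Together with the unchanged retry_delay=1.0 and retry_backoff=2.0, raising the default max_retries from 3 to 5 means the wait doubles after each failed attempt: roughly 1 s, 2 s, 4 s, 8 s and 16 s, or about 31 s of total backoff in the worst case (assuming the delay is multiplied after every failed attempt, as in the retry loops above):

retry_delay, retry_backoff, max_retries = 1.0, 2.0, 5
delays = [retry_delay * retry_backoff ** i for i in range(max_retries)]
print(delays, sum(delays))  # [1.0, 2.0, 4.0, 8.0, 16.0] 31.0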
@@ -177,7 +177,7 @@ async def download_url_to_bytesio(
                     return
         except asyncio.CancelledError:
             raise ProcessingInterrupted("Task cancelled") from None
-        except (ClientError, asyncio.TimeoutError) as e:
+        except (ClientError, OSError) as e:
             if attempt <= max_retries:
                 with contextlib.suppress(Exception):
                     request_logger.log_request_response(
@@ -191,7 +191,7 @@ async def download_url_to_bytesio(
                 continue
             diag = await _diagnose_connectivity()
-            if diag.get("is_local_issue"):
+            if not diag["internet_accessible"]:
                 raise LocalNetworkError(
                     "Unable to connect to the network. Please check your internet connection and try again."
                 ) from e

View File

@@ -290,7 +290,7 @@ async def upload_file(
                     return
         except asyncio.CancelledError:
             raise ProcessingInterrupted("Task cancelled") from None
-        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+        except (aiohttp.ClientError, OSError) as e:
             if attempt <= max_retries:
                 with contextlib.suppress(Exception):
                     request_logger.log_request_response(
@@ -313,7 +313,7 @@ async def upload_file(
                 continue
             diag = await _diagnose_connectivity()
-            if diag.get("is_local_issue"):
+            if not diag["internet_accessible"]:
                 raise LocalNetworkError(
                     "Unable to connect to the network. Please check your internet connection and try again."
                 ) from e
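The caller-facing effect is that LocalNetworkError is now raised consistently when the failure is local, even if the underlying exception was a raw OSError rather than an aiohttp error. A hypothetical caller-side handler (do_upload is a placeholder, not an API from this repository):

import logging

async def run_upload():
    try:
        await do_upload()  # placeholder for any of the helpers above
    except LocalNetworkError as e:
        # Local connectivity problem (DNS failure, dropped socket, timeout):
        # report it as a user-side issue rather than an API outage.
        logging.error("Local network problem: %s", e)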