Ensure web.Application is instrumented, and ensure the latest aiohttp is installed in containers

This commit is contained in:
doctorpangloss 2025-11-14 17:46:01 -08:00
parent 4f6615e939
commit fa3eea9a18
5 changed files with 28 additions and 4 deletions

View File

@ -70,6 +70,12 @@ class InternalRoutes:
def get_app(self):
if self._app is None:
# Manually apply instrumentation to ensure web.Application is instrumented
from opentelemetry.instrumentation.aiohttp_server import AioHttpServerInstrumentor
instrumentor = AioHttpServerInstrumentor()
if not instrumentor.is_instrumented_by_opentelemetry:
instrumentor.instrument()
self._app = web.Application()
self.setup_routes()
self._app.add_routes(self.routes)

View File

@ -15,7 +15,6 @@ import shutil
import warnings
import fsspec
from opentelemetry.instrumentation.urllib3 import URLLib3Instrumentor
from .. import options
from ..app import logger
@ -130,6 +129,8 @@ def _create_tracer():
from opentelemetry.processor.baggage import BaggageSpanProcessor, ALLOW_ALL_BAGGAGE_KEYS
from opentelemetry.instrumentation.aiohttp_server import AioHttpServerInstrumentor
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
from opentelemetry.instrumentation.urllib3 import URLLib3Instrumentor
from ..tracing_compatibility import ProgressSpanSampler
from ..tracing_compatibility import patch_spanbuilder_set_channel
@ -163,6 +164,8 @@ def _create_tracer():
provider.add_span_processor(BaggageSpanProcessor(ALLOW_ALL_BAGGAGE_KEYS))
trace.set_tracer_provider(provider)
# makes this behave better as a library
return trace.get_tracer(args.otel_service_name, tracer_provider=provider)

View File

@ -252,6 +252,14 @@ class PromptServer(ExecutorToClientProgress):
middlewares.append(create_origin_only_middleware())
max_upload_size = round(args.max_upload_size * 1024 * 1024)
# Manually apply instrumentation to ensure web.Application is instrumented
# This works around the import caching bug in opentelemetry-instrumentation-aiohttp-server
from opentelemetry.instrumentation.aiohttp_server import AioHttpServerInstrumentor
instrumentor = AioHttpServerInstrumentor()
if not instrumentor.is_instrumented_by_opentelemetry:
instrumentor.instrument()
self.app: web.Application = web.Application(client_max_size=max_upload_size,
handler_args={'max_field_size': 16380},
middlewares=middlewares)

View File

@ -53,6 +53,12 @@ class DistributedPromptWorker:
return web.Response(text="UNHEALTHY: RabbitMQ connection is not healthy", status=503)
async def _start_health_check_server(self):
# Manually apply instrumentation to ensure web.Application is instrumented
from opentelemetry.instrumentation.aiohttp_server import AioHttpServerInstrumentor
instrumentor = AioHttpServerInstrumentor()
if not instrumentor.is_instrumented_by_opentelemetry:
instrumentor.instrument()
app = web.Application()
app.router.add_get('/health', self._health_check)

View File

@ -33,8 +33,8 @@ dependencies = [
"peft>=0.10.0",
"torchinfo",
"safetensors>=0.4.2",
"aiohttp>=3.11.8",
"yarl>=1.9.4",
"aiohttp>=3.13.2",
"yarl>=1.22.0",
"accelerate>=0.25.0",
"pyyaml>=6.0",
"scikit-image>=0.20.0",
@ -119,7 +119,8 @@ dependencies = [
"requests_cache",
"universal_pathlib",
# yanked propcache is omitted
"propcache!=0.4.0",
# require 0.4.1 or newer, which also rules out the yanked 0.4.0
"propcache>=0.4.1",
]
[build-system]