diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ddac93d1e5..c40d45845d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5","3.7","3.8","3.11","3.12"] + python-version: ["3.5","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer being supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6 @@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq latest run: | set -x # print commands that are executed @@ -110,6 +114,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq pinned run: | set -x # print commands that are executed @@ -151,6 +159,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py2.7-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai py27 + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py2.7-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq py27 run: | set -x # print commands that are executed diff --git a/mypy.ini b/mypy.ini index fef90c867e..c1444d61e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -67,6 +67,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-huey.*] ignore_missing_imports = True +[mypy-openai.*] +ignore_missing_imports = True [mypy-arq.*] ignore_missing_imports = True [mypy-grpc.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index f8beffc219..13b81283ca 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "openai", "rq", ], "Databases": [ diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 2b58aecc24..e4edfddef1 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -219,6 +219,8 @@ class OP: MIDDLEWARE_STARLITE = "middleware.starlite" MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive" MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" + OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" + OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 21f7188ff1..c9737ae589 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "sentry_sdk.integrations.fastapi.FastApiIntegration", "sentry_sdk.integrations.flask.FlaskIntegration", "sentry_sdk.integrations.httpx.HttpxIntegration", + "sentry_sdk.integrations.openai.OpenAIIntegration", "sentry_sdk.integrations.pyramid.PyramidIntegration", "sentry_sdk.integrations.redis.RedisIntegration", "sentry_sdk.integrations.rq.RqIntegration", diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py new file mode 100644 index 0000000000..5c05a43916 --- /dev/null +++ b/sentry_sdk/integrations/openai.py @@ -0,0 +1,279 @@ +from sentry_sdk import consts +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Iterable, List, Optional, Callable, Iterator + from sentry_sdk.tracing import Span + +import sentry_sdk +from sentry_sdk._functools import wraps +from sentry_sdk.hub import Hub, _should_send_default_pii +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception + +try: + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings + + if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk +except ImportError: + raise DidNotEnable("OpenAI not installed") + +try: + import tiktoken # type: ignore + + enc = tiktoken.get_encoding("cl100k_base") + + def count_tokens(s): + # type: (str) -> int + return len(enc.encode_ordinary(s)) + + logger.debug("[OpenAI] using tiktoken to count tokens") +except ImportError: + logger.info( + "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs" + "Please install 'tiktoken' if you aren't receiving token usage in Sentry." + "See https://docs.sentry.io/platforms/python/integrations/openai/ for more information." + ) + + def count_tokens(s): + # type: (str) -> int + return 0 + + +COMPLETION_TOKENS_USED = "ai.completion_tоkens.used" +PROMPT_TOKENS_USED = "ai.prompt_tоkens.used" +TOTAL_TOKENS_USED = "ai.total_tоkens.used" + + +class OpenAIIntegration(Integration): + identifier = "openai" + + def __init__(self, include_prompts=True): + # type: (OpenAIIntegration, bool) -> None + self.include_prompts = include_prompts + + @staticmethod + def setup_once(): + # type: () -> None + Completions.create = _wrap_chat_completion_create(Completions.create) + Embeddings.create = _wrap_embeddings_create(Embeddings.create) + + +def _capture_exception(hub, exc): + # type: (Hub, Any) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": "openai", "handled": False}, + ) + hub.capture_event(event, hint=hint) + + +def _calculate_chat_completion_usage( + messages, response, span, streaming_message_responses=None +): + # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None + completion_tokens = 0 + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "completion_tokens") and isinstance( + response.usage.completion_tokens, int + ): + completion_tokens = response.usage.completion_tokens + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + for message in messages: + if "content" in message: + prompt_tokens += count_tokens(message["content"]) + + if completion_tokens == 0: + if streaming_message_responses is not None: + for message in streaming_message_responses: + completion_tokens += count_tokens(message) + elif hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message"): + completion_tokens += count_tokens(choice.message) + + if total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + if completion_tokens != 0: + span.set_data(COMPLETION_TOKENS_USED, completion_tokens) + if prompt_tokens != 0: + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + if total_tokens != 0: + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + +def _wrap_chat_completion_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def new_chat_completion(*args, **kwargs): + # type: (*Any, **Any) -> Any + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + kwargs["messages"] = list(kwargs["messages"]) + messages = kwargs["messages"] + model = kwargs.get("model") + streaming = kwargs.get("stream") + + span = sentry_sdk.start_span( + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + ) + span.__enter__() + try: + res = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + span.__exit__(None, None, None) + raise e from None + + with capture_internal_exceptions(): + if _should_send_default_pii() and integration.include_prompts: + span.set_data("ai.input_messages", messages) + span.set_data("ai.model_id", model) + span.set_data("ai.streaming", streaming) + + if hasattr(res, "choices"): + if _should_send_default_pii() and integration.include_prompts: + span.set_data( + "ai.responses", list(map(lambda x: x.message, res.choices)) + ) + _calculate_chat_completion_usage(messages, res, span) + span.__exit__(None, None, None) + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator # type: Iterator[ChatCompletionChunk] + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list( + map(lambda chunk: "".join(chunk), data_buf) + ) + if ( + _should_send_default_pii() + and integration.include_prompts + ): + span.set_data("ai.responses", all_responses) + _calculate_chat_completion_usage( + messages, res, span, all_responses + ) + span.__exit__(None, None, None) + + res._iterator = new_iterator() + else: + span.set_data("unknown_response", True) + span.__exit__(None, None, None) + return res + + return new_chat_completion + + +def _wrap_embeddings_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + + @wraps(f) + def new_embeddings_create(*args, **kwargs): + # type: (*Any, **Any) -> Any + + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + with sentry_sdk.start_span( + op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + description="OpenAI Embedding Creation", + ) as span: + if "input" in kwargs and ( + _should_send_default_pii() and integration.include_prompts + ): + if isinstance(kwargs["input"], str): + span.set_data("ai.input_messages", [kwargs["input"]]) + elif ( + isinstance(kwargs["input"], list) + and len(kwargs["input"]) > 0 + and isinstance(kwargs["input"][0], str) + ): + span.set_data("ai.input_messages", kwargs["input"]) + if "model" in kwargs: + span.set_data("ai.model_id", kwargs["model"]) + try: + response = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + raise e from None + + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + prompt_tokens = count_tokens(kwargs["input"] or "") + + if total_tokens == 0: + total_tokens = prompt_tokens + + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + return response + + return new_embeddings_create diff --git a/setup.py b/setup.py index 0af275d6af..0299bf91fb 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def get_file_text(file_name): "httpx": ["httpx>=0.16.0"], "huey": ["huey>=2"], "loguru": ["loguru>=0.5"], + "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"], "opentelemetry": ["opentelemetry-distro>=0.35b0"], "opentelemetry-experimental": [ "opentelemetry-distro~=0.40b0", diff --git a/tests/integrations/openai/__init__.py b/tests/integrations/openai/__init__.py new file mode 100644 index 0000000000..d6cc3d5505 --- /dev/null +++ b/tests/integrations/openai/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("openai") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py new file mode 100644 index 0000000000..ecdedd2694 --- /dev/null +++ b/tests/integrations/openai/test_openai.py @@ -0,0 +1,231 @@ +import pytest +from openai import OpenAI, Stream, OpenAIError +from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding +from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.openai import ( + OpenAIIntegration, + COMPLETION_TOKENS_USED, + PROMPT_TOKENS_USED, + TOTAL_TOKENS_USED, +) + +from unittest import mock # python 3.3 and above + + +EXAMPLE_CHAT_COMPLETION = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), +) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + assert span["data"][COMPLETION_TOKENS_USED] == 10 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=None) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "hello world" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.embeddings.create.openai" + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0] + else: + assert "ai.input_messages" not in span["data"] + + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 diff --git a/tox.ini b/tox.ini index a23251f186..1e7ba06a00 100644 --- a/tox.ini +++ b/tox.ini @@ -146,6 +146,11 @@ envlist = {py3.5,py3.11,py3.12}-loguru-v{0.5} {py3.5,py3.11,py3.12}-loguru-latest + # OpenAI + {py3.9,py3.11,py3.12}-openai-v1 + {py3.9,py3.11,py3.12}-openai-latest + {py3.9,py3.11,py3.12}-openai-notiktoken + # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -439,6 +444,13 @@ deps = loguru-v0.5: loguru~=0.5.0 loguru-latest: loguru + # OpenAI + openai-v1: openai~=1.0.0 + openai-v1: tiktoken~=0.6.0 + openai-latest: openai + openai-latest: tiktoken~=0.6.0 + openai-notiktoken: openai + # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro @@ -597,6 +609,7 @@ setenv = httpx: TESTPATH=tests/integrations/httpx huey: TESTPATH=tests/integrations/huey loguru: TESTPATH=tests/integrations/loguru + openai: TESTPATH=tests/integrations/openai opentelemetry: TESTPATH=tests/integrations/opentelemetry pure_eval: TESTPATH=tests/integrations/pure_eval pymongo: TESTPATH=tests/integrations/pymongo