154154)
155155from langchain_openai .chat_models ._stream_events import (
156156 aconvert_openai_completions_stream ,
157+ aconvert_openai_responses_stream ,
157158 convert_openai_completions_stream ,
159+ convert_openai_responses_stream ,
158160)
159161from langchain_openai .data ._profiles import _PROFILES
160162
@@ -1892,22 +1894,24 @@ def _stream_chat_model_events(
18921894 message_id : str | None = None ,
18931895 ** kwargs : Any ,
18941896 ) -> Iterator [MessagesData ]:
1895- """Emit OpenAI-native content-block events for the Chat Completions path .
1897+ """Emit OpenAI-native content-block events for Completions and Responses .
18961898
1897- Defers to the compat bridge for cases this converter does not yet
1898- specialize: the Responses API, structured output (`response_format`),
1899- and raw-header mode. Detected by core's `_iter_v2_events`.
1899+ The standard Completions and Responses API paths run through their
1900+ native converters. Structured output (`response_format`) and raw-header
1901+ mode still defer to the compat bridge over `_stream`, since those keep
1902+ the final-completion handling only `_stream` performs. Detected by
1903+ core's `_iter_v2_events`.
19001904 """
1901- # Responses API / structured output / raw headers: bridge over `_stream`,
1902- # which (on `ChatOpenAI`) routes to the Responses path when applicable.
1905+ use_responses = self ._use_responses_api ({** kwargs , ** self .model_kwargs })
19031906 # `response_format` may arrive via call kwargs or be baked into
19041907 # `model_kwargs`; both fold into the payload, so check both.
1905- if (
1906- self ._use_responses_api ({** kwargs , ** self .model_kwargs })
1907- or kwargs .get ("response_format" ) is not None
1908+ has_response_format = (
1909+ kwargs .get ("response_format" ) is not None
19081910 or self .model_kwargs .get ("response_format" ) is not None
1909- or self .include_response_headers
1910- ):
1911+ )
1912+ # Structured output and raw-header mode keep the post-loop /
1913+ # final-completion handling that only `_stream` performs — defer those.
1914+ if has_response_format or self .include_response_headers :
19111915 # Forward kwargs untouched (as core's `_iter_v2_events` would):
19121916 # `_stream` handles `stream_usage` itself, and the Responses path
19131917 # rejects a stray `stream_usage` kwarg, so we must not inject one.
@@ -1921,6 +1925,35 @@ def _stream_chat_model_events(
19211925 message_id = message_id ,
19221926 )
19231927 return
1928+ if use_responses :
1929+ self ._ensure_sync_client_available ()
1930+ kwargs ["stream" ] = True
1931+ payload = self ._get_request_payload (messages , stop = stop , ** kwargs )
1932+ try :
1933+ with self .root_client .responses .create (** payload ) as response :
1934+ for event in convert_openai_responses_stream (
1935+ response ,
1936+ _convert_responses_chunk_to_generation_chunk ,
1937+ # Always None here: the `response_format` (structured
1938+ # output) path is handled by the bridge branch above.
1939+ schema = None ,
1940+ output_version = self .output_version ,
1941+ message_id = message_id ,
1942+ ):
1943+ if (
1944+ run_manager is not None
1945+ and event ["event" ] == "content-block-delta"
1946+ and event ["delta" ].get ("type" ) == "text-delta"
1947+ ):
1948+ run_manager .on_llm_new_token (
1949+ str (event ["delta" ].get ("text" , "" ))
1950+ )
1951+ yield event
1952+ except openai .BadRequestError as e :
1953+ _handle_openai_bad_request (e )
1954+ except openai .APIError as e :
1955+ _handle_openai_api_error (e )
1956+ return
19241957
19251958 self ._ensure_sync_client_available ()
19261959 kwargs ["stream" ] = True
@@ -1964,12 +1997,14 @@ async def _astream_chat_model_events(
19641997 ** kwargs : Any ,
19651998 ) -> AsyncIterator [MessagesData ]:
19661999 """Async twin of `_stream_chat_model_events`."""
1967- if (
1968- self . _use_responses_api ({ ** kwargs , ** self . model_kwargs })
1969- or kwargs .get ("response_format" ) is not None
2000+ use_responses = self . _use_responses_api ({ ** kwargs , ** self . model_kwargs })
2001+ has_response_format = (
2002+ kwargs .get ("response_format" ) is not None
19702003 or self .model_kwargs .get ("response_format" ) is not None
1971- or self .include_response_headers
1972- ):
2004+ )
2005+ # Structured output and raw-header mode keep the post-loop /
2006+ # final-completion handling that only `_astream` performs — defer those.
2007+ if has_response_format or self .include_response_headers :
19732008 # Forward kwargs untouched (as core's `_aiter_v2_events` would):
19742009 # `_astream` handles `stream_usage` itself, and the Responses path
19752010 # rejects a stray `stream_usage` kwarg, so we must not inject one.
@@ -1984,6 +2019,42 @@ async def _astream_chat_model_events(
19842019 ):
19852020 yield event
19862021 return
2022+ if use_responses :
2023+ kwargs ["stream" ] = True
2024+ payload = self ._get_request_payload (messages , stop = stop , ** kwargs )
2025+ try :
2026+ response = await self .root_async_client .responses .create (** payload )
2027+ async with response as stream :
2028+ # Mirror `_astream_responses`: apply per-chunk stall
2029+ # protection before the converter consumes the stream.
2030+ timed_stream = _astream_with_chunk_timeout (
2031+ stream ,
2032+ self .stream_chunk_timeout ,
2033+ model_name = self .model_name ,
2034+ )
2035+ async for event in aconvert_openai_responses_stream (
2036+ timed_stream ,
2037+ _convert_responses_chunk_to_generation_chunk ,
2038+ # Always None here: the `response_format` (structured
2039+ # output) path is handled by the bridge branch above.
2040+ schema = None ,
2041+ output_version = self .output_version ,
2042+ message_id = message_id ,
2043+ ):
2044+ if (
2045+ run_manager is not None
2046+ and event ["event" ] == "content-block-delta"
2047+ and event ["delta" ].get ("type" ) == "text-delta"
2048+ ):
2049+ await run_manager .on_llm_new_token (
2050+ str (event ["delta" ].get ("text" , "" ))
2051+ )
2052+ yield event
2053+ except openai .BadRequestError as e :
2054+ _handle_openai_bad_request (e )
2055+ except openai .APIError as e :
2056+ _handle_openai_api_error (e )
2057+ return
19872058
19882059 kwargs ["stream" ] = True
19892060 stream_usage = self ._should_stream_usage (
0 commit comments