@@ -218,6 +218,7 @@ def _invoke_llm(
         llm_response = cast(Iterator[str], response)
         return LLMResponse(
             output="",
+            # FIXME: Why is this different from the async streaming implementation?
             stream_output=llm_response,
         )
 
@@ -491,6 +492,7 @@ def _invoke_llm(self, *args, **kwargs) -> LLMResponse:
         llm_response = cast(Iterator[str], llm_response)
         return LLMResponse(
             output="",
+            # FIXME: Why is this different from the async streaming implementation?
             stream_output=llm_response,
         )
 
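For reference, the synchronous streaming pattern these two hunks wrap: per the LiteLLM docs, `completion(stream=True)` returns a wrapper that can be iterated directly for chunks, which is what the `Iterator[str]` cast above assumes. A minimal sketch; the model name and prompt are placeholders, not taken from this repo:

```python
import litellm

# Hypothetical model and prompt, for illustration only.
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
)

# The returned stream wrapper is directly iterable; each chunk
# carries an incremental delta (None on the final chunk).
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")
```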
@@ -685,6 +687,8 @@ async def invoke_llm(
         # response = cast(AsyncIterator[str], response)
         return LLMResponse(
             output="",
+            # FIXME: Why is this different from the synchronous streaming implementation?  # noqa: E501
+            # This shouldn't be necessary: https://docs.litellm.ai/docs/completion/stream#async-streaming
             async_stream_output=response.completion_stream,  # pyright: ignore[reportGeneralTypeIssues]
         )
 
@@ -842,6 +846,8 @@ async def invoke_llm(self, *args, **kwargs) -> LLMResponse:
         # the callable returns a generator object
         return LLMResponse(
             output="",
+            # FIXME: Why is this different from the synchronous streaming implementation?  # noqa: E501
+            # This shouldn't be necessary: https://docs.litellm.ai/docs/completion/stream#async-streaming
             async_stream_output=output.completion_stream,
         )
 
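The linked LiteLLM docs illustrate why the `.completion_stream` access flagged in these two FIXMEs shouldn't be necessary: `acompletion(stream=True)` resolves to a wrapper that supports `async for` directly, with no intermediate attribute access. A minimal sketch of that documented pattern; the model name and prompt are placeholders:

```python
import asyncio

import litellm


async def main() -> None:
    # Hypothetical model and prompt, for illustration only.
    response = await litellm.acompletion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
    )
    # Per the docs, the awaited result is itself an async iterator,
    # so no .completion_stream attribute access should be required.
    async for chunk in response:
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())
```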