# File generated from our OpenAPI spec by Stainless.

from __future__ import annotations

from typing import List, Union, overload
from typing_extensions import Literal

import httpx

from .. import _legacy_response
from ..types import Completion, completion_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
    required_args,
    maybe_transform,
    async_maybe_transform,
)
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from .._streaming import Stream, AsyncStream
from .._base_client import (
    make_request_options,
)

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        return CompletionsWithStreamingResponse(self)

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        stream: Literal[True],
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Stream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        stream: bool,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | Stream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | Stream[Completion]:
        return self._post(
            "/v1/complete",
            body=maybe_transform(
                {
                    "max_tokens_to_sample": max_tokens_to_sample,
                    "model": model,
                    "prompt": prompt,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=Stream[Completion],
        )
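

# Illustrative usage sketch (not part of the generated client code): a minimal,
# hedged example of calling the legacy Text Completions endpoint through the
# synchronous resource above. It assumes the `Anthropic` client class and the
# `HUMAN_PROMPT` / `AI_PROMPT` prompt constants exported by this package, and an
# `ANTHROPIC_API_KEY` environment variable; adjust to your setup.
#
#     from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
#
#     client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
#     completion = client.completions.create(
#         model="claude-2.1",
#         max_tokens_to_sample=256,
#         prompt=f"{HUMAN_PROMPT} Why is the sky blue?{AI_PROMPT}",
#     )
#     print(completion.completion)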


class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        return AsyncCompletionsWithStreamingResponse(self)

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        stream: Literal[True],
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> AsyncStream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        stream: bool,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | AsyncStream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API.

        We recommend using the
        [Messages API](https://docs.anthropic.com/claude/reference/messages_post) going
        forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/claude/docs/models-overview) for
              additional details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See
              [prompt validation](https://anthropic.readme.io/claude/reference/prompt-validation)
              and our guide to
              [prompt design](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design)
              for more details.

          stream: Whether to incrementally stream the response using server-sent events.

              See
              [streaming](https://docs.anthropic.com/claude/reference/text-completions-streaming)
              for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-3-opus-20240229", "claude-2.1", "claude-instant-1"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | AsyncStream[Completion]:
        return await self._post(
            "/v1/complete",
            body=await async_maybe_transform(
                {
                    "max_tokens_to_sample": max_tokens_to_sample,
                    "model": model,
                    "prompt": prompt,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=AsyncStream[Completion],
        )
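

# Illustrative usage sketch (not part of the generated client code): a minimal,
# hedged example of the async variant with streaming. It assumes the
# `AsyncAnthropic` client class and the `HUMAN_PROMPT` / `AI_PROMPT` constants
# exported by this package; the streamed response is iterated as server-sent
# event chunks, each a `Completion` with a `.completion` text delta.
#
#     import asyncio
#     from anthropic import AsyncAnthropic, HUMAN_PROMPT, AI_PROMPT
#
#     async def main() -> None:
#         client = AsyncAnthropic()
#         stream = await client.completions.create(
#             model="claude-2.1",
#             max_tokens_to_sample=256,
#             prompt=f"{HUMAN_PROMPT} Write a haiku about the sea.{AI_PROMPT}",
#             stream=True,
#         )
#         async for chunk in stream:
#             print(chunk.completion, end="", flush=True)
#
#     asyncio.run(main())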


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = to_streamed_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )