# File generated from our OpenAPI spec by Stainless.

from __future__ import annotations

from typing import List, Iterable, overload
from functools import partial
from typing_extensions import Literal

import httpx

from .. import _legacy_response
from ..types import Message, MessageParam, MessageStreamEvent, message_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
    required_args,
    maybe_transform,
    async_maybe_transform,
)
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from .._streaming import Stream, AsyncStream
from .._base_client import (
    make_request_options,
)
from ..lib.streaming import (
    MessageStream,
    MessageStreamT,
    AsyncMessageStream,
    AsyncMessageStreamT,
    MessageStreamManager,
    AsyncMessageStreamManager,
)

__all__ = ["Messages", "AsyncMessages"]

class Messages(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> MessagesWithRawResponse:
        return MessagesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> MessagesWithStreamingResponse:
        return MessagesWithStreamingResponse(self)

    @overload
    def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @overload
    def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        stream: Literal[True],
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Stream[MessageStreamEvent]:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @overload
    def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        stream: bool,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message | Stream[MessageStreamEvent]:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
    def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message | Stream[MessageStreamEvent]:
        return self._post(
            "/v1/messages",
            body=maybe_transform(
                {
                    "max_tokens": max_tokens,
                    "messages": messages,
                    "model": model,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "system": system,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                message_create_params.MessageCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Message,
            stream=stream or False,
            stream_cls=Stream[MessageStreamEvent],
        )
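    # Illustrative usage sketch (not part of the generated API surface). Assumes a
    # client constructed elsewhere, e.g. `client = anthropic.Anthropic()`, and an
    # example model name; substitute a model you have access to.
    #
    #     message = client.messages.create(
    #         max_tokens=1024,
    #         messages=[{"role": "user", "content": "Hello, Claude"}],
    #         model="claude-3-opus-20240229",
    #     )
    #     print(message.content)
    #
    # Passing `stream=True` instead returns a `Stream[MessageStreamEvent]`, which is
    # consumed with a plain `for` loop over the server-sent events.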
    @overload
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> MessageStreamManager[MessageStream]:
        """Create a Message stream"""
        ...
    @overload
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        event_handler: type[MessageStreamT],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> MessageStreamManager[MessageStreamT]:
        """Create a Message stream"""
        ...
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        event_handler: type[MessageStreamT] = MessageStream,  # type: ignore[assignment]
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> MessageStreamManager[MessageStream] | MessageStreamManager[MessageStreamT]:
        """Create a Message stream"""
        extra_headers = {
            "X-Stainless-Stream-Helper": "messages",
            "X-Stainless-Custom-Event-Handler": "true" if event_handler != MessageStream else "false",
            **(extra_headers or {}),
        }
        make_request = partial(
            self._post,
            "/v1/messages",
            body=maybe_transform(
                {
                    "max_tokens": max_tokens,
                    "messages": messages,
                    "model": model,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "system": system,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                    "stream": True,
                },
                message_create_params.MessageCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Message,
            stream=True,
            stream_cls=event_handler,
        )
        return MessageStreamManager(make_request)
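    # Illustrative usage sketch (assumptions: a configured `anthropic.Anthropic()`
    # client and an example model name). `stream()` returns a context manager that
    # opens the SSE connection on entry and closes it on exit:
    #
    #     with client.messages.stream(
    #         max_tokens=1024,
    #         messages=[{"role": "user", "content": "Hello, Claude"}],
    #         model="claude-3-opus-20240229",
    #     ) as stream:
    #         for text in stream.text_stream:
    #             print(text, end="", flush=True)
    #     message = stream.get_final_message()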
class AsyncMessages(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
        return AsyncMessagesWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
        return AsyncMessagesWithStreamingResponse(self)

    @overload
    async def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @overload
    async def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        stream: Literal[True],
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> AsyncStream[MessageStreamEvent]:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @overload
    async def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        stream: bool,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message | AsyncStream[MessageStreamEvent]:
"""
|
|
Create a Message.
|
|
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for for either single queries or stateless
|
|
multi-turn conversations.
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.anthropic.com/claude/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages. The first message must always use the `user` role.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
Starting with Claude 3 models, you can also send image content blocks:
|
|
|
|
```json
|
|
{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/jpeg",
|
|
"data": "/9j/4AAQSkZJRg..."
|
|
}
|
|
},
|
|
{ "type": "text", "text": "What is in this image?" }
|
|
]
|
|
}
|
|
```
|
|
|
|
We currently support the `base64` source type for images, and the `image/jpeg`,
|
|
`image/png`, `image/gif`, and `image/webp` media types.
|
|
|
|
See [examples](https://docs.anthropic.com/claude/reference/messages-examples)
|
|
for more input examples.
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.anthropic.com/claude/docs/system-prompts), you can
|
|
use the top-level `system` parameter — there is no `"system"` role for input
|
|
messages in the Messages API.
|
|
|
|
model: The model that will complete your prompt.
|
|
|
|
See [models](https://docs.anthropic.com/claude/docs/models-overview) for
|
|
additional details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.anthropic.com/claude/reference/messages-streaming)
|
|
for details.
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.anthropic.com/claude/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
    @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
    async def create(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Message | AsyncStream[MessageStreamEvent]:
        return await self._post(
            "/v1/messages",
            body=await async_maybe_transform(
                {
                    "max_tokens": max_tokens,
                    "messages": messages,
                    "model": model,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "system": system,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                message_create_params.MessageCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Message,
            stream=stream or False,
            stream_cls=AsyncStream[MessageStreamEvent],
        )
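    # Illustrative usage sketch (assumptions: a configured `anthropic.AsyncAnthropic()`
    # client and an example model name); the call must be awaited:
    #
    #     message = await client.messages.create(
    #         max_tokens=1024,
    #         messages=[{"role": "user", "content": "Hello, Claude"}],
    #         model="claude-3-opus-20240229",
    #     )
    #
    # With `stream=True`, the awaited result is an `AsyncStream[MessageStreamEvent]`,
    # consumed with `async for`.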
    @overload
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncMessageStreamManager[AsyncMessageStream]:
        """Create a Message stream"""
        ...
    @overload
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        event_handler: type[AsyncMessageStreamT],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncMessageStreamManager[AsyncMessageStreamT]:
        """Create a Message stream"""
        ...
    def stream(
        self,
        *,
        max_tokens: int,
        messages: Iterable[MessageParam],
        model: str,
        metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        system: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        event_handler: type[AsyncMessageStreamT] = AsyncMessageStream,  # type: ignore[assignment]
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncMessageStreamManager[AsyncMessageStream] | AsyncMessageStreamManager[AsyncMessageStreamT]:
        """Create a Message stream"""
        extra_headers = {
            "X-Stainless-Stream-Helper": "messages",
            "X-Stainless-Custom-Event-Handler": "true" if event_handler != AsyncMessageStream else "false",
            **(extra_headers or {}),
        }
        request = self._post(
            "/v1/messages",
            body=maybe_transform(
                {
                    "max_tokens": max_tokens,
                    "messages": messages,
                    "model": model,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "system": system,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                    "stream": True,
                },
                message_create_params.MessageCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Message,
            stream=True,
            stream_cls=event_handler,
        )
        return AsyncMessageStreamManager(request)
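    # Illustrative usage sketch (assumptions: a configured `anthropic.AsyncAnthropic()`
    # client and an example model name). Note that `stream()` itself is not awaited;
    # it returns an async context manager:
    #
    #     async with client.messages.stream(
    #         max_tokens=1024,
    #         messages=[{"role": "user", "content": "Hello, Claude"}],
    #         model="claude-3-opus-20240229",
    #     ) as stream:
    #         async for text in stream.text_stream:
    #             print(text, end="", flush=True)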
class MessagesWithRawResponse:
    def __init__(self, messages: Messages) -> None:
        self._messages = messages

        self.create = _legacy_response.to_raw_response_wrapper(
            messages.create,
        )


class AsyncMessagesWithRawResponse:
    def __init__(self, messages: AsyncMessages) -> None:
        self._messages = messages

        self.create = _legacy_response.async_to_raw_response_wrapper(
            messages.create,
        )


class MessagesWithStreamingResponse:
    def __init__(self, messages: Messages) -> None:
        self._messages = messages

        self.create = to_streamed_response_wrapper(
            messages.create,
        )


class AsyncMessagesWithStreamingResponse:
    def __init__(self, messages: AsyncMessages) -> None:
        self._messages = messages

        self.create = async_to_streamed_response_wrapper(
            messages.create,
        )