tomviner/llm-claude

Migrating from Text Completions to Messages

First of all, thanks for this very useful plugin!

Messages will soon replace Text Completions as the primary way to use Anthropic's Claude API. A full migration guide is available here.

Messages handles things like system prompts and user/assistant roles more elegantly. Updating llm-claude to use Messages should be very straightforward with the migration guide.
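
For anyone skimming, here is a rough sketch of the difference in call shape (assuming the Python SDK version that exposed Messages under the beta namespace at the time; the migration guide has the authoritative details):

from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT

anthropic = Anthropic()

# Text Completions (legacy): one flat prompt string with role markers
completion = anthropic.completions.create(
    model="claude-2.1",
    max_tokens_to_sample=256,
    prompt=f"{HUMAN_PROMPT} Hello, Claude{AI_PROMPT}",
)

# Messages: structured role/content pairs, with system as its own parameter
message = anthropic.beta.messages.create(
    model="claude-2.1",
    max_tokens=256,
    system="You are a helpful assistant.",
    messages=[{"role": "user", "content": "Hello, Claude"}],
)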

Is that something that's planned, or something you'd be willing to look at by any chance?

In the spirit of contributing rather than just asking, I've taken a stab at updating __init__.py myself to use Messages instead of Text Completions. I'm sure it could be improved, but it seems to work as intended:

from typing import Optional

import llm
from anthropic import Anthropic
from pydantic import Field, field_validator

@llm.hookimpl
def register_models(register):
    # Registering models as per the latest naming conventions
    register(Claude("claude-instant-1.2"), aliases=("claude-instant",))
    register(Claude("claude-2.1"), aliases=("claude",))

class Claude(llm.Model):
    needs_key = "claude"
    key_env_var = "ANTHROPIC_API_KEY"
    can_stream = True

    class Options(llm.Options):
        max_tokens: Optional[int] = Field(
            description="The maximum number of tokens for the model to generate",
            default=4096,  # Adjusted to the maximum allowed for claude-2.1
        )

        @field_validator("max_tokens")
        def validate_max_tokens(cls, max_tokens):
            # Allow None (the field is Optional); otherwise enforce the range
            if max_tokens is not None and not (0 < max_tokens <= 4096):
                raise ValueError("max_tokens must be in range 1-4096 for claude-2.1")
            return max_tokens

    def __init__(self, model_id):
        self.model_id = model_id

    def generate_prompt_messages(self, prompt, conversation):
        # Rebuild prior exchanges as alternating user/assistant messages
        messages = []
        if conversation:
            for response in conversation.responses:
                messages.append({"role": "user", "content": response.prompt.prompt})
                messages.append({"role": "assistant", "content": response.text()})
        # Add the current prompt with the role 'user'
        messages.append({"role": "user", "content": prompt})
        return messages

    def execute(self, prompt, stream, response, conversation):
        anthropic = Anthropic(api_key=self.get_key())

        messages = self.generate_prompt_messages(prompt.prompt, conversation)

        if stream:
            # Handling streaming responses
            with anthropic.beta.messages.stream(
                max_tokens=prompt.options.max_tokens,
                messages=messages,
                model=self.model_id
            ) as stream_response:
                for text in stream_response.text_stream:
                    yield text
        else:
            # Handling non-streaming response
            message_response = anthropic.beta.messages.create(
                model=self.model_id,
                max_tokens=prompt.options.max_tokens,
                messages=messages
            )
            # Concatenating text from content blocks (objects with a .text attribute)
            yield "".join(content_block.text for content_block in message_response.content)

    def __str__(self):
        return "Anthropic: {}".format(self.model_id)

One very useful improvement would be to let the user pass their own strings for system as well as user and assistant messages. This would make it easy to use system prompts, as well as "put words in Claude's mouth" as Anthropic puts it (which has been shown to be very effective at preventing Claude from refusing requests, or nudging it in a particular direction).
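
In raw Messages terms, "putting words in Claude's mouth" just means ending the messages list with a partial assistant turn that the model then continues, e.g.:

messages = [
    {"role": "user", "content": "Hello, Claude"},
    # Prefilled assistant turn: the model's output picks up from here
    {"role": "assistant", "content": "Hello, my name is"},
]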

After digging into the llm documentation, here is an attempt at implementing what I mentioned in my last message, to make full use of the Claude API. I also added comments and docstrings to make the code more readable:

from typing import Optional
import llm
from anthropic import Anthropic
from pydantic import Field, field_validator

@llm.hookimpl
def register_models(register):
    """
    Registers the Claude models with the llm library.
    This function is called by the llm library to discover available models.
    """
    # Registering models with their latest versions and aliases for convenience
    register(Claude("claude-instant-1.2"), aliases=("claude-instant",))
    register(Claude("claude-2.1"), aliases=("claude",))

class Claude(llm.Model):
    """
    Claude class representing a model from Anthropic.
    Inherits from llm.Model and integrates with the Anthropic API.
    """

    # Class attributes required for API integration
    needs_key = "claude"
    key_env_var = "ANTHROPIC_API_KEY"
    can_stream = True  # Indicates support for streaming responses

    class Options(llm.Options):
        """
        Options subclass to handle model-specific options.
        """
        # Option for setting maximum tokens to generate in responses
        max_tokens: Optional[int] = Field(
            description="The maximum number of tokens for the model to generate",
            default=4096,  # Default value set as per model's capabilities
        )

        @field_validator("max_tokens")
        def validate_max_tokens(cls, max_tokens):
            """
            Validates the max_tokens option to ensure it's within acceptable limits.
            """
            # Allow None (the field is Optional); otherwise enforce the range
            if max_tokens is not None and not (0 < max_tokens <= 4096):
                raise ValueError("max_tokens must be in range 1-4096 for claude-2.1")
            return max_tokens

    def __init__(self, model_id):
        """
        Initializes the Claude model with a given model ID.
        """
        self.model_id = model_id

    def prompt(self, prompt_text=None, system=None, messages=None, **options):
        """
        Sends a prompt to the model. Handles both simple text prompts and structured message lists.

        :param prompt_text: A simple text prompt, optional if messages are provided.
        :param system: A system prompt to provide context or instructions to the model.
        :param messages: A list of messages for structured interaction, each with a role and content.
        :param options: Additional options such as max_tokens.
        :return: The model's response.
        """
        # Building the list of messages based on the provided parameters
        message_list = []
        if prompt_text is not None:
            message_list.append({'role': 'user', 'content': prompt_text})
        if messages is not None:
            message_list.extend(messages)

        # Include system prompt if provided
        system_prompt = system if system else ""

        # Execute the model call with the constructed messages and options
        response = self.execute(message_list, system_prompt, **options)
        return response

    def execute(self, messages, system_prompt, **options):
        """
        Executes the model's response generation based on messages and system prompt.

        :param messages: Structured messages for the conversation.
        :param system_prompt: System-level prompt for context.
        :param options: Additional execution options like max_tokens.
        :return: A generator yielding the model's response.
        """
        # Initialize Anthropic API client with the provided API key
        anthropic = Anthropic(api_key=self.get_key())

        # Handle streaming and non-streaming responses differently
        if self.can_stream:
            # Streaming response handling
            with anthropic.beta.messages.stream(
                model=self.model_id,
                max_tokens=options.get('max_tokens', 4096),
                system=system_prompt,
                messages=messages
            ) as stream_response:
                for text in stream_response.text_stream:
                    yield text
        else:
            # Non-streaming response handling
            message_response = anthropic.beta.messages.create(
                model=self.model_id,
                max_tokens=options.get('max_tokens', 4096),
                system=system_prompt,
                messages=messages
            )
            # Concatenate and yield text from content blocks (objects with a .text attribute)
            yield "".join(content_block.text for content_block in message_response.content)

    def __str__(self):
        """
        String representation of the model.
        """
        return "Anthropic: {}".format(self.model_id)

Here is a minimal working example using a system prompt and "putting words in Claude's mouth":

import llm

model = llm.get_model('claude')
custom_prompt = {
    "system": "Today is January 1, 2024.",
    "messages": [
        {"role": "user", "content": "Hello, Claude"},
        {"role": "assistant", "content": "Hello, my name is"}
    ]
}
response = model.prompt(**custom_prompt, max_tokens=1000)

response_text = ''.join(response)
stripped_response = response_text.strip()
print(stripped_response)
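
One caveat with prefill (an observation about the technique, not something the code above handles): the output continues the partial assistant message, so reconstructing the full sentence means joining the prefill with the response:

# The output continues "Hello, my name is", so the full greeting is roughly:
prefill = custom_prompt["messages"][-1]["content"]
full_greeting = prefill + " " + stripped_response  # exact spacing may vary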

Thanks for these proposals @9j7axvsLuF, I like them. And I see @bderenzi has put something together at main...bderenzi:llm-claude:main

I'm up for reviewing or collaborating on a PR