OpenAI Batcher (`oaib`)

A Python library for making rate-limited, async batch requests to the OpenAI API.

Use Batch to make batch requests as quickly as possible given TPM/RPM limits.
Use Auto to automatically read your rate limits from OpenAI's response headers, and run the job as fast as possible.

This notebook is available at README.ipynb.

Usage

Set OpenAI API Key

import os
os.environ['OPENAI_API_KEY'] = input()

Using the `Batch` class

You can mix and match endpoints as needed for regular Batch, but it's not recommended as the rate limits for different endpoints/models will differ. For maximum efficiency, use Auto.

from oaib import Batch

# Use low rate limits for this example.
batch = Batch(rpm=100, tpm=1_000, workers=5)

# Creating a batch with 20 chat completions.
for i in range(20):
    await batch.add(
        "chat.completions.create", 
        model="gpt-3.5-turbo", 
        messages=[{"role": "user", "content": "say hello"}]
    )

await batch.run()

✅ DONE: 100%|█████████████| 20/20 [00:22<00:00,  1.12s/req]
RPM:  53%|█████████████████████▏                  | 53.0/100
TPM:  93%|███████████████████████████████████▎  | 928.0/1000

Run took 20.02s.

	endpoint	model	messages	result
0	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzMf7oXHlLWpsISVpAaUPxps0g5...
1	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzNeFY9TLN71o2FMEqGssHIrOQq...
2	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzNDiD3ikFtBZ4hHXWEsLONHUeS...
3	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzP9dccsvrGsOR3X5HgmHqsR2fm...
4	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzQV7ZnIoXccx9R8dIfS4rdPd0U...
5	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzRVQvp3wwmEvbFzNPtrXBmcOhR...
6	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzRSw7iTCLs0uu8fWZwDcaPGB0s...
7	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzS6D1gACsJW6JXvuS42N4lQLh7...
8	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzUFmpIzWjKsNGnlLvZW3DhF752...
9	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzU6Mg6Zk4BC5uelndmHjmGAQ0I...
10	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzV5K4OEk80dDuSwohiTualLOoO...
11	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzXCcmbcuy1EQPskJrfN5po1Ix9...
12	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzX4UfaBPDO3fF8vMO1dsQ6tfiT...
13	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzZme2VBNhckfItEZRJqmpC3E53...
14	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzaYddjxrFpmYUDjMHXlDPgS7G4...
15	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzcTLFClvwjtPNNyv3KJ2xvZ8dQ...
16	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzcuWlMUwqXj0AeMQkZqzNIQvJo...
17	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzepfAhFnZj1AlnVSyNHHMyZ0mK...
18	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzfdULvZx6OyUZpARpTMNKYzfKx...
19	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVzfyEru2f67JyunYCdPMaC0fD2H...

Using the `Auto` class

Automatically use the TPM/RPM rate limits reported in OpenAI's API response headers.

from oaib import Auto

# Automatically set rate limits.
batch = Auto(workers=8)

# Fetch 1,000 chat completions as quickly as possible, setting rate limits
# automatically from OpenAI's response headers.
for i in range(1000):
    await batch.add(
        "chat.completions.create", 
        model="gpt-3.5-turbo", 
        messages=[{"role": "user", "content": "say hello"}]
    )

await batch.run()

✅ DONE: 100%|█████████| 1000/1000 [00:10<00:00, 92.98req/s]
RPM:  56%|████████████████████                | 5573.0/10000
TPM:   5%|█▌                               | 94401.0/2000000

Run took 12.58s.

	endpoint	model	messages	result
0	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVxz211nsiQQSY2k54r4r141UX83...
1	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVy0TrnGax3XqkDOlEiImosPnvIL...
2	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVxzMdsF5v1je6iAmvjK7BCZKQna...
3	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVy0KiRWYPtQ099p3b1k1HfYBYwT...
4	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVy0M7l6Fg0mvWCpOXomVSqV6Pow...
...	...	...	...	...
995	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVyBemEPgb6lV5Opnu8X9UQ7T9iZ...
996	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVyBBlh4PMbI9qtca80UyMbrOGAF...
997	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVyBWveGsMeSLS5SzUbXIiMLvGaS...
998	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVyBbBnX5SleJWSDIHqZ8lS0y15V...
999	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'say hello'}]	{'id': 'chatcmpl-8qVyBYTizFbBIu3RQ9XlvKDlnSuEG...

1000 rows × 4 columns

Notes

It is not possible to perfectly guarantee that the tokens-per-minute (TPM) limit is not breached, because the total token usage is not known until the response comes back.

Use the safety param to set the rate limit tolerance. By default it is set to 10%, meaning the batcher will wait until the predicted TPM (the current TPM plus the average number of tokens per request) drops below 90% of the limit.
By default, important logs are stored at oaib.txt. This can be disabled using the silent flag.
There is a known display issue with the TPM/RPM progress bars in Jupyter Notebooks, affecting the Auto class only. It is caused by a tqdm.notebook bug where only the initial totals (here, our limits) are used to calculate the width of the bar, and the Auto class updates these values only after the first request. The text percentage displays are accurate.

Metadata and Index

You can add custom metadata to your observations with add(metadata={...}), and set the index for the DataFrame with Batch(index=[...]) and Auto(index=[...]).

from oaib import Batch

n = 5
batch = Batch(rpm=1000, tpm=10000, workers=5, index=["difficulty", "i"])
difficulties = ["easy", "medium", "hard"]

for difficulty in difficulties:
    for i in range(n):
        await batch.add(
            "chat.completions.create",
            metadata={"difficulty": difficulty, "i": i},
            model="gpt-3.5-turbo",
            messages=[{
                "role": "user", 
                "content": f"difficulty: {difficulty}. write a math problem."
            }]
        )

await batch.run()

✅ DONE: 100%|█████████| 15/15 [00:01<00:00, 10.52req/s]
RPM:  56%|████████████████████                | 631.0/1000
TPM:     |                                    | 10781.0/?

Run took 1.43s.

		endpoint	model	messages	result
difficulty	i
easy	0	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: easy...	{'id': 'chatcmpl-8rdiFkLTnjs4LbX2ZUKyb1UaRcPwH...
	1	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: easy...	{'id': 'chatcmpl-8rdiFtEcz6CEvO8K9jZpAdbFaaJ3o...
	2	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: easy...	{'id': 'chatcmpl-8rdiFEmy4TfO4iR3aJbh4u9lgAD1W...
	3	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: easy...	{'id': 'chatcmpl-8rdiFVw5YuHY8WhuNqJjyJcFO9Mhs...
	4	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: easy...	{'id': 'chatcmpl-8rdiGoYqWA5wH3xoFKVl8pRGGTuDZ...
hard	0	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: hard...	{'id': 'chatcmpl-8rdiG8ZAUhcsBOtgtPuOjblzEo14a...
	1	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: hard...	{'id': 'chatcmpl-8rdiGgl9uEe4ASQgt5uzwMpeJhnU6...
	2	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: hard...	{'id': 'chatcmpl-8rdiGfVkiZqrE1p2TxeKnCxc2zzUb...
	3	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: hard...	{'id': 'chatcmpl-8rdiGdKxmYC6mS4QSuiW3HjsMRW05...
	4	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: hard...	{'id': 'chatcmpl-8rdiGqTe3MVy8FGJ6qtVAjxmMODy6...
medium	0	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: medi...	{'id': 'chatcmpl-8rdiGsk7ohvIVwzuFMfO3cH9zNLW4...
	1	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: medi...	{'id': 'chatcmpl-8rdiG64Y66W8CZZ9MI4xiVNHheHiF...
	2	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: medi...	{'id': 'chatcmpl-8rdiGGmXfXx0uRuKafeKODGQe42vz...
	3	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: medi...	{'id': 'chatcmpl-8rdiG95XCozUiFXY7ryA4rSfzbwRk...
	4	chat.completions.create	gpt-3.5-turbo	[{'role': 'user', 'content': 'difficulty: medi...	{'id': 'chatcmpl-8rdiGi6nF5FLMdPIYrkrYgI15Yfcw...

jvmncs/oaib

OpenAI Batcher (oaib)