
Add AsyncComplete to do LLM completions asynchronously #68

Merged: clam004 merged 3 commits into main from clam004/async-client on Jan 13, 2024

Conversation

@clam004 (Contributor) commented Jan 12, 2024

Issue: https://linear.app/together-ai/issue/ENG-388/async-class-for-python-library

Describe your changes

This PR adds together.AsyncComplete, an asynchronous counterpart to the existing together.Complete class. Example usage:

import os
import asyncio
import time      # time/random/string are used by the timing test below
import random
import string

import together

from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env

TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')

model = "togethercomputer/llama-2-7b"
prompt = "one 2 three 4"

# Existing synchronous API: blocks until the completion returns
output = together.Complete.create(
    prompt=prompt,
    model=model,
    max_tokens=8,
)

print(output['output']['choices'][0]['text'])

# New asynchronous API. Top-level await like this works in a notebook
# or the `python -m asyncio` REPL; in a plain script, wrap the call in
# an async function and run it with asyncio.run (see the sketch below).
output = await together.AsyncComplete.create(
    prompt=prompt,
    model=model,
    max_tokens=8,
)

print(output.choices[0].text)
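
To use the async call from a plain script rather than a notebook, it can be wrapped like this (a minimal sketch; the `main` name is illustrative, not part of the library):

# Wrap the top-level await in a coroutine so a plain script can run it.
async def main():
    output = await together.AsyncComplete.create(
        prompt=prompt,
        model=model,
        max_tokens=8,
    )
    print(output.choices[0].text)

asyncio.run(main())  # drives the coroutine to completion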

A timing test showing that the non-blocking (async) version completes a batch of requests faster:

# Synchronous version: blocks until the completion returns
def sync_example(prompt, model, max_tokens):
    output = together.Complete.create(prompt=prompt, model=model, max_tokens=max_tokens)
    return output['output']['choices'][0]['text']

# Asynchronous version: yields control while waiting on the network
async def async_example(prompt, model, max_tokens):
    output = await together.AsyncComplete.create(prompt=prompt, model=model, max_tokens=max_tokens)
    return output.choices[0].text

# Run the same batch of requests twice: serially, then concurrently
async def run_concurrent_requests(prompt="one 2 three 4", model="togethercomputer/llama-2-7b", max_tokens=8, num_requests=9):
    def generate_random_prompt():
        # Insert a random space or letter at a random position so each
        # request uses a slightly different prompt
        random_char = random.choice(string.ascii_letters + ' ')
        i = random.randint(0, len(prompt))
        return prompt[:i] + random_char + prompt[i:]

    # Synchronous: the requests run one after another
    start_time_sync = time.time()
    sync_results = [sync_example(generate_random_prompt(), model, max_tokens) for _ in range(num_requests)]
    end_time_sync = time.time()
    print(f"Synchronous execution time: {end_time_sync - start_time_sync} seconds")

    # Asynchronous: asyncio.gather runs all the requests concurrently
    start_time_async = time.time()
    async_results = await asyncio.gather(
        *[async_example(generate_random_prompt(), model, max_tokens) for _ in range(num_requests)]
    )
    end_time_async = time.time()
    print(f"Asynchronous execution time: {end_time_async - start_time_async} seconds")

    # Print the results
    print("\nSynchronous Results:")
    for result in sync_results:
        print(result)

    print("\nAsynchronous Results:")
    for result in async_results:
        print(result)

# Run the comparison with default arguments
asyncio.run(run_concurrent_requests())

Synchronous execution time: 3.3800830841064453 seconds
Asynchronous execution time: 0.6449248790740967 seconds
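
With a larger batch you may want to cap how many requests are in flight at once. Here is a minimal sketch using asyncio.Semaphore; the helper names and the limit of 4 are illustrative choices, not part of this PR:

# Bound concurrency so a big batch doesn't fire every request at once.
async def bounded_completion(sem, prompt, model, max_tokens):
    async with sem:  # only the semaphore's limit of calls run concurrently
        output = await together.AsyncComplete.create(
            prompt=prompt, model=model, max_tokens=max_tokens
        )
        return output.choices[0].text

async def run_bounded(prompts, model="togethercomputer/llama-2-7b", max_tokens=8):
    sem = asyncio.Semaphore(4)  # illustrative limit: 4 in-flight requests
    return await asyncio.gather(
        *[bounded_completion(sem, p, model, max_tokens) for p in prompts]
    )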

@clam004 clam004 requested a review from orangetin January 12, 2024 21:17
@clam004 clam004 requested a review from azahed98 January 13, 2024 03:13
@orangetin (Member) left a comment:
lgtm

@clam004 clam004 marked this pull request as ready for review January 13, 2024 04:09
@clam004 clam004 merged commit 055a916 into main Jan 13, 2024
@orangetin orangetin deleted the clam004/async-client branch April 5, 2024 19:18