Skip to content

Commit 7183149

Browse files
committed
server: tests: fix concurrent OAI streaming request
1 parent 77b8589 commit 7183149

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

examples/server/tests/features/parallel.feature

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ Feature: Parallel
5151
Examples:
5252
| streaming | n_predict |
5353
| disabled | 128 |
54-
#| enabled | 64 | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
54+
| enabled | 64 |
5555

5656
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
5757
Given a prompt:

examples/server/tests/features/steps/steps.py

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -485,20 +485,25 @@ async def oai_chat_completions(user_prompt,
485485
assert response.status == 200
486486
assert response.headers['Access-Control-Allow-Origin'] == origin
487487
assert response.headers['Content-Type'] == "text/event-stream"
488-
489-
async for line_in_bytes in response.content:
490-
line = line_in_bytes.decode('utf8')
491-
event_data = line.split(': ', 1)
492-
assert event_data[0] == 'data', f'{event_data}'
493-
chunk_raw = event_data[1]
494-
495-
chunk = json.loads(chunk_raw)
496-
assert len(chunk['choices']) == 1
497-
delta = chunk['choices'][0]['delta']
498-
if 'content' in delta:
499-
completion_response['content'] += delta['content']
500-
completion_response['timings']['predicted_n'] += 1
501-
print(f"DEBUG completion_response: {completion_response}")
488+
event_received = True
489+
while event_received:
490+
event_received = False
491+
async for line_in_bytes in response.content:
492+
line = line_in_bytes.decode('utf8')
493+
line = line.rstrip('\n').rstrip('\r')
494+
if line == '':
495+
continue
496+
event_data = line.split(': ', 1)
497+
assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
498+
chunk_raw = event_data[1]
499+
500+
chunk = json.loads(chunk_raw)
501+
assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"
502+
delta = chunk['choices'][0]['delta']
503+
if 'content' in delta:
504+
completion_response['content'] += delta['content']
505+
completion_response['timings']['predicted_n'] += 1
506+
print(f"DEBUG completion_response: {completion_response}")
502507
else:
503508
if expect_api_error is None or not expect_api_error:
504509
assert response.status == 200

0 commit comments

Comments
 (0)