Skip to content

Commit 2331aa4

Browse files
authored
Add WorkflowUpdateRPCTimeoutOrCancelledError (#548)
Fixes #529
1 parent 4f646c2 commit 2331aa4

File tree

4 files changed

+200
-35
lines changed

4 files changed

+200
-35
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
# Build and test the project
1111
build-lint-test:
1212
strategy:
13-
fail-fast: true
13+
fail-fast: false
1414
matrix:
1515
python: ["3.8", "3.12"]
1616
os: [ubuntu-latest, ubuntu-arm, macos-intel, macos-arm, windows-latest]

temporalio/client.py

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,10 @@ async def execute_update(
18521852
rpc_timeout: Optional RPC deadline to set for the RPC call.
18531853
18541854
Raises:
1855-
WorkflowUpdateFailedError: If the update failed
1855+
WorkflowUpdateFailedError: If the update failed.
1856+
WorkflowUpdateRPCTimeoutOrCancelledError: This update call timed out
1857+
or was cancelled. This doesn't mean the update itself was timed
1858+
out or cancelled.
18561859
RPCError: There was some issue sending the update to the workflow.
18571860
"""
18581861
handle = await self._start_update(
@@ -1968,6 +1971,9 @@ async def start_update(
19681971
rpc_timeout: Optional RPC deadline to set for the RPC call.
19691972
19701973
Raises:
1974+
WorkflowUpdateRPCTimeoutOrCancelledError: This update call timed out
1975+
or was cancelled. This doesn't mean the update itself was timed
1976+
out or cancelled.
19711977
RPCError: There was some issue sending the update to the workflow.
19721978
"""
19731979
return await self._start_update(
@@ -4305,7 +4311,10 @@ async def result(
43054311
it will be retried until the result is available.
43064312
43074313
Raises:
4308-
WorkflowUpdateFailedError: If the update failed
4314+
WorkflowUpdateFailedError: If the update failed.
4315+
WorkflowUpdateRPCTimeoutOrCancelledError: This update call timed out
4316+
or was cancelled. This doesn't mean the update itself was timed
4317+
out or cancelled.
43094318
RPCError: Update result could not be fetched for some other reason.
43104319
"""
43114320
# Poll until outcome reached
@@ -4357,15 +4366,28 @@ async def _poll_until_outcome(
43574366

43584367
# Continue polling as long as we have no outcome
43594368
while True:
4360-
res = await self._client.workflow_service.poll_workflow_execution_update(
4361-
req,
4362-
retry=True,
4363-
metadata=rpc_metadata,
4364-
timeout=rpc_timeout,
4365-
)
4366-
if res.HasField("outcome"):
4367-
self._known_outcome = res.outcome
4368-
return
4369+
try:
4370+
res = (
4371+
await self._client.workflow_service.poll_workflow_execution_update(
4372+
req,
4373+
retry=True,
4374+
metadata=rpc_metadata,
4375+
timeout=rpc_timeout,
4376+
)
4377+
)
4378+
if res.HasField("outcome"):
4379+
self._known_outcome = res.outcome
4380+
return
4381+
except RPCError as err:
4382+
if (
4383+
err.status == RPCStatusCode.DEADLINE_EXCEEDED
4384+
or err.status == RPCStatusCode.CANCELLED
4385+
):
4386+
raise WorkflowUpdateRPCTimeoutOrCancelledError() from err
4387+
else:
4388+
raise
4389+
except asyncio.CancelledError as err:
4390+
raise WorkflowUpdateRPCTimeoutOrCancelledError() from err
43694391

43704392

43714393
class WorkflowUpdateStage(IntEnum):
@@ -4456,6 +4478,24 @@ def cause(self) -> BaseException:
44564478
return self.__cause__
44574479

44584480

4481+
class RPCTimeoutOrCancelledError(temporalio.exceptions.TemporalError):
4482+
"""Error that occurs on some client calls that timeout or get cancelled."""
4483+
4484+
pass
4485+
4486+
4487+
class WorkflowUpdateRPCTimeoutOrCancelledError(RPCTimeoutOrCancelledError):
4488+
"""Error that occurs when update RPC call times out or is cancelled.
4489+
4490+
Note, this is not related to any general concept of timing out or cancelling
4491+
a running update, this is only related to the client call itself.
4492+
"""
4493+
4494+
def __init__(self) -> None:
4495+
"""Create workflow update timeout or cancelled error."""
4496+
super().__init__("Timeout or cancellation waiting for update")
4497+
4498+
44594499
class AsyncActivityCancelledError(temporalio.exceptions.TemporalError):
44604500
"""Error that occurs when async activity attempted heartbeat but was cancelled."""
44614501

@@ -5261,9 +5301,23 @@ async def start_workflow_update(
52615301
# the user cannot specify sooner than ACCEPTED)
52625302
resp: temporalio.api.workflowservice.v1.UpdateWorkflowExecutionResponse
52635303
while True:
5264-
resp = await self._client.workflow_service.update_workflow_execution(
5265-
req, retry=True, metadata=input.rpc_metadata, timeout=input.rpc_timeout
5266-
)
5304+
try:
5305+
resp = await self._client.workflow_service.update_workflow_execution(
5306+
req,
5307+
retry=True,
5308+
metadata=input.rpc_metadata,
5309+
timeout=input.rpc_timeout,
5310+
)
5311+
except RPCError as err:
5312+
if (
5313+
err.status == RPCStatusCode.DEADLINE_EXCEEDED
5314+
or err.status == RPCStatusCode.CANCELLED
5315+
):
5316+
raise WorkflowUpdateRPCTimeoutOrCancelledError() from err
5317+
else:
5318+
raise
5319+
except asyncio.CancelledError as err:
5320+
raise WorkflowUpdateRPCTimeoutOrCancelledError() from err
52675321
if (
52685322
resp.stage >= req.wait_policy.lifecycle_stage
52695323
or resp.stage

tests/helpers/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@
66
from datetime import timedelta
77
from typing import Awaitable, Callable, Optional, Sequence, Type, TypeVar
88

9+
from temporalio.api.common.v1 import WorkflowExecution
910
from temporalio.api.enums.v1 import IndexedValueType
1011
from temporalio.api.operatorservice.v1 import (
1112
AddSearchAttributesRequest,
1213
ListSearchAttributesRequest,
1314
)
15+
from temporalio.api.update.v1 import UpdateRef
16+
from temporalio.api.workflowservice.v1 import PollWorkflowExecutionUpdateRequest
1417
from temporalio.client import BuildIdOpAddNewDefault, Client
1518
from temporalio.common import SearchAttributeKey
1619
from temporalio.service import RPCError, RPCStatusCode
@@ -105,3 +108,23 @@ def find_free_port() -> int:
105108
s.bind(("", 0))
106109
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
107110
return s.getsockname()[1]
111+
112+
113+
async def workflow_update_exists(
114+
client: Client, workflow_id: str, update_id: str
115+
) -> bool:
116+
try:
117+
await client.workflow_service.poll_workflow_execution_update(
118+
PollWorkflowExecutionUpdateRequest(
119+
namespace=client.namespace,
120+
update_ref=UpdateRef(
121+
workflow_execution=WorkflowExecution(workflow_id=workflow_id),
122+
update_id=update_id,
123+
),
124+
)
125+
)
126+
return True
127+
except RPCError as err:
128+
if err.status != RPCStatusCode.NOT_FOUND:
129+
raise
130+
return False

tests/worker/test_workflow.py

Lines changed: 108 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from temporalio.api.update.v1 import UpdateRef
4343
from temporalio.api.workflowservice.v1 import (
4444
GetWorkflowExecutionHistoryRequest,
45-
PollWorkflowExecutionUpdateRequest,
4645
ResetStickyTaskQueueRequest,
4746
)
4847
from temporalio.bridge.proto.workflow_activation import WorkflowActivation
@@ -57,6 +56,7 @@
5756
WorkflowQueryFailedError,
5857
WorkflowUpdateFailedError,
5958
WorkflowUpdateHandle,
59+
WorkflowUpdateRPCTimeoutOrCancelledError,
6060
WorkflowUpdateStage,
6161
)
6262
from temporalio.common import (
@@ -107,6 +107,7 @@
107107
ensure_search_attributes_present,
108108
find_free_port,
109109
new_worker,
110+
workflow_update_exists,
110111
)
111112
from tests.helpers.external_coroutine import wait_on_timer
112113
from tests.helpers.external_stack_trace import (
@@ -4364,25 +4365,8 @@ async def test_workflow_update_before_worker_start(
43644365
task_queue=task_queue,
43654366
)
43664367

4367-
async def update_exists() -> bool:
4368-
try:
4369-
await client.workflow_service.poll_workflow_execution_update(
4370-
PollWorkflowExecutionUpdateRequest(
4371-
namespace=client.namespace,
4372-
update_ref=UpdateRef(
4373-
workflow_execution=WorkflowExecution(workflow_id=handle.id),
4374-
update_id="my-update",
4375-
),
4376-
)
4377-
)
4378-
return True
4379-
except RPCError as err:
4380-
if err.status != RPCStatusCode.NOT_FOUND:
4381-
raise
4382-
return False
4383-
43844368
# Confirm update not there
4385-
assert not await update_exists()
4369+
assert not await workflow_update_exists(client, handle.id, "my-update")
43864370

43874371
# Execute update in background
43884372
update_task = asyncio.create_task(
@@ -4392,7 +4376,9 @@ async def update_exists() -> bool:
43924376
)
43934377

43944378
# Wait until update exists
4395-
await assert_eq_eventually(True, update_exists)
4379+
await assert_eq_eventually(
4380+
True, lambda: workflow_update_exists(client, handle.id, "my-update")
4381+
)
43964382

43974383
# Start no-cache worker on the task queue
43984384
async with new_worker(
@@ -4466,6 +4452,108 @@ async def test_workflow_update_separate_handle(
44664452
assert "workflow-done" == await handle.result()
44674453

44684454

4455+
@workflow.defn
4456+
class UpdateTimeoutOrCancelWorkflow:
4457+
@workflow.run
4458+
async def run(self) -> None:
4459+
await workflow.wait_condition(lambda: False)
4460+
4461+
@workflow.update
4462+
async def do_update(self, sleep: float) -> None:
4463+
await asyncio.sleep(sleep)
4464+
4465+
4466+
async def test_workflow_update_timeout_or_cancel(
4467+
client: Client, env: WorkflowEnvironment
4468+
):
4469+
if env.supports_time_skipping:
4470+
pytest.skip(
4471+
"Java test server: https://github.com/temporalio/sdk-java/issues/1903"
4472+
)
4473+
4474+
# Confirm start timeout via short timeout on update w/ no worker running
4475+
handle = await client.start_workflow(
4476+
UpdateTimeoutOrCancelWorkflow.run,
4477+
id=f"wf-{uuid.uuid4()}",
4478+
task_queue="does-not-exist",
4479+
)
4480+
with pytest.raises(WorkflowUpdateRPCTimeoutOrCancelledError):
4481+
await handle.start_update(
4482+
UpdateTimeoutOrCancelWorkflow.do_update,
4483+
1000,
4484+
wait_for_stage=WorkflowUpdateStage.ACCEPTED,
4485+
rpc_timeout=timedelta(milliseconds=1),
4486+
)
4487+
4488+
# Confirm start cancel via cancel on update w/ no worker running
4489+
handle = await client.start_workflow(
4490+
UpdateTimeoutOrCancelWorkflow.run,
4491+
id=f"wf-{uuid.uuid4()}",
4492+
task_queue="does-not-exist",
4493+
)
4494+
task = asyncio.create_task(
4495+
handle.start_update(
4496+
UpdateTimeoutOrCancelWorkflow.do_update,
4497+
1000,
4498+
wait_for_stage=WorkflowUpdateStage.ACCEPTED,
4499+
id="my-update",
4500+
)
4501+
)
4502+
# Have to wait for update to exist before cancelling to capture
4503+
await assert_eq_eventually(
4504+
True, lambda: workflow_update_exists(client, handle.id, "my-update")
4505+
)
4506+
task.cancel()
4507+
with pytest.raises(WorkflowUpdateRPCTimeoutOrCancelledError):
4508+
await task
4509+
4510+
# Start worker
4511+
async with new_worker(client, UpdateTimeoutOrCancelWorkflow) as worker:
4512+
# Start the workflow
4513+
handle = await client.start_workflow(
4514+
UpdateTimeoutOrCancelWorkflow.run,
4515+
id=f"wf-{uuid.uuid4()}",
4516+
task_queue=worker.task_queue,
4517+
)
4518+
# Start an update
4519+
update_handle = await handle.start_update(
4520+
UpdateTimeoutOrCancelWorkflow.do_update,
4521+
1000,
4522+
wait_for_stage=WorkflowUpdateStage.ACCEPTED,
4523+
)
4524+
# Timeout a poll call
4525+
with pytest.raises(WorkflowUpdateRPCTimeoutOrCancelledError):
4526+
await update_handle.result(rpc_timeout=timedelta(milliseconds=1))
4527+
4528+
# Cancel a poll call
4529+
update_handle = await handle.start_update(
4530+
UpdateTimeoutOrCancelWorkflow.do_update,
4531+
1000,
4532+
wait_for_stage=WorkflowUpdateStage.ACCEPTED,
4533+
)
4534+
result_task = asyncio.create_task(update_handle.result())
4535+
# Unfortunately there is not a way for us to confirm this is actually
4536+
# pending the server call and if you cancel early you get an asyncio
4537+
# cancelled error because it never even reached the gRPC client. We
4538+
# considered sleeping, but that makes for flaky tests. So what we are
4539+
# going to do is patch the poll call to notify us when it was called.
4540+
called = asyncio.Event()
4541+
unpatched_call = client.workflow_service.poll_workflow_execution_update
4542+
4543+
async def patched_call(*args, **kwargs):
4544+
called.set()
4545+
return await unpatched_call(*args, **kwargs)
4546+
4547+
client.workflow_service.poll_workflow_execution_update = patched_call # type: ignore
4548+
try:
4549+
await called.wait()
4550+
finally:
4551+
client.workflow_service.poll_workflow_execution_update = unpatched_call
4552+
result_task.cancel()
4553+
with pytest.raises(WorkflowUpdateRPCTimeoutOrCancelledError):
4554+
await result_task
4555+
4556+
44694557
@workflow.defn
44704558
class TimeoutSupportWorkflow:
44714559
@workflow.run

0 commit comments

Comments
 (0)