Skip to content

Enhancement/allow custom metric buckets #781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions temporalio/bridge/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class PrometheusConfig:
counters_total_suffix: bool
unit_suffix: bool
durations_as_seconds: bool
histogram_bucket_overrides: Optional[Mapping[str, Sequence[float]]] = None


@dataclass(frozen=True)
Expand Down
6 changes: 6 additions & 0 deletions temporalio/bridge/src/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub struct PrometheusConfig {
counters_total_suffix: bool,
unit_suffix: bool,
durations_as_seconds: bool,
histogram_bucket_overrides: Option<HashMap<String, Vec<f64>>>,
}

const FORWARD_LOG_BUFFER_SIZE: usize = 2048;
Expand Down Expand Up @@ -347,6 +348,11 @@ impl TryFrom<MetricsConfig> for Arc<dyn CoreMeter> {
if let Some(global_tags) = conf.global_tags {
build.global_tags(global_tags);
}
if let Some(overrides) = prom_conf.histogram_bucket_overrides {
build.histogram_bucket_overrides(temporal_sdk_core_api::telemetry::HistogramBucketOverrides {
overrides,
});
}
let prom_options = build.build().map_err(|err| {
PyValueError::new_err(format!("Invalid Prometheus config: {}", err))
})?;
Expand Down
2 changes: 2 additions & 0 deletions temporalio/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,15 @@ class PrometheusConfig:
counters_total_suffix: bool = False
unit_suffix: bool = False
durations_as_seconds: bool = False
histogram_bucket_overrides: Optional[Mapping[str, Sequence[float]]] = None

def _to_bridge_config(self) -> temporalio.bridge.runtime.PrometheusConfig:
return temporalio.bridge.runtime.PrometheusConfig(
bind_address=self.bind_address,
counters_total_suffix=self.counters_total_suffix,
unit_suffix=self.unit_suffix,
durations_as_seconds=self.durations_as_seconds,
histogram_bucket_overrides=self.histogram_bucket_overrides,
)


Expand Down
75 changes: 74 additions & 1 deletion tests/test_runtime.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging
import logging.handlers
import queue
import re
import uuid
from datetime import timedelta
from typing import List, cast
from urllib.request import urlopen

Expand All @@ -16,7 +18,7 @@
TelemetryFilter,
)
from temporalio.worker import Worker
from tests.helpers import assert_eq_eventually, find_free_port
from tests.helpers import assert_eq_eventually, assert_eventually, find_free_port


@workflow.defn
Expand Down Expand Up @@ -181,3 +183,74 @@ async def has_log() -> bool:
assert record.levelno == logging.WARNING
assert record.name == f"{logger.name}-sdk_core::temporal_sdk_core::worker::workflow"
assert record.temporal_log.fields["run_id"] == handle.result_run_id # type: ignore


async def test_prometheus_histogram_bucket_overrides(client: Client):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for completeness sake, can you also add a check for custom metric? Basically just make a histogram override for your custom metric too, just assign Runtime to a var, and in addition to all that you're doing below, use runtime.metric_meter() to create/record a custom histogram metric value and confirm it too gets the histogram override.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added a custom_histogram and verified the buckets are updated.

This work is actually NOT accomplishing what I want in the ability to control the binning of temporal_workflow_endtoend_latency_bucket and temporal_activity_execution_latency_[milliseconds]_bucket

Are you able to tell if I will be able to do this in a PR on this repository or if it will require an update to sdk-core for that functionality?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may have to remove the temporal_ prefix. If that is indeed the case, may be a good thing to document where the override attr is defined.

# Set up a Prometheus configuration with custom histogram bucket overrides
prom_addr = f"127.0.0.1:{find_free_port()}"
special_value = float(1234.5678)
histogram_overrides = {
"temporal_long_request_latency": [special_value / 2, special_value],
"custom_histogram": [special_value / 2, special_value],
}

runtime = Runtime(
telemetry=TelemetryConfig(
metrics=PrometheusConfig(
bind_address=prom_addr,
counters_total_suffix=False,
unit_suffix=False,
durations_as_seconds=False,
histogram_bucket_overrides=histogram_overrides,
),
),
)

# Create a custom histogram metric
custom_histogram = runtime.metric_meter.create_histogram(
"custom_histogram", "Custom histogram", "ms"
)

# Record a value to the custom histogram
custom_histogram.record(600)

# Create client with overrides
client_with_overrides = await Client.connect(
client.service_client.config.target_host,
namespace=client.namespace,
runtime=runtime,
)

async def run_workflow(client: Client):
task_queue = f"task-queue-{uuid.uuid4()}"
async with Worker(
client,
task_queue=task_queue,
workflows=[HelloWorkflow],
):
assert "Hello, World!" == await client.execute_workflow(
HelloWorkflow.run,
"World",
id=f"workflow-{uuid.uuid4()}",
task_queue=task_queue,
)

await run_workflow(client_with_overrides)

async def check_metrics() -> None:
with urlopen(url=f"http://{prom_addr}/metrics") as f:
metrics_output = f.read().decode("utf-8")

for key, buckets in histogram_overrides.items():
assert (
key in metrics_output
), f"Missing {key} in full output: {metrics_output}"
for bucket in buckets:
# expect to have {key}_bucket and le={bucket} in the same line with arbitrary strings between them
regex = re.compile(f'{key}_bucket.*le="{bucket}"')
assert regex.search(
metrics_output
), f"Missing bucket for {key} in full output: {metrics_output}"

# Wait for metrics to appear and match the expected buckets
await assert_eventually(check_metrics)
Loading