Skip to content

Add custom Duration and Timestamp classes with nanosecond support #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
348 changes: 348 additions & 0 deletions databricks/sdk/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
"""Common types for the Databricks SDK.

This module provides common types used by different APIs.
"""

from __future__ import annotations

import logging
import re
from datetime import datetime, timedelta, timezone
from decimal import Decimal, InvalidOperation

# Module-level logger obtained under the "databricks.sdk" logger name.
_LOG = logging.getLogger("databricks.sdk")

# Python's datetime library does not support nanosecond precision. The classes below work around this limitation.


class Duration:
    """Represents a duration with nanosecond precision.

    This class provides nanosecond precision for durations, which is not supported
    by Python's standard datetime.timedelta.

    Negative durations are stored floor-normalized, the same way
    datetime.timedelta normalizes its fields: ``seconds`` carries the sign and
    ``nanoseconds`` always lies in ``[0, 999999999]``. For example, -1.5 seconds
    is ``Duration(seconds=-2, nanoseconds=500000000)``.

    Attributes:
        seconds (int): Number of seconds in the duration
        nanoseconds (int): Number of nanoseconds (0-999999999)
    """

    _NANOS_PER_SECOND = 1_000_000_000

    def __init__(self, seconds: int = 0, nanoseconds: int = 0) -> None:
        """Initialize a Duration with seconds and nanoseconds.

        Args:
            seconds: Number of seconds
            nanoseconds: Number of nanoseconds (0-999999999)

        Raises:
            TypeError: If seconds or nanoseconds are not integers
            ValueError: If nanoseconds is not between 0 and 999999999
        """
        # The annotations in the signature are only hints for linters and type
        # checkers; Python does not enforce them at runtime, so validate here.
        if not isinstance(seconds, int):
            raise TypeError("seconds must be an integer")
        if not isinstance(nanoseconds, int):
            raise TypeError("nanoseconds must be an integer")
        if not 0 <= nanoseconds < self._NANOS_PER_SECOND:
            raise ValueError("nanoseconds must be between 0 and 999999999")

        self.seconds = seconds
        self.nanoseconds = nanoseconds

    # The quoted "Duration" return annotations below are forward references
    # (https://peps.python.org/pep-0484/#forward-references): the class name is
    # not yet bound while the class body is being executed.
    @classmethod
    def from_timedelta(cls, td: timedelta) -> "Duration":
        """Convert a datetime.timedelta to Duration.

        Args:
            td: The timedelta to convert

        Returns:
            Duration: A new Duration instance with equivalent time span
        """
        # timedelta stores normalized integer fields (0 <= seconds < 86400,
        # 0 <= microseconds < 1000000, days may be negative). Using them
        # directly is exact, whereas total_seconds() returns a float that loses
        # microseconds for large spans and truncates the wrong way for
        # negative ones.
        whole_seconds = td.days * 86_400 + td.seconds
        return cls(seconds=whole_seconds, nanoseconds=td.microseconds * 1_000)

    def to_timedelta(self) -> timedelta:
        """Convert Duration to datetime.timedelta.

        Returns:
            timedelta: A new timedelta instance with equivalent time span

        Note:
            The conversion will lose nanosecond precision as timedelta
            only supports microsecond precision. Nanoseconds beyond
            microsecond precision will be truncated.
        """
        # Convert nanoseconds to microseconds, truncating any extra precision
        microseconds = self.nanoseconds // 1_000
        return timedelta(seconds=self.seconds, microseconds=microseconds)

    def __repr__(self) -> str:
        """Return a string representation of the Duration.

        Returns:
            str: String in the format 'Duration(seconds=X, nanoseconds=Y)'
        """
        return f"Duration(seconds={self.seconds}, nanoseconds={self.nanoseconds})"

    def __eq__(self, other: object) -> bool:
        """Compare this Duration with another object for equality.

        Args:
            other: Object to compare with

        Returns:
            bool: True if other is a Duration with same seconds and nanoseconds
        """
        if not isinstance(other, Duration):
            return NotImplemented
        return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds

    def __hash__(self) -> int:
        """Return a hash consistent with ``__eq__``.

        Defining ``__eq__`` alone would make instances unhashable. Do not
        mutate an instance while it is used as a dict key or set member.
        """
        return hash((self.seconds, self.nanoseconds))

    @classmethod
    def parse(cls, duration_str: str) -> "Duration":
        """Parse a duration string in the format 'Xs' where X is a decimal number.

        Examples:
            "3.1s" -> Duration(seconds=3, nanoseconds=100000000)
            "1.5s" -> Duration(seconds=1, nanoseconds=500000000)
            "10s" -> Duration(seconds=10, nanoseconds=0)
            "-1.5s" -> Duration(seconds=-2, nanoseconds=500000000)

        Args:
            duration_str: String in the format 'Xs' where X is a decimal number

        Returns:
            A new Duration instance. Digits beyond nanosecond precision are
            truncated toward zero.

        Raises:
            ValueError: If the string format is invalid
        """
        if not duration_str.endswith("s"):
            raise ValueError("Duration string must end with 's'")

        try:
            # Remove the 's' suffix and convert to Decimal
            value = Decimal(duration_str[:-1])
        except InvalidOperation as e:
            # Decimal signals malformed input with InvalidOperation, which is
            # an ArithmeticError rather than a ValueError.
            raise ValueError(f"Invalid duration format: {duration_str}") from e
        if not value.is_finite():
            # "NaN" and "Infinity" are valid Decimal literals but not durations.
            raise ValueError(f"Invalid duration format: {duration_str}")

        # Work on the exact integer ratio so the result does not depend on the
        # ambient Decimal context precision.
        numerator, denominator = value.as_integer_ratio()
        magnitude = abs(numerator) * cls._NANOS_PER_SECOND // denominator
        total_nanos = -magnitude if numerator < 0 else magnitude
        # divmod floors, which keeps nanoseconds within [0, 999999999] for
        # negative values as well.
        seconds, nanoseconds = divmod(total_nanos, cls._NANOS_PER_SECOND)
        return cls(seconds=seconds, nanoseconds=nanoseconds)

    def to_string(self) -> str:
        """Convert Duration to string format 'Xs' where X is a decimal number.

        Examples:
            Duration(seconds=3, nanoseconds=100000000) -> "3.1s"
            Duration(seconds=1, nanoseconds=500000000) -> "1.5s"
            Duration(seconds=10, nanoseconds=0) -> "10s"
            Duration(seconds=-2, nanoseconds=500000000) -> "-1.5s"

        Returns:
            String representation of the duration
        """
        if self.nanoseconds == 0:
            return f"{self.seconds}s"

        # Pure integer arithmetic: exact for any magnitude and independent of
        # the Decimal context.
        total_nanos = self.seconds * self._NANOS_PER_SECOND + self.nanoseconds
        sign = "-" if total_nanos < 0 else ""
        whole, fraction = divmod(abs(total_nanos), self._NANOS_PER_SECOND)
        # The fraction is non-zero here, so stripping trailing zeros always
        # leaves at least one digit after the decimal point.
        return f"{sign}{whole}.{fraction:09d}".rstrip("0") + "s"


class Timestamp:
    """Represents a timestamp with nanosecond precision.

    This class provides nanosecond precision for timestamps, which is not supported
    by Python's standard datetime. It's compatible with protobuf Timestamp format and
    supports RFC3339 string formatting.

    Instants before the Unix epoch are stored with a negative ``seconds`` value
    and a non-negative ``nanos`` value, so 1969-12-31T23:59:59.5Z is
    ``Timestamp(seconds=-1, nanos=500000000)``.

    Attributes:
        seconds (int): Seconds since Unix epoch (1970-01-01T00:00:00Z)
        nanos (int): Nanoseconds (0-999999999)
    """

    # RFC3339 regex pattern for validation and parsing
    _RFC3339_PATTERN = re.compile(
        r"^(\d{4})-(\d{2})-(\d{2})[Tt](\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:?\d{2})$"
    )

    # Reference instant for all conversions. Doing the arithmetic against this
    # value avoids both the float round-trip of datetime.timestamp() and the
    # platform C library used by datetime.fromtimestamp().
    _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)

    def __init__(self, seconds: int = 0, nanos: int = 0) -> None:
        """Initialize a Timestamp with seconds since epoch and nanoseconds.

        Args:
            seconds: Seconds since Unix epoch (1970-01-01T00:00:00Z)
            nanos: Nanoseconds (0-999999999)

        Raises:
            TypeError: If seconds or nanos are not integers
            ValueError: If nanos is not between 0 and 999999999
        """
        # Annotations are not enforced at runtime, so validate explicitly.
        if not isinstance(seconds, int):
            raise TypeError("seconds must be an integer")
        if not isinstance(nanos, int):
            raise TypeError("nanos must be an integer")
        if not 0 <= nanos < 1_000_000_000:
            raise ValueError("nanos must be between 0 and 999999999")

        self.seconds = seconds
        self.nanos = nanos

    @classmethod
    def from_datetime(cls, dt: datetime) -> "Timestamp":
        """Convert a datetime.datetime to Timestamp.

        Args:
            dt: The datetime to convert. If naive, it's assumed to be UTC.

        Returns:
            Timestamp: A new Timestamp instance

        Note:
            The datetime is converted to UTC if it isn't already.
            Note that datetime only supports microsecond precision, so nanoseconds
            will be padded with zeros.
        """
        # If datetime is naive (no timezone), assume UTC
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)

        # Subtracting the epoch yields a timedelta whose integer fields are
        # already floor-normalized (0 <= microseconds < 1000000), so this is
        # exact for pre-epoch and far-future datetimes alike.
        delta = dt.astimezone(timezone.utc) - cls._EPOCH
        seconds = delta.days * 86_400 + delta.seconds
        return cls(seconds=seconds, nanos=delta.microseconds * 1_000)

    def to_datetime(self) -> datetime:
        """Convert Timestamp to datetime.datetime.

        Returns:
            datetime: A new datetime instance in UTC timezone

        Raises:
            OverflowError: If the instant is outside the range datetime supports

        Note:
            The returned datetime will have microsecond precision at most.
            Nanoseconds beyond microsecond precision will be truncated.
        """
        # Convert nanoseconds to microseconds, truncating any extra precision
        microseconds = self.nanos // 1_000
        return self._EPOCH + timedelta(seconds=self.seconds, microseconds=microseconds)

    @classmethod
    def parse(cls, timestamp_str: str) -> "Timestamp":
        """Parse an RFC3339 formatted string into a Timestamp.

        Examples:
            >>> Timestamp.parse("2023-01-01T12:00:00Z")
            Timestamp(seconds=1672574400, nanos=0)
            >>> Timestamp.parse("2023-01-01T12:00:00.123456789Z")
            Timestamp(seconds=1672574400, nanos=123456789)
            >>> Timestamp.parse("2023-01-01T12:00:00+01:00")
            Timestamp(seconds=1672570800, nanos=0)

        Args:
            timestamp_str: RFC3339 formatted timestamp string

        Returns:
            Timestamp: A new Timestamp instance. Fractional digits beyond
            nanosecond precision are truncated.

        Raises:
            ValueError: If the string format is invalid or not RFC3339 compliant
        """
        match = cls._RFC3339_PATTERN.match(timestamp_str)
        if not match:
            raise ValueError(f"Invalid RFC3339 format: {timestamp_str}")

        year, month, day, hour, minute, second, frac, offset = match.groups()

        # Pad/truncate the fraction to exactly 9 digits. It is kept out of the
        # datetime entirely, since datetime cannot hold nanoseconds anyway.
        nanos = int((frac + "000000000")[:9]) if frac else 0

        # Normalize the offset to the +HH:MM form that fromisoformat accepts
        if offset == "Z":
            offset = "+00:00"
        elif ":" not in offset:
            # Insert colon in offset if not present (e.g., +0000 -> +00:00)
            offset = f"{offset[:3]}:{offset[3:]}"

        try:
            dt = datetime.fromisoformat(f"{year}-{month}-{day}T{hour}:{minute}:{second}{offset}")
        except ValueError as e:
            # The pattern checks only the shape; out-of-range fields (month 13,
            # hour 25, ...) are rejected here. Report them consistently.
            raise ValueError(f"Invalid RFC3339 format: {timestamp_str}") from e

        return cls(seconds=cls.from_datetime(dt).seconds, nanos=nanos)

    def to_string(self) -> str:
        """Convert Timestamp to RFC3339 formatted string.

        Returns:
            str: RFC3339 formatted timestamp string in UTC timezone

        Raises:
            OverflowError: If the instant is outside the range datetime supports

        Note:
            The string will include nanosecond precision only if nanos > 0
        """
        # isoformat() always zero-pads the year to four digits, unlike
        # strftime("%Y"), whose padding depends on the platform C library.
        dt = self._EPOCH + timedelta(seconds=self.seconds)
        base = dt.replace(tzinfo=None).isoformat(timespec="seconds")

        # Add nanoseconds if present
        if self.nanos == 0:
            return base + "Z"

        # Format nanoseconds, removing trailing zeros
        nanos_str = f"{self.nanos:09d}".rstrip("0")
        return f"{base}.{nanos_str}Z"

    def __repr__(self) -> str:
        """Return a string representation of the Timestamp.

        Returns:
            str: String in the format 'Timestamp(seconds=X, nanos=Y)'
        """
        return f"Timestamp(seconds={self.seconds}, nanos={self.nanos})"

    def __eq__(self, other: object) -> bool:
        """Compare this Timestamp with another object for equality.

        Args:
            other: Object to compare with

        Returns:
            bool: True if other is a Timestamp with same seconds and nanos
        """
        if not isinstance(other, Timestamp):
            return NotImplemented
        return self.seconds == other.seconds and self.nanos == other.nanos

    def __hash__(self) -> int:
        """Return a hash consistent with ``__eq__``.

        Defining ``__eq__`` alone would make instances unhashable. Do not
        mutate an instance while it is used as a dict key or set member.
        """
        return hash((self.seconds, self.nanos))

    def replace(self, **kwargs) -> "Timestamp":
        """Create a new Timestamp with the given fields replaced.

        Args:
            **kwargs: Fields to replace (seconds, nanos)

        Returns:
            A new Timestamp instance with the specified fields replaced

        Raises:
            TypeError: If a keyword other than 'seconds' or 'nanos' is given,
                or if a replacement value is not an integer
            ValueError: If the replacement nanos is out of range
        """
        unexpected = sorted(set(kwargs) - {"seconds", "nanos"})
        if unexpected:
            # A misspelled field name would otherwise be silently ignored.
            raise TypeError(f"replace() got unexpected keyword argument(s): {', '.join(unexpected)}")

        seconds = kwargs.get("seconds", self.seconds)
        nanos = kwargs.get("nanos", self.nanos)
        return Timestamp(seconds=seconds, nanos=nanos)
Loading
Loading