Skip to content

Commit 592047f

Browse files
authored
observability: PDML + some batch write spans (#1274)
* observability: PDML + some batch write spans

  This change adds spans for Partitioned DML and makes updates for Batch.
  Carved out from PR #1241.

* Add more system tests
* Account for lack of OpenTelemetry on Python 3.7
* Update tests
* Fix more test assertions
* Updates from code review
* Update tests with code review suggestions
* Remove return per code review nit
1 parent 0887eb4 commit 592047f

11 files changed

+370
-194
lines changed

google/cloud/spanner_v1/batch.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,7 @@ def batch_write(self, request_options=None, exclude_txn_from_change_streams=Fals
344344
)
345345
observability_options = getattr(database, "observability_options", None)
346346
with trace_call(
347-
"CloudSpanner.BatchWrite",
347+
"CloudSpanner.batch_write",
348348
self._session,
349349
trace_attributes,
350350
observability_options=observability_options,

google/cloud/spanner_v1/database.py

+129-93
Original file line number | Diff line number | Diff line change
@@ -699,38 +699,43 @@ def execute_partitioned_dml(
699699
)
700700

701701
def execute_pdml():
702-
with SessionCheckout(self._pool) as session:
703-
txn = api.begin_transaction(
704-
session=session.name, options=txn_options, metadata=metadata
705-
)
702+
with trace_call(
703+
"CloudSpanner.Database.execute_partitioned_pdml",
704+
observability_options=self.observability_options,
705+
) as span:
706+
with SessionCheckout(self._pool) as session:
707+
add_span_event(span, "Starting BeginTransaction")
708+
txn = api.begin_transaction(
709+
session=session.name, options=txn_options, metadata=metadata
710+
)
706711

707-
txn_selector = TransactionSelector(id=txn.id)
712+
txn_selector = TransactionSelector(id=txn.id)
708713

709-
request = ExecuteSqlRequest(
710-
session=session.name,
711-
sql=dml,
712-
params=params_pb,
713-
param_types=param_types,
714-
query_options=query_options,
715-
request_options=request_options,
716-
)
717-
method = functools.partial(
718-
api.execute_streaming_sql,
719-
metadata=metadata,
720-
)
714+
request = ExecuteSqlRequest(
715+
session=session.name,
716+
sql=dml,
717+
params=params_pb,
718+
param_types=param_types,
719+
query_options=query_options,
720+
request_options=request_options,
721+
)
722+
method = functools.partial(
723+
api.execute_streaming_sql,
724+
metadata=metadata,
725+
)
721726

722-
iterator = _restart_on_unavailable(
723-
method=method,
724-
trace_name="CloudSpanner.ExecuteStreamingSql",
725-
request=request,
726-
transaction_selector=txn_selector,
727-
observability_options=self.observability_options,
728-
)
727+
iterator = _restart_on_unavailable(
728+
method=method,
729+
trace_name="CloudSpanner.ExecuteStreamingSql",
730+
request=request,
731+
transaction_selector=txn_selector,
732+
observability_options=self.observability_options,
733+
)
729734

730-
result_set = StreamedResultSet(iterator)
731-
list(result_set) # consume all partials
735+
result_set = StreamedResultSet(iterator)
736+
list(result_set) # consume all partials
732737

733-
return result_set.stats.row_count_lower_bound
738+
return result_set.stats.row_count_lower_bound
734739

735740
return _retry_on_aborted(execute_pdml, DEFAULT_RETRY_BACKOFF)()
736741

@@ -1357,6 +1362,10 @@ def to_dict(self):
13571362
"transaction_id": snapshot._transaction_id,
13581363
}
13591364

1365+
@property
1366+
def observability_options(self):
1367+
return getattr(self._database, "observability_options", {})
1368+
13601369
def _get_session(self):
13611370
"""Create session as needed.
13621371
@@ -1476,27 +1485,32 @@ def generate_read_batches(
14761485
mappings of information used to perform actual partitioned reads via
14771486
:meth:`process_read_batch`.
14781487
"""
1479-
partitions = self._get_snapshot().partition_read(
1480-
table=table,
1481-
columns=columns,
1482-
keyset=keyset,
1483-
index=index,
1484-
partition_size_bytes=partition_size_bytes,
1485-
max_partitions=max_partitions,
1486-
retry=retry,
1487-
timeout=timeout,
1488-
)
1488+
with trace_call(
1489+
f"CloudSpanner.{type(self).__name__}.generate_read_batches",
1490+
extra_attributes=dict(table=table, columns=columns),
1491+
observability_options=self.observability_options,
1492+
):
1493+
partitions = self._get_snapshot().partition_read(
1494+
table=table,
1495+
columns=columns,
1496+
keyset=keyset,
1497+
index=index,
1498+
partition_size_bytes=partition_size_bytes,
1499+
max_partitions=max_partitions,
1500+
retry=retry,
1501+
timeout=timeout,
1502+
)
14891503

1490-
read_info = {
1491-
"table": table,
1492-
"columns": columns,
1493-
"keyset": keyset._to_dict(),
1494-
"index": index,
1495-
"data_boost_enabled": data_boost_enabled,
1496-
"directed_read_options": directed_read_options,
1497-
}
1498-
for partition in partitions:
1499-
yield {"partition": partition, "read": read_info.copy()}
1504+
read_info = {
1505+
"table": table,
1506+
"columns": columns,
1507+
"keyset": keyset._to_dict(),
1508+
"index": index,
1509+
"data_boost_enabled": data_boost_enabled,
1510+
"directed_read_options": directed_read_options,
1511+
}
1512+
for partition in partitions:
1513+
yield {"partition": partition, "read": read_info.copy()}
15001514

15011515
def process_read_batch(
15021516
self,
@@ -1522,12 +1536,17 @@ def process_read_batch(
15221536
:rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet`
15231537
:returns: a result set instance which can be used to consume rows.
15241538
"""
1525-
kwargs = copy.deepcopy(batch["read"])
1526-
keyset_dict = kwargs.pop("keyset")
1527-
kwargs["keyset"] = KeySet._from_dict(keyset_dict)
1528-
return self._get_snapshot().read(
1529-
partition=batch["partition"], **kwargs, retry=retry, timeout=timeout
1530-
)
1539+
observability_options = self.observability_options
1540+
with trace_call(
1541+
f"CloudSpanner.{type(self).__name__}.process_read_batch",
1542+
observability_options=observability_options,
1543+
):
1544+
kwargs = copy.deepcopy(batch["read"])
1545+
keyset_dict = kwargs.pop("keyset")
1546+
kwargs["keyset"] = KeySet._from_dict(keyset_dict)
1547+
return self._get_snapshot().read(
1548+
partition=batch["partition"], **kwargs, retry=retry, timeout=timeout
1549+
)
15311550

15321551
def generate_query_batches(
15331552
self,
@@ -1602,34 +1621,39 @@ def generate_query_batches(
16021621
mappings of information used to perform actual partitioned reads via
16031622
:meth:`process_read_batch`.
16041623
"""
1605-
partitions = self._get_snapshot().partition_query(
1606-
sql=sql,
1607-
params=params,
1608-
param_types=param_types,
1609-
partition_size_bytes=partition_size_bytes,
1610-
max_partitions=max_partitions,
1611-
retry=retry,
1612-
timeout=timeout,
1613-
)
1624+
with trace_call(
1625+
f"CloudSpanner.{type(self).__name__}.generate_query_batches",
1626+
extra_attributes=dict(sql=sql),
1627+
observability_options=self.observability_options,
1628+
):
1629+
partitions = self._get_snapshot().partition_query(
1630+
sql=sql,
1631+
params=params,
1632+
param_types=param_types,
1633+
partition_size_bytes=partition_size_bytes,
1634+
max_partitions=max_partitions,
1635+
retry=retry,
1636+
timeout=timeout,
1637+
)
16141638

1615-
query_info = {
1616-
"sql": sql,
1617-
"data_boost_enabled": data_boost_enabled,
1618-
"directed_read_options": directed_read_options,
1619-
}
1620-
if params:
1621-
query_info["params"] = params
1622-
query_info["param_types"] = param_types
1623-
1624-
# Query-level options have higher precedence than client-level and
1625-
# environment-level options
1626-
default_query_options = self._database._instance._client._query_options
1627-
query_info["query_options"] = _merge_query_options(
1628-
default_query_options, query_options
1629-
)
1639+
query_info = {
1640+
"sql": sql,
1641+
"data_boost_enabled": data_boost_enabled,
1642+
"directed_read_options": directed_read_options,
1643+
}
1644+
if params:
1645+
query_info["params"] = params
1646+
query_info["param_types"] = param_types
1647+
1648+
# Query-level options have higher precedence than client-level and
1649+
# environment-level options
1650+
default_query_options = self._database._instance._client._query_options
1651+
query_info["query_options"] = _merge_query_options(
1652+
default_query_options, query_options
1653+
)
16301654

1631-
for partition in partitions:
1632-
yield {"partition": partition, "query": query_info}
1655+
for partition in partitions:
1656+
yield {"partition": partition, "query": query_info}
16331657

16341658
def process_query_batch(
16351659
self,
@@ -1654,9 +1678,16 @@ def process_query_batch(
16541678
:rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet`
16551679
:returns: a result set instance which can be used to consume rows.
16561680
"""
1657-
return self._get_snapshot().execute_sql(
1658-
partition=batch["partition"], **batch["query"], retry=retry, timeout=timeout
1659-
)
1681+
with trace_call(
1682+
f"CloudSpanner.{type(self).__name__}.process_query_batch",
1683+
observability_options=self.observability_options,
1684+
):
1685+
return self._get_snapshot().execute_sql(
1686+
partition=batch["partition"],
1687+
**batch["query"],
1688+
retry=retry,
1689+
timeout=timeout,
1690+
)
16601691

16611692
def run_partitioned_query(
16621693
self,
@@ -1711,18 +1742,23 @@ def run_partitioned_query(
17111742
:rtype: :class:`~google.cloud.spanner_v1.merged_result_set.MergedResultSet`
17121743
:returns: a result set instance which can be used to consume rows.
17131744
"""
1714-
partitions = list(
1715-
self.generate_query_batches(
1716-
sql,
1717-
params,
1718-
param_types,
1719-
partition_size_bytes,
1720-
max_partitions,
1721-
query_options,
1722-
data_boost_enabled,
1745+
with trace_call(
1746+
f"CloudSpanner.${type(self).__name__}.run_partitioned_query",
1747+
extra_attributes=dict(sql=sql),
1748+
observability_options=self.observability_options,
1749+
):
1750+
partitions = list(
1751+
self.generate_query_batches(
1752+
sql,
1753+
params,
1754+
param_types,
1755+
partition_size_bytes,
1756+
max_partitions,
1757+
query_options,
1758+
data_boost_enabled,
1759+
)
17231760
)
1724-
)
1725-
return MergedResultSet(self, partitions, 0)
1761+
return MergedResultSet(self, partitions, 0)
17261762

17271763
def process(self, batch):
17281764
"""Process a single, partitioned query or read.

google/cloud/spanner_v1/merged_result_set.py

+12
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717
from typing import Any, TYPE_CHECKING
1818
from threading import Lock, Event
1919

20+
from google.cloud.spanner_v1._opentelemetry_tracing import trace_call
21+
2022
if TYPE_CHECKING:
2123
from google.cloud.spanner_v1.database import BatchSnapshot
2224

@@ -37,6 +39,16 @@ def __init__(self, batch_snapshot, partition_id, merged_result_set):
3739
self._queue: Queue[PartitionExecutorResult] = merged_result_set._queue
3840

3941
def run(self):
42+
observability_options = getattr(
43+
self._batch_snapshot, "observability_options", {}
44+
)
45+
with trace_call(
46+
"CloudSpanner.PartitionExecutor.run",
47+
observability_options=observability_options,
48+
):
49+
self.__run()
50+
51+
def __run(self):
4052
results = None
4153
try:
4254
results = self._batch_snapshot.process_query_batch(self._partition_id)

google/cloud/spanner_v1/pool.py

+9-20
Original file line number | Diff line number | Diff line change
@@ -523,12 +523,11 @@ def bind(self, database):
523523
metadata.append(
524524
_metadata_with_leader_aware_routing(database._route_to_leader_enabled)
525525
)
526-
created_session_count = 0
527526
self._database_role = self._database_role or self._database.database_role
528527

529528
request = BatchCreateSessionsRequest(
530529
database=database.name,
531-
session_count=self.size - created_session_count,
530+
session_count=self.size,
532531
session_template=Session(creator_role=self.database_role),
533532
)
534533

@@ -549,38 +548,28 @@ def bind(self, database):
549548
span_event_attributes,
550549
)
551550

552-
if created_session_count >= self.size:
553-
add_span_event(
554-
current_span,
555-
"Created no new sessions as sessionPool is full",
556-
span_event_attributes,
557-
)
558-
return
559-
560-
add_span_event(
561-
current_span,
562-
f"Creating {request.session_count} sessions",
563-
span_event_attributes,
564-
)
565-
566551
observability_options = getattr(self._database, "observability_options", None)
567552
with trace_call(
568553
"CloudSpanner.PingingPool.BatchCreateSessions",
569554
observability_options=observability_options,
570555
) as span:
571556
returned_session_count = 0
572-
while created_session_count < self.size:
557+
while returned_session_count < self.size:
573558
resp = api.batch_create_sessions(
574559
request=request,
575560
metadata=metadata,
576561
)
562+
563+
add_span_event(
564+
span,
565+
f"Created {len(resp.session)} sessions",
566+
)
567+
577568
for session_pb in resp.session:
578569
session = self._new_session()
570+
returned_session_count += 1
579571
session._session_id = session_pb.name.split("/")[-1]
580572
self.put(session)
581-
returned_session_count += 1
582-
583-
created_session_count += len(resp.session)
584573

585574
add_span_event(
586575
span,

0 commit comments

Comments (0)