Skip to content

Commit 589c114

Browse files
authored
Add comment to clarify on_write_complete callback (#2100)
1 parent 58003dd commit 589c114

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,20 @@ def _write_block(
135135
) -> None:
136136
raise NotImplementedError("Subclasses of PandasFileBasedDatasource must implement _write_block().")
137137

138+
# Note: this callback function is called once by the main thread after
139+
# [all write tasks complete](https://github.com/ray-project/ray/blob/ray-2.3.0/python/ray/data/dataset.py#L2716)
140+
# and is meant to be used for singular actions like
141+
# [committing a transaction](https://docs.ray.io/en/latest/data/api/doc/ray.data.Datasource.html).
142+
# As deceptive as it may look, there is no race condition here.
138143
def on_write_complete(self, write_results: List[Any], **_: Any) -> None:
139-
"""Execute callback on write complete."""
144+
"""Execute callback after all write tasks complete."""
140145
_logger.debug("Write complete %s.", write_results)
141146

142147
# Collect and return all write task paths
143148
self._write_paths.extend(write_results)
144149

145150
def on_write_failed(self, write_results: List[ObjectRef[Any]], error: Exception, **_: Any) -> None:
146-
"""Execute callback on write failed."""
151+
"""Execute callback after write tasks fail."""
147152
_logger.debug("Write failed %s.", write_results)
148153
raise error
149154

0 commit comments

Comments
 (0)