Skip to content
This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit 13d6f02

Browse files
authored
Merge pull request #61 from pageuppeople-opensource/feature/SP-149-log-all-exec-and-model-details
SP-149 - Log all available details when logging execution and execution-model
2 parents df984d3 + 1bb9990 commit 13d6f02

File tree

4 files changed

+65
-26
lines changed

4 files changed

+65
-26
lines changed

rdl/data_load_tracking/DataLoadTrackerRepository.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def complete_execution(self, execution_id, total_number_of_models,
4242
.one()
4343

4444
execution_end_time = session.query(func.now()).scalar()
45-
total_execution_seconds = (execution_end_time - current_execution.started_on).total_seconds()
45+
total_execution_seconds = max((execution_end_time - current_execution.started_on).total_seconds(), 1)
4646
total_rows_processed = self.get_execution_rows(current_execution.execution_id)
4747
total_batches_processed = self.get_execution_batches(current_execution.execution_id)
4848

@@ -53,7 +53,7 @@ def complete_execution(self, execution_id, total_number_of_models,
5353
current_execution.rows_processed = total_rows_processed
5454
current_execution.batches_processed = total_batches_processed
5555
session.commit()
56-
self.logger.info(current_execution)
56+
self.logger.info(f'Completed {current_execution}')
5757
session.close()
5858
return total_rows_processed
5959

@@ -82,7 +82,7 @@ def save_execution_model(self, data_load_tracker):
8282
.one()
8383

8484
execution_end_time = session.query(func.now()).scalar()
85-
total_execution_seconds = (execution_end_time - current_execution_model.started_on).total_seconds()
85+
total_execution_seconds = max((execution_end_time - current_execution_model.started_on).total_seconds(), 1)
8686

8787
current_execution_model.completed_on = execution_end_time
8888
current_execution_model.execution_time_ms = int(total_execution_seconds * 1000)

rdl/data_sources/DataSourceFactory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def __init__(self, logger=None):
1111
def create_source(self, connection_string):
1212
for source in self.sources:
1313
if source.can_handle_connection_string(connection_string):
14-
self.logger.debug(f"Found handler '{source}' for connection string.")
14+
self.logger.info(f"Found handler '{source}' for given connection string.")
1515
return source(connection_string)
1616

1717
raise RuntimeError('There are no data sources that can handle this connection string')

rdl/entities/execution_entity.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,35 @@ class ExecutionEntity(Base):
2323
models_processed = Column(Integer, nullable=True)
2424

2525
def __str__(self):
26-
if self.status == Constants.ExecutionStatus.STARTED:
27-
return f"Started Execution ID: {self.execution_id} at {self.started_on}"
28-
29-
total_execution_seconds = self.execution_time_s
30-
execution_hours = total_execution_seconds // 3600
31-
execution_minutes = (total_execution_seconds // 60) % 60
32-
execution_seconds = total_execution_seconds % 60
33-
34-
return f"Completed Execution ID: {self.execution_id}" \
35-
f"; Models Processed: {self.models_processed:,}" \
36-
f"; Rows Processed: {self.rows_processed:,}" \
37-
f"; Execution Time: {execution_hours}h {execution_minutes}m {execution_seconds}s" \
38-
f"; Average rows processed per second: {(self.rows_processed//max(total_execution_seconds, 1)):,}."
26+
execution_time_str = None
27+
rows_per_second = None
28+
29+
if self.execution_time_s:
30+
total_execution_seconds = self.execution_time_s
31+
32+
execution_hours = total_execution_seconds // 3600
33+
execution_minutes = (total_execution_seconds // 60) % 60
34+
execution_seconds = total_execution_seconds % 60
35+
execution_time_str = f'{execution_hours}h {execution_minutes}m {execution_seconds}s'
36+
37+
if self.rows_processed:
38+
rows_per_second = (self.rows_processed//max(total_execution_seconds, 1))
39+
40+
return 'Execution ID: {exec_id}; ' \
41+
'Status: {status}; ' \
42+
'Started on: {started}; ' \
43+
'Completed on: {completed}; ' \
44+
'Execution time: {exec_time}; ' \
45+
'Models processed: {models}; ' \
46+
'Batches processed: {batches};' \
47+
'Rows processed: {rows}; ' \
48+
'Average rows processed per second: {rows_per_second};'.format(
49+
exec_id=self.execution_id,
50+
status=self.status,
51+
started=self.started_on.isoformat(),
52+
completed=self.completed_on.isoformat() if self.completed_on else 'n/a',
53+
exec_time=execution_time_str if execution_time_str else 'n/a',
54+
models=f'{self.models_processed:,}' if self.models_processed else 'n/a',
55+
batches=f'{self.batches_processed:,}' if self.batches_processed else 'n/a',
56+
rows=f'{self.rows_processed:,}' if self.rows_processed else 'n/a',
57+
rows_per_second=f'{rows_per_second:,.2f}' if rows_per_second else 'n/a')

rdl/entities/execution_model_entity.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,32 @@ class ExecutionModelEntity(Base):
3636
failure_reason = Column(String(1000), nullable=True)
3737

3838
def __str__(self):
39-
load_type = 'FULL' if self.is_full_refresh else f"INCREMENTAL from " \
40-
f"version '{self.last_sync_version}' " \
41-
f"to '{self.sync_version}'"
42-
43-
execution_tims_s = max(self.execution_time_ms // 1000, 1)
44-
rows_per_second = self.rows_processed / execution_tims_s
45-
return f"Rows: {self.rows_processed}, " \
46-
f"Load type: {load_type}, " \
47-
f"Total Execution Time: {execution_tims_s}s @ {rows_per_second:.2f} rows per second "
39+
load_type = f'FULL ({self.full_refresh_reason})' if self.is_full_refresh else \
40+
f"INCREMENTAL from version '{self.last_sync_version}' to '{self.sync_version}'"
41+
execution_time_s = None
42+
rows_per_second = None
43+
44+
if self.execution_time_ms:
45+
execution_time_s = max(self.execution_time_ms // 1000, 1)
46+
47+
if self.rows_processed:
48+
rows_per_second = self.rows_processed / execution_time_s
49+
50+
return 'Model: {model}; ' \
51+
'Load type: {load_type}; ' \
52+
'Status: {status}; ' \
53+
'Started on: {started}; ' \
54+
'Completed on: {completed}; ' \
55+
'Execution time: {exec_time}; ' \
56+
'Batches processed: {batches}; ' \
57+
'Rows processed: {rows}; ' \
58+
'Average rows processed per second: {rows_per_second};'.format(
59+
model=self.model_name,
60+
load_type=load_type,
61+
status=self.status,
62+
started=self.started_on.isoformat(),
63+
completed=self.completed_on.isoformat() if self.completed_on else 'n/a',
64+
exec_time=f'{execution_time_s}s' if execution_time_s else 'n/a',
65+
batches=f'{self.batches_processed:,}' if self.batches_processed else 'n/a',
66+
rows=f'{self.rows_processed:,}' if self.rows_processed else 'n/a',
67+
rows_per_second=f'{rows_per_second:,.2f}' if rows_per_second else 'n/a')

0 commit comments

Comments
 (0)