This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit 2ec42bf

Author: Ben Edwards
Merge pull request #77 from pageuppeople-opensource/execution_model-indexing
Add indexes to execution model
2 parents 11a85a6 + 375b9e9, commit 2ec42bf

File tree

2 files changed: 141 lines added, 102 lines removed

2 files changed

+141
-102
lines changed
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
"""add indexes to execution_model

Revision ID: 3834c837f5f2
Revises: bb0c5e8d05e2
Create Date: 2019-11-26 08:42:13.575198

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '3834c837f5f2'
down_revision = 'bb0c5e8d05e2'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_index('execution_model__index_on_execution_id_model_name', 'execution_model', ['execution_id', 'model_name'], unique=True, schema='rdl')
    op.create_index('execution_model__index_on_model_name_completed_on', 'execution_model', ['model_name', 'completed_on'], unique=False, schema='rdl')
    op.create_index(op.f('ix_rdl_execution_model_completed_on'), 'execution_model', ['completed_on'], unique=False, schema='rdl')
    op.create_index(op.f('ix_rdl_execution_model_status'), 'execution_model', ['status'], unique=False, schema='rdl')
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_rdl_execution_model_status'), table_name='execution_model', schema='rdl')
    op.drop_index(op.f('ix_rdl_execution_model_completed_on'), table_name='execution_model', schema='rdl')
    op.drop_index('execution_model__index_on_model_name_completed_on', table_name='execution_model', schema='rdl')
    op.drop_index('execution_model__index_on_execution_id_model_name', table_name='execution_model', schema='rdl')
    # ### end Alembic commands ###
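The revision above can also be exercised outside the CLI. The following is a minimal sketch of applying and rolling back this specific revision through Alembic's command API; the alembic.ini path and database wiring are assumptions about the local environment, not part of this commit.

# Sketch: apply and roll back revision 3834c837f5f2 programmatically.
# Assumes an alembic.ini in the working directory that points at the
# target database (an assumption about the environment).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # hypothetical config location
command.upgrade(cfg, "3834c837f5f2")    # creates the four indexes above
command.downgrade(cfg, "bb0c5e8d05e2")  # drops them again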
Lines changed: 107 additions & 102 deletions
@@ -1,102 +1,107 @@
Removed (previous version):

import uuid

from sqlalchemy import (
    Column,
    DateTime,
    Integer,
    String,
    Boolean,
    BigInteger,
    ForeignKey,
)
from sqlalchemy.sql import func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.inspection import inspect

from rdl.entities import Base
from rdl.entities import ExecutionEntity
from rdl.shared import Constants


class ExecutionModelEntity(Base):
    __tablename__ = "execution_model"
    __table_args__ = {"schema": Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}
    execution_model_id = Column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    created_on = Column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_on = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    execution_id = Column(
        UUID(as_uuid=True),
        ForeignKey(
            f"{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}."
            f"{inspect(ExecutionEntity).tables[0].name}."
            f"{inspect(ExecutionEntity).primary_key[0].name}"
        ),
        nullable=False,
    )
    model_name = Column(String(250), nullable=False)
    status = Column(
        String(50),
        nullable=False,
        server_default=str(Constants.ExecutionModelStatus.STARTED),
    )
    last_sync_version = Column(BigInteger, nullable=False)
    sync_version = Column(BigInteger, nullable=False)
    is_full_refresh = Column(Boolean, nullable=False)
    full_refresh_reason = Column(String(100), nullable=False)
    started_on = Column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    completed_on = Column(DateTime(timezone=True), nullable=True)
    execution_time_ms = Column(BigInteger, nullable=True)
    rows_processed = Column(BigInteger, nullable=True)
    batches_processed = Column(Integer, nullable=True)
    model_checksum = Column(String(100), nullable=False)
    failure_reason = Column(String(1000), nullable=True)

    def __str__(self):
        load_type = (
            f"FULL ({self.full_refresh_reason})"
            if self.is_full_refresh
            else f"INCREMENTAL from version '{self.last_sync_version}' to '{self.sync_version}'"
        )
        execution_time_s = None
        rows_per_second = None

        if self.execution_time_ms:
            execution_time_s = max(self.execution_time_ms // 1000, 1)

        if self.rows_processed:
            rows_per_second = self.rows_processed / execution_time_s

        return (
            "Model: {model}; "
            "Load type: {load_type}; "
            "Status: {status}; "
            "Started on: {started}; "
            "Completed on: {completed}; "
            "Execution time: {exec_time}; "
            "Batches processed: {batches}; "
            "Rows processed: {rows}; "
            "Average rows processed per second: {rows_per_second};".format(
                model=self.model_name,
                load_type=load_type,
                status=self.status,
                started=self.started_on.isoformat(),
                completed=self.completed_on.isoformat() if self.completed_on else "n/a",
                exec_time=f"{execution_time_s}s" if execution_time_s else "n/a",
                batches=f"{self.batches_processed:,}"
                if self.batches_processed
                else "n/a",
                rows=f"{self.rows_processed:,}" if self.rows_processed else "n/a",
                rows_per_second=f"{rows_per_second:,.2f}" if rows_per_second else "n/a",
            )
        )
Added (new version):

import uuid

from sqlalchemy import (
    Column,
    DateTime,
    Integer,
    String,
    Boolean,
    BigInteger,
    ForeignKey,
    Index,
)
from sqlalchemy.sql import func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.inspection import inspect

from rdl.entities import Base
from rdl.entities import ExecutionEntity
from rdl.shared import Constants


class ExecutionModelEntity(Base):
    __tablename__ = "execution_model"
    __table_args__ = {"schema": Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}
    execution_model_id = Column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    created_on = Column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_on = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    execution_id = Column(
        UUID(as_uuid=True),
        ForeignKey(
            f"{Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}."
            f"{inspect(ExecutionEntity).tables[0].name}."
            f"{inspect(ExecutionEntity).primary_key[0].name}"
        ),
        nullable=False,
    )
    model_name = Column(String(250), nullable=False)
    status = Column(
        String(50),
        nullable=False,
        server_default=str(Constants.ExecutionModelStatus.STARTED),
        index=True
    )
    last_sync_version = Column(BigInteger, nullable=False)
    sync_version = Column(BigInteger, nullable=False)
    is_full_refresh = Column(Boolean, nullable=False)
    full_refresh_reason = Column(String(100), nullable=False)
    started_on = Column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    completed_on = Column(DateTime(timezone=True), nullable=True, index=True)
    execution_time_ms = Column(BigInteger, nullable=True)
    rows_processed = Column(BigInteger, nullable=True)
    batches_processed = Column(Integer, nullable=True)
    model_checksum = Column(String(100), nullable=False)
    failure_reason = Column(String(1000), nullable=True)

    index_on_execution_id_model_name = Index("execution_model__index_on_execution_id_model_name", execution_id, model_name, unique=True)
    index_on_model_name_completed_on = Index("execution_model__index_on_model_name_completed_on", model_name, completed_on)

    def __str__(self):
        load_type = (
            f"FULL ({self.full_refresh_reason})"
            if self.is_full_refresh
            else f"INCREMENTAL from version '{self.last_sync_version}' to '{self.sync_version}'"
        )
        execution_time_s = None
        rows_per_second = None

        if self.execution_time_ms:
            execution_time_s = max(self.execution_time_ms // 1000, 1)

        if self.rows_processed:
            rows_per_second = self.rows_processed / execution_time_s

        return (
            "Model: {model}; "
            "Load type: {load_type}; "
            "Status: {status}; "
            "Started on: {started}; "
            "Completed on: {completed}; "
            "Execution time: {exec_time}; "
            "Batches processed: {batches}; "
            "Rows processed: {rows}; "
            "Average rows processed per second: {rows_per_second};".format(
                model=self.model_name,
                load_type=load_type,
                status=self.status,
                started=self.started_on.isoformat(),
                completed=self.completed_on.isoformat() if self.completed_on else "n/a",
                exec_time=f"{execution_time_s}s" if execution_time_s else "n/a",
                batches=f"{self.batches_processed:,}"
                if self.batches_processed
                else "n/a",
                rows=f"{self.rows_processed:,}" if self.rows_processed else "n/a",
                rows_per_second=f"{rows_per_second:,.2f}" if rows_per_second else "n/a",
            )
        )
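For context on what these indexes buy at query time, here is a minimal sketch of the two lookup shapes they appear to target: a unique fetch by (execution_id, model_name) and a latest-completed-run lookup per model ordered by completed_on. The engine URL, session setup, import path for ExecutionModelEntity, and the concrete filter values are illustrative assumptions, not code from this repository.

import uuid

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from rdl.entities import ExecutionModelEntity  # assumed export, mirroring the imports above

engine = create_engine("postgresql://user:pass@localhost/rdl")  # hypothetical DSN
session = sessionmaker(bind=engine)()

some_execution_id = uuid.uuid4()  # placeholder value

# Served by the unique (execution_id, model_name) index:
row = (
    session.query(ExecutionModelEntity)
    .filter_by(execution_id=some_execution_id, model_name="my_model")
    .one_or_none()
)

# Served by the (model_name, completed_on) index: most recently completed run of a model.
latest = (
    session.query(ExecutionModelEntity)
    .filter(ExecutionModelEntity.model_name == "my_model")
    .order_by(ExecutionModelEntity.completed_on.desc())
    .first()
)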

0 commit comments