1515from IPython .display import HTML , display
1616from pandas import DataFrame
1717from sqlalchemy import inspect
18+ from sqlalchemy .engine .base import Engine
1819from sqlalchemy .engine .reflection import Inspector
1920from sqlalchemy .exc import NoSuchTableError
2021from sqlalchemy .types import TypeEngine
@@ -412,7 +413,7 @@ def run(self, invoked_as: str, args: List[str]) -> Tuple[DataFrame, bool]:
412413 # down below will then raise NoSuchTableError.
413414 column_dicts = inspector .get_columns (relation_name , schema = schema )
414415 except NoSuchTableError :
415- self . _raise_from_no_such_table (schema , relation_name )
416+ _raise_from_no_such_table (schema , relation_name )
416417
417418 # 'Pivot' the dicts from get_columns()
418419 names = []
@@ -553,31 +554,34 @@ def run(self, invoked_as: str, args: List[str]) -> None:
553554 # * Uploading batches of successfully discovered relations
554555 # * Catching any exception and reporting it back to gate. Will suppress the exception.
555556 #
556- with RelationStructureMessager (ds_id ) as messenger :
557- inspector = self .get_inspector ()
558557
559- # This and delta() just for development timing figures. Could become yet another
560- # timer context manager implementation.
561- start = time .monotonic ()
558+ # This and delta() just for development timing figures. Could become yet another
559+ # timer context manager implementation.
560+ start = time .monotonic ()
562561
563- def delta () -> float :
564- """Record new timing section, kindof like a stopwatch lap timer.
565- Returns the prior 'lap' time.
566- """
567- nonlocal start
562+ def delta () -> float :
563+ """Record new timing section, kindof like a stopwatch lap timer.
564+ Returns the prior 'lap' time.
565+ """
566+ nonlocal start
568567
569- now = time .monotonic ()
570- ret = now - start
571- start = now
568+ now = time .monotonic ()
569+ ret = now - start
570+ start = now
572571
573- return ret
572+ return ret
573+
574+ with RelationStructureMessager (ds_id ) as messenger :
575+ inspector = self .get_inspector ()
574576
575577 relations_and_kinds = self .all_table_and_views (inspector )
576578 print (f'Discovered { len (relations_and_kinds )} relations in { delta ()} ' )
577579
578580 # Introspect each relation concurrently.
579581 # TODO: Take minimum concurrency as a param?
580- with ThreadPoolExecutor (max_workers = self .MAX_INTROSPECTION_THREADS ) as executor :
582+ with ThreadPoolExecutor (
583+ max_workers = self .get_max_threadpool_workers (inspector )
584+ ) as executor :
581585 future_to_relation = {
582586 executor .submit (
583587 self .fully_introspect , inspector , schema_name , relation_name , kind
@@ -589,10 +593,8 @@ def delta() -> float:
589593 schema_name , relation_name , kind = future_to_relation [future ]
590594 messenger .queue_for_delivery (future .result ())
591595
592- table_introspection_delta = delta ()
593- print (
594- f'Done introspecting and messaging gate in { table_introspection_delta } , amortized { table_introspection_delta / len (relations_and_kinds )} s per relation'
595- )
596+ table_introspection_delta = delta ()
597+ print (f'Done introspecting and messaging gate in { table_introspection_delta } ' )
596598
597599 # run() contract: return what to bind to the SQL cell variable name, and if display() needs
598600 # to be called on it. Nothing and nope!
@@ -602,14 +604,27 @@ def delta() -> float:
602604 # All of the rest of the methods end up assisting run(), directly or indirectly
603605 ###
604606
605- def all_table_and_views (self , inspector ) -> List [Tuple [str , str , str ]]:
607+ @classmethod
608+ def get_max_threadpool_workers (cls , inspector : SchemaStrippingInspector ) -> int :
609+ """Determine max concurrency for introspecting this sort of database.
610+
611+ BigQuery, in particular, seems to be not totally threadsafe, ENG-5808
612+ """
613+
614+ if inspector .engine .driver == 'bigquery' :
615+ return 1
616+ else :
617+ return cls .MAX_INTROSPECTION_THREADS
618+
619+ @classmethod
620+ def all_table_and_views (cls , inspector ) -> List [Tuple [str , str , str ]]:
606621 """Returns list of (schema name, relation name, table-or-view) tuples"""
607622
608623 results = []
609624
610625 default_schema = inspector .default_schema_name
611626 all_schemas = set (inspector .get_schema_names ())
612- all_schemas .difference_update (self .AVOID_SCHEMAS )
627+ all_schemas .difference_update (cls .AVOID_SCHEMAS )
613628 if default_schema and default_schema not in all_schemas :
614629 all_schemas .add (default_schema )
615630
@@ -632,23 +647,24 @@ def all_table_and_views(self, inspector) -> List[Tuple[str, str, str]]:
632647
633648 return results
634649
650+ @classmethod
635651 def fully_introspect (
636- self , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str , kind : str
652+ cls , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str , kind : str
637653 ) -> RelationStructureDescription :
638654 """Drive introspecting into this single relation, making all the necessary Introspector API
639655 calls to learn all of the relation's sub-structures.
640656
641657 Returns a RelationStructureDescription pydantic model, suitable to POST back to Gate with.
642658 """
643659
644- columns = self .introspect_columns (inspector , schema_name , relation_name )
660+ columns = cls .introspect_columns (inspector , schema_name , relation_name )
645661
646662 # Always introspect indexes, even if a view, because materialized views
647663 # can have indexes.
648- indexes = self .introspect_indexes (inspector , schema_name , relation_name )
664+ indexes = cls .introspect_indexes (inspector , schema_name , relation_name )
649665
650666 # Likewise unique constraints? Those _might_ be definable on materialized views?
651- unique_constraints = self .introspect_unique_constraints (
667+ unique_constraints = cls .introspect_unique_constraints (
652668 inspector , schema_name , relation_name
653669 )
654670
@@ -660,14 +676,14 @@ def fully_introspect(
660676 foreign_keys = []
661677 else :
662678 view_definition = None
663- primary_key_name , primary_key_columns = self .introspect_primary_key (
679+ primary_key_name , primary_key_columns = cls .introspect_primary_key (
664680 inspector , relation_name , schema_name
665681 )
666- check_constraints = self .introspect_check_constraints (
682+ check_constraints = cls .introspect_check_constraints (
667683 inspector , schema_name , relation_name
668684 )
669685
670- foreign_keys = self .introspect_foreign_keys (inspector , schema_name , relation_name )
686+ foreign_keys = cls .introspect_foreign_keys (inspector , schema_name , relation_name )
671687
672688 print (f'Introspected { kind } { schema_name } .{ relation_name } ' )
673689
@@ -685,8 +701,9 @@ def fully_introspect(
685701 foreign_keys = foreign_keys ,
686702 )
687703
704+ @classmethod
688705 def introspect_foreign_keys (
689- self , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str
706+ cls , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str
690707 ) -> List [ForeignKeysModel ]:
691708 """Introspect all foreign keys for a table, describing the results as a List[ForeignKeysModel]"""
692709
@@ -716,8 +733,9 @@ def introspect_foreign_keys(
716733
717734 return fkeys
718735
736+ @classmethod
719737 def introspect_check_constraints (
720- self , inspector , schema_name , relation_name
738+ cls , inspector , schema_name , relation_name
721739 ) -> List [CheckConstraintModel ]:
722740 """Introspect all check constraints for a table, describing the results as a List[CheckConstraintModel]"""
723741
@@ -734,8 +752,9 @@ def introspect_check_constraints(
734752
735753 return constraints
736754
755+ @classmethod
737756 def introspect_unique_constraints (
738- self , inspector , schema_name , relation_name
757+ cls , inspector , schema_name , relation_name
739758 ) -> List [UniqueConstraintModel ]:
740759 """Introspect all unique constraints for a table, describing the results as a List[UniqueConstraintModel]"""
741760
@@ -752,7 +771,8 @@ def introspect_unique_constraints(
752771
753772 return constraints
754773
755- def introspect_indexes (self , inspector , schema_name , relation_name ) -> List [IndexModel ]:
774+ @classmethod
775+ def introspect_indexes (cls , inspector , schema_name , relation_name ) -> List [IndexModel ]:
756776 """Introspect all indexes for a table or materialized view, describing the results as a List[IndexModel]"""
757777 indexes = []
758778
@@ -769,8 +789,9 @@ def introspect_indexes(self, inspector, schema_name, relation_name) -> List[Inde
769789
770790 return indexes
771791
792+ @classmethod
772793 def introspect_primary_key (
773- self , inspector : SchemaStrippingInspector , relation_name : str , schema_name : str
794+ cls , inspector : SchemaStrippingInspector , relation_name : str , schema_name : str
774795 ) -> Tuple [Optional [str ], List [str ]]:
775796 """Introspect the primary key of a table, returning the pkey name and list of columns in the primary key (if any).
776797
@@ -792,8 +813,9 @@ def introspect_primary_key(
792813 # No primary key to be returned.
793814 return None , []
794815
816+ @classmethod
795817 def introspect_columns (
796- self , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str
818+ cls , inspector : SchemaStrippingInspector , schema_name : str , relation_name : str
797819 ) -> List [ColumnModel ]:
798820 column_dicts = inspector .get_columns (relation_name , schema = schema_name )
799821
@@ -1235,6 +1257,15 @@ def default_schema_name(self) -> Optional[str]:
12351257 # BigQuery, Trino dialects may end up returning None.
12361258 return self .underlying_inspector .default_schema_name
12371259
1260+ @property
1261+ def engine (self ) -> Engine :
1262+ return self .underlying_inspector .engine
1263+
1264+ # Value-added properties
1265+ @property
1266+ def is_bigquery (self ) -> bool :
1267+ return self .engine .driver == 'bigquery'
1268+
12381269 def get_schema_names (self ) -> List [str ]:
12391270 return self .underlying_inspector .get_schema_names ()
12401271
@@ -1243,6 +1274,11 @@ def get_columns(self, relation_name: str, schema: Optional[str] = None) -> List[
12431274
12441275 @handle_not_implemented ('(unobtainable)' )
12451276 def get_view_definition (self , view_name : str , schema : Optional [str ] = None ) -> str :
1277+ if self .is_bigquery :
1278+ # Sigh. Have to explicitly interpolate schema into view name, else
1279+ # underlying driver code complains. Not even joking.
1280+ view_name = f'{ schema } .{ view_name } '
1281+
12461282 return self .underlying_inspector .get_view_definition (view_name , schema = schema )
12471283
12481284 def get_pk_constraint (self , table_name : str , schema : Optional [str ] = None ) -> dict :
0 commit comments