11""" Tests over datasource bootstrapping """
22
33import json
4+ import os
45from pathlib import Path
56from typing import Callable , List
67from uuid import uuid4
@@ -341,7 +342,6 @@ def test_bootstrap_datasources(self, datasource_id_factory, tmp_path: Path):
341342class TestBootstrapDatasource :
342343 @pytest .mark .parametrize ('sample_name' , SampleData .all_sample_names ())
343344 def test_success (self , sample_name , datasource_id ):
344-
345345 # Clear out connections at the onset, else the get_engine() portion
346346 # gets confused over human-name conflicts when we've bootstrapped
347347 # the same sample data repeatedly, namely through having first
@@ -547,6 +547,208 @@ def test_had_bad_sqlite_database_files(self, datasource_id, bad_pathname: str):
547547 # the real problem to the user.
548548
549549
550+ class TestDatabricks :
551+ """Test ENG-5517 Very Special Behavior for databricks side-effects if databricks-connect script is
552+ in PATH and we have the optional 'cluster_id' datapoint in connect args.
553+ """
554+
555+ @pytest .fixture ()
556+ def tmp_home (self , tmpdir : Path ) -> Path :
557+ existing_home = os .environ ['HOME' ]
558+
559+ new_home = tmpdir / 'home'
560+
561+ new_home .mkdir ()
562+
563+ os .environ ['HOME' ] = str (new_home )
564+
565+ try :
566+ yield new_home
567+ finally :
568+ os .environ ['HOME' ] = existing_home
569+
570+ @pytest .fixture ()
571+ def databricks_connect_in_path (self , tmpdir : Path ) -> Path :
572+ # Get a mock-ish executable 'databricks-connect' into an element in the path
573+ # so that which('databricks-connect') will find something (see databricks post
574+ # processor)
575+
576+ # Make a new subdir of tmpdir, add it to the path, create executable
577+ # shell script databricks-connect
578+
579+ bindir = tmpdir / 'scratch-bin'
580+ bindir .mkdir ()
581+
582+ orig_path = os .environ ['PATH' ]
583+
584+ os .environ ['PATH' ] = f"{ orig_path } :{ bindir } "
585+
586+ scriptpath = bindir / 'databricks-connect'
587+ script_output_path = tmpdir / 'connect-inputs.txt'
588+
589+ # Now make a 'databricks-connect' executable that echos all its stdin to tmpdir/connect-inputs.txt.txt.
590+ with open (scriptpath , 'w' ) as outfile :
591+ outfile .write (f'#!/bin/sh\n cat > { script_output_path } \n exit 0\n ' )
592+
593+ scriptpath .chmod (0o755 )
594+
595+ try :
596+ # Yield the script output path so a test can inspect its contents.
597+ yield script_output_path
598+
599+ finally :
600+ # Undo $PATH change
601+ os .environ ['PATH' ] = orig_path
602+
603+ @pytest .fixture ()
604+ def jsons_for_extra_behavior (self ):
605+ """Return a DatasourceJSONs describing databricks that will tickle postprocess_databricks()
606+ into doing its extra behavior. Also returns dict of some of the fields within that JSON."""
607+
608+ hostname = 'dbc-1bab80fc-a74b.cloud.databricks.com'
609+ password = 'dapie372d57cefdc078d8ce3936fcb0e22ee'
610+ port = 54321
611+ org_id = 65475674534576
612+ cluster_id = '0122-044839-vx2fk606'
613+
614+ case_data = DatasourceJSONs (
615+ meta_dict = {
616+ 'required_python_modules' : ['sqlalchemy-databricks' ],
617+ 'allow_datasource_dialect_autoinstall' : False ,
618+ 'drivername' : 'databricks+connector' ,
619+ 'sqlmagic_autocommit' : False , # This one is special!
620+ 'name' : 'Databricks With Extras' ,
621+ },
622+ dsn_dict = {
623+ 'username' : 'token' ,
624+ 'host' : hostname ,
625+ 'password' : password ,
626+ },
627+ connect_args_dict = {
628+ "http_path" : "sql/protocolv1/o/2414094324684936/0125-220758-m9pfb4c7" ,
629+ "cluster_id" : cluster_id ,
630+ "org_id" : org_id ,
631+ "port" : port ,
632+ },
633+ )
634+
635+ return (
636+ case_data ,
637+ {
638+ 'hostname' : hostname ,
639+ 'password' : password ,
640+ 'port' : port ,
641+ 'org_id' : org_id ,
642+ 'cluster_id' : cluster_id ,
643+ },
644+ )
645+
646+ def test_extra_behavior (
647+ self , datasource_id , databricks_connect_in_path , tmp_home , jsons_for_extra_behavior
648+ ):
649+ """Test creating databricks with extra keys to cause postprocess_databricks() to do its magic"""
650+
651+ # Make a preexisting tmp_home/.databricks-connect, expect it to get unlinked
652+ # (see lines in postprocess_databricks)
653+ dotconnect = tmp_home / '.databricks-connect'
654+ with dotconnect .open ('w' ) as of :
655+ of .write ('exists' )
656+
657+ assert dotconnect .exists ()
658+
659+ case_data , case_dict = jsons_for_extra_behavior
660+
661+ assert 'cluster_id' in case_data .connect_args_dict
662+
663+ initial_len = len (Connection .connections )
664+
665+ datasources .bootstrap_datasource (
666+ datasource_id , case_data .meta_json , case_data .dsn_json , case_data .connect_args_json
667+ )
668+
669+ assert len (Connection .connections ) == initial_len + 1
670+
671+ # Preexisting file should have been unlinked. The real databricks-connect
672+ # script would have recreated it, but the mock version we create in fixture
673+ # databricks_connect_in_path will create a different file.
674+ assert not dotconnect .exists ()
675+
676+ # databricks_connect_in_path is the path where the fake script output was placed
677+ assert databricks_connect_in_path .exists ()
678+
679+ # Expect to find things in it. See ENG-5517.
680+ # We can only test that we ran this mock script and the known result
681+ # of our mock script. What the real one does ... ?
682+ contents = databricks_connect_in_path .read ().split ()
683+ assert len (contents ) == 6
684+ assert contents [0 ] == 'y'
685+ assert contents [1 ] == f"https://{ case_dict ['hostname' ]} /"
686+ assert contents [2 ] == case_dict ['password' ]
687+ assert contents [3 ] == case_dict ['cluster_id' ]
688+ assert contents [4 ] == str (case_dict ['org_id' ])
689+ assert contents [5 ] == str (case_dict ['port' ])
690+
691+ def test_skip_extra_behavior_if_no_databricks_connect (
692+ self , datasource_id , tmp_home , jsons_for_extra_behavior
693+ ):
694+ # Let's create a $HOME/.databricks-connect file. It should remain untouched
695+ # since we're not also using fixture databricks_connect_in_path putting the
696+ # script in our path.
697+
698+ dotconnect = tmp_home / '.databricks-connect'
699+ with dotconnect .open ('w' ) as of :
700+ of .write ('preexists' )
701+
702+ assert dotconnect .exists ()
703+
704+ case_data , case_dict = jsons_for_extra_behavior
705+
706+ initial_len = len (Connection .connections )
707+
708+ # Should not fail, but won't have done any extra behavior.
709+ datasources .bootstrap_datasource (
710+ datasource_id , case_data .meta_json , case_data .dsn_json , case_data .connect_args_json
711+ )
712+
713+ assert len (Connection .connections ) == initial_len + 1
714+
715+ # Left unchanged
716+ assert dotconnect .exists ()
717+ assert 'preexists' in dotconnect .read ()
718+
719+ def test_skip_extra_behavior_if_no_cluster_id (
720+ self , datasource_id , tmp_home , databricks_connect_in_path
721+ ):
722+ # Let's create a $HOME/.databricks-connect file. It should remain untouched
723+ # since we're not also using fixture databricks_connect_in_path putting the
724+ # script in our path.
725+
726+ dotconnect = tmp_home / '.databricks-connect'
727+ with dotconnect .open ('w' ) as of :
728+ of .write ('preexists' )
729+
730+ assert dotconnect .exists ()
731+
732+ case_data = SampleData .get_sample ('databricks' )
733+ assert 'cluster_id' not in case_data .connect_args_dict
734+
735+ initial_len = len (Connection .connections )
736+
737+ # Should not have triggered extra behavior -- cluster_id wasn't present (but
738+ # we do have a databricks-connect script in the PATH).
739+
740+ datasources .bootstrap_datasource (
741+ datasource_id , case_data .meta_json , case_data .dsn_json , case_data .connect_args_json
742+ )
743+
744+ assert len (Connection .connections ) == initial_len + 1
745+
746+ # But won't have breathed on dotconnect file.
747+ # Left unchanged
748+ assert dotconnect .exists ()
749+ assert 'preexists' in dotconnect .read ()
750+
751+
550752class TestEnsureRequirements :
551753 def test_already_installed (self , datasource_id ):
552754 requirements = ['pip' ]
@@ -569,7 +771,6 @@ def test_raises_when_disallowed_but_needs_to_install(
569771
570772class TestInstallPackage :
571773 def test_install (self , not_installed_package ):
572-
573774 pkgname = not_installed_package
574775
575776 # Better not be installed right now!
0 commit comments