-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcortex_search_service.sql
More file actions
30 lines (27 loc) · 1.08 KB
/
cortex_search_service.sql
File metadata and controls
30 lines (27 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- Set the session schema using a fully qualified name so the unqualified table
-- names in this script (PARSE_PDFS, PARSED_PDFS) resolve to
-- SEMANTIC_DATABASE.DASH_SCHEMA -- the same location as the stage and search
-- service referenced below -- regardless of the session's current database.
USE SCHEMA SEMANTIC_DATABASE.DASH_SCHEMA;
-- Build PARSE_PDFS: one row per file listed in the DASH_PDFS stage directory
-- table, holding the file's relative path and the PARSE_DOCUMENT output for it.
CREATE OR REPLACE TABLE PARSE_PDFS AS
WITH staged_files AS (
    -- Every entry currently present in the stage's directory table.
    SELECT relative_path
    FROM DIRECTORY(@SEMANTIC_DATABASE.DASH_SCHEMA.DASH_PDFS)
)
SELECT
    relative_path,
    SNOWFLAKE.CORTEX.PARSE_DOCUMENT(
        @SEMANTIC_DATABASE.DASH_SCHEMA.DASH_PDFS,
        relative_path,
        {'mode':'LAYOUT'}
    ) AS data
FROM staged_files;
-- Build PARSED_PDFS: one row per text chunk of every parsed document.
--   PAGE_CONTENT  : the chunk text
--   TITLE         : relative path with a trailing ".pdf" extension removed
--   INPUT_STAGE   : constant name of the stage the documents were read from
--   RELATIVE_PATH : path of the source file within the stage
CREATE OR REPLACE TABLE PARSED_PDFS AS (
    WITH tmp_parsed AS (
        -- Split the "content" field of each parse result into an array of
        -- chunks; rows whose parse result has no content are skipped.
        SELECT
            relative_path,
            SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER(
                TO_VARIANT(data):content,
                'MARKDOWN',
                1800,  -- chunk size argument
                300    -- overlap argument
            ) AS chunks
        FROM PARSE_PDFS
        WHERE TO_VARIANT(data):content IS NOT NULL
    )
    SELECT
        TO_VARCHAR(c.value) AS PAGE_CONTENT,
        -- 'i' makes the match case-insensitive so ".PDF" / ".Pdf" are stripped
        -- too; position 1 and occurrence 0 are the function's defaults.
        REGEXP_REPLACE(p.relative_path, '\\.pdf$', '', 1, 0, 'i') AS TITLE,
        'SEMANTIC_DATABASE.DASH_SCHEMA.DASH_PDFS' AS INPUT_STAGE,
        p.relative_path AS RELATIVE_PATH
    -- FLATTEN expands the chunk array into one output row per chunk.
    FROM tmp_parsed p, LATERAL FLATTEN(INPUT => p.chunks) c
);
-- (Re)create the VEHICLES_INFO Cortex Search Service on top of PARSED_PDFS.
-- PAGE_CONTENT is the column named in the ON clause; PAGE_URL is emitted as a
-- constant empty string, and TITLE / RELATIVE_PATH are selected alongside it.
CREATE OR REPLACE CORTEX SEARCH SERVICE SEMANTIC_DATABASE.DASH_SCHEMA.VEHICLES_INFO
    ON PAGE_CONTENT
    WAREHOUSE = SNOWFLAKE_LEARNING_WH
    TARGET_LAG = '1 hour'
AS (
    SELECT
        '' AS PAGE_URL,
        PAGE_CONTENT,
        TITLE,
        RELATIVE_PATH
    FROM PARSED_PDFS
);