21 changes: 10 additions & 11 deletions README.md
@@ -13,9 +13,9 @@ mvn clean test \
```
Note that you must set the Aurora MySQL and Aurora PostgreSQL properties to real values before running the tests.
## Setup Local Environment
MySQL, Postgresql, MSSQL, DB2, MemSQL, SAP HANA are using prebuild images.
MySQL, PostgreSQL, MSSQL, DB2, MemSQL, SAP HANA, and Oracle use prebuilt images.

Oracle DB image should be build separately.
MemSQL image should be configure after start.

The MemSQL image should be configured after start.

@@ -29,9 +29,8 @@ provided for this purpose.

Netezza and Teradata require VMware Player to run their emulators.

* [Install Docker Compose](https://docs.docker.com/compose/install/)
* Build local docker images
* [Build Oracle DB docker image version 12.1.0.2-ee](https://github.com/oracle/docker-images/tree/master/OracleDatabase/SingleInstance)
* [Install Podman](https://podman.io/getting-started/installation)
* [Install Podman Compose](https://github.com/containers/podman-compose#installation) or use `docker-compose`.
* Enter the folder with the docker-compose file:
```bash
cd docker-compose/db-plugins-env/
@@ -42,16 +41,16 @@ export MEMSQL_LICENSE_KEY=YOUR_LICENSE_KEY
```
* Initialize the MemSQL container:
```bash
docker-compose up memsql
podman-compose up memsql
```
* Start the SAP HANA password service:
```bash
bash saphana-password-server.sh &
```
* Start docker environment by running commands:
* Start the container environment by running:
```bash
cd docker-compose/db-plugins-env/
docker-compose up -d
podman-compose up -d
```
* Connect to MemSQL Studio at [http://localhost:8888](http://localhost:8888).
The default username is `root`; the password should be left blank.
Expand Down Expand Up @@ -100,9 +99,9 @@ grant all on *.* to 'root'@'%' identified by 'root' with grant option;
* **oracle.host** - Server host. Default: localhost.
* **oracle.port** - Server port. Default: 1521.
* **oracle.username** - Server username. Default: SYSTEM.
* **oracle.password** - Server password. Default: 123Qwe123.
* **oracle.database** - Server sid/database. Default: cdap.
* **oracle.connectionType** - Server connection type (service/sid) Default: sid.
* **oracle.password** - Server password. Default: oracle (as set in `docker-compose.yml`).
* **oracle.database** - Server sid/database. Default: FREEPDB1.
* **oracle.connectionType** - Server connection type (service/sid). Default: service.
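
These defaults can be overridden on the command line. A minimal sketch, assuming the properties are passed as standard Maven system properties in the same way as the `mvn clean test` invocation above:
```bash
mvn clean test \
  -Doracle.host=localhost \
  -Doracle.port=1521 \
  -Doracle.username=SYSTEM \
  -Doracle.password=oracle \
  -Doracle.database=FREEPDB1 \
  -Doracle.connectionType=service
```
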
#### Netezza
* **netezza.host** - Server host. Default: localhost.
* **netezza.port** - Server port. Default: 5480.
@@ -123,7 +123,7 @@ protected void runETLOnce(ApplicationManager appManager,
                            ExecutionException {
    final WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start(arguments);
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);
  }

  protected void testDBInvalidFieldType(String columnName, Schema.Type type, ETLPlugin sinkConfig,
11 changes: 7 additions & 4 deletions docker-compose/db-plugins-env/docker-compose.yml
@@ -50,13 +50,16 @@ services:
    privileged: true

  oracle:
    image: oracle/database:12.1.0.2-ee
    image: container-registry.oracle.com/database/free:latest
    ports:
      - 1521:1521
    environment:
      - ORACLE_SID=cdap
      - ORACLE_PDB=mydb
      - ORACLE_PWD=123Qwe123
      - ORACLE_PWD=oracle
    healthcheck:
      test: sqlplus -L SYSTEM/oracle@//localhost:1521/FREEPDB1 @/opt/oracle/checkDBStatus.sql || exit 1
      interval: 20s
      timeout: 10s
      retries: 10

  memsql:
    image: memsql/cluster-in-a-box:centos-6.8.10-a53e479edc-1.9.0-1.3.0
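Once the environment is up, the Oracle healthcheck state can be inspected manually. A minimal sketch, assuming a Docker-compatible Podman build that exposes the health state under `.State.Health.Status` (older Podman releases use `.State.Healthcheck.Status`) and the default podman-compose container name `db-plugins-env_oracle_1`:
```bash
# Prints "starting" until the sqlplus healthcheck passes, then "healthy".
podman inspect --format '{{.State.Health.Status}}' db-plugins-env_oracle_1
```
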
108 changes: 108 additions & 0 deletions oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleETLDBOutputFormat.java
@@ -0,0 +1,108 @@
/*
 * Copyright © 2025 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.oracle;

import io.cdap.plugin.db.sink.ETLDBOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.stream.Collectors;

/**
 * Class that extends {@link ETLDBOutputFormat} to implement the abstract methods for Oracle.
 */
public class OracleETLDBOutputFormat extends ETLDBOutputFormat {
  private static final Logger LOG = LoggerFactory.getLogger(OracleETLDBOutputFormat.class);

  /**
   * This method is used to construct the MERGE query for Oracle.
   * Example - MERGE INTO target_table USING source_table ON (target_table.id = source_table.id)
   * WHEN MATCHED THEN UPDATE SET target_table.name = source_table.name, target_table.age = source_table.age
   * WHEN NOT MATCHED THEN INSERT (id, name, age) VALUES (source_table.id, source_table.name, source_table.age);
   *
   * In this context, the source is a single row represented by placeholders.
   *
   * @param table Name of the target table.
   * @param fieldNames All the columns of the table.
   * @param mergeKeys The key columns to use for the ON condition.
   * @return MERGE query in the form of a string.
   */
  @Override
  public String constructUpsertQuery(String table, String[] fieldNames, String[] mergeKeys) {
    LOG.debug("Constructing upsert query for table: {}, fields: {}, keys: {}",
              table, Arrays.toString(fieldNames), Arrays.toString(mergeKeys));
    if (mergeKeys == null || mergeKeys.length == 0) {
      throw new IllegalArgumentException("Merge keys must be specified for MERGE operation.");
    }
    if (fieldNames == null || fieldNames.length == 0) {
      throw new IllegalArgumentException("Field names must be specified for MERGE operation.");
    }

    // The MERGE source is a single row of JDBC placeholders selected from DUAL.
    StringBuilder query = new StringBuilder();
    query.append("MERGE INTO ").append(table).append(" TGT");
    query.append(" USING (SELECT ");
    for (int i = 0; i < fieldNames.length; i++) {
      query.append("? ").append(fieldNames[i]);
      if (i < fieldNames.length - 1) {
        query.append(", ");
      }
    }
    query.append(" FROM DUAL) SRC");

    query.append(" ON (");
    for (int i = 0; i < mergeKeys.length; i++) {
      query.append("TGT.").append(mergeKeys[i]).append(" = SRC.").append(mergeKeys[i]);
      if (i < mergeKeys.length - 1) {
        query.append(" AND ");
      }
    }
    query.append(")");

    // UPDATE clause. Oracle does not allow columns referenced in the ON clause to be
    // updated (ORA-38104), so only non-key fields are set here. If every field is a
    // merge key, the WHEN MATCHED clause is omitted and the MERGE is insert-only.
    String updateClause = Arrays.stream(fieldNames)
      .filter(fieldName -> !Arrays.asList(mergeKeys).contains(fieldName))
      .map(fieldName -> "TGT." + fieldName + " = SRC." + fieldName)
      .collect(Collectors.joining(", "));
    if (!updateClause.isEmpty()) {
      query.append(" WHEN MATCHED THEN UPDATE SET ").append(updateClause);
    }

    // INSERT clause
    query.append(" WHEN NOT MATCHED THEN INSERT (");
    query.append(Arrays.stream(fieldNames).collect(Collectors.joining(", ")));
    query.append(") VALUES (");
    query.append(Arrays.stream(fieldNames).map(f -> "SRC." + f).collect(Collectors.joining(", ")));
    query.append(")");

    String resultQuery = query.toString();
    LOG.debug("Constructed upsert query: {}", resultQuery);
    return resultQuery;
  }
}
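
To make the generated statement concrete, a minimal sketch of calling `constructUpsertQuery` with a hypothetical table and columns (not part of the plugin's test suite):
```java
OracleETLDBOutputFormat format = new OracleETLDBOutputFormat();
String sql = format.constructUpsertQuery(
  "EMPLOYEES",
  new String[] {"ID", "NAME", "AGE"},
  new String[] {"ID"});
// The query is built on a single line; wrapped here for readability:
// MERGE INTO EMPLOYEES TGT USING (SELECT ? ID, ? NAME, ? AGE FROM DUAL) SRC
//   ON (TGT.ID = SRC.ID)
//   WHEN MATCHED THEN UPDATE SET TGT.NAME = SRC.NAME, TGT.AGE = SRC.AGE
//   WHEN NOT MATCHED THEN INSERT (ID, NAME, AGE) VALUES (SRC.ID, SRC.NAME, SRC.AGE)
```
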
90 changes: 84 additions & 6 deletions oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSink.java
@@ -23,6 +23,7 @@
import io.cdap.cdap.api.annotation.MetadataProperty;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.batch.Output;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.batch.BatchSink;
@@ -31,15 +32,25 @@
import io.cdap.plugin.common.Asset;
import io.cdap.plugin.common.ConfigUtil;
import io.cdap.plugin.common.LineageRecorder;
import io.cdap.plugin.common.batch.sink.SinkOutputFormatProvider;
import io.cdap.plugin.common.db.DBErrorDetailsProvider;
import io.cdap.plugin.db.DBRecord;
import io.cdap.plugin.db.SchemaReader;
import io.cdap.plugin.db.config.AbstractDBSpecificSinkConfig;
import io.cdap.plugin.db.sink.AbstractDBSink;
import io.cdap.plugin.db.sink.ETLDBOutputFormat;
import io.cdap.plugin.db.sink.FieldsValidator;
import io.cdap.plugin.util.DBUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nullable;

/**
@@ -50,12 +61,13 @@
@Description("Writes records to Oracle table. Each record will be written in a row in the table")
@Metadata(properties = {@MetadataProperty(key = Connector.PLUGIN_TYPE, value = OracleConnector.NAME)})
public class OracleSink extends AbstractDBSink<OracleSink.OracleSinkConfig> {

  private static final Logger LOG = LoggerFactory.getLogger(OracleSink.class);
  private final OracleSinkConfig oracleSinkConfig;

  public OracleSink(OracleSinkConfig oracleSinkConfig) {
    super(oracleSinkConfig);
    this.oracleSinkConfig = oracleSinkConfig;
    LOG.debug("OracleSink constructor called.");
  }

  @Override
@@ -96,6 +108,33 @@ protected DBErrorDetailsProvider getErrorDetailsProvider() {
    return dbErrorDetailsProvider;
  }

  @Override
  public void prepareRun(BatchSinkContext context) {
    LOG.info("Entering OracleSink prepareRun for table {}.", oracleSinkConfig.getTableName());
    super.prepareRun(context);
    LOG.info("Exiting OracleSink prepareRun for table {}.", oracleSinkConfig.getTableName());
  }

  @Override
  protected void addOutputContext(BatchSinkContext context) {
    LOG.debug("Adding output context for operation: {}", oracleSinkConfig.getOperation());
    Class<?> outputFormatClass;
    if ("upsert".equalsIgnoreCase(oracleSinkConfig.getOperation())) {
      outputFormatClass = OracleETLDBOutputFormat.class;
    } else {
      outputFormatClass = ETLDBOutputFormat.class;
    }
    context.addOutput(Output.of(oracleSinkConfig.getReferenceName(),
                                new SinkOutputFormatProvider((Class<? extends OutputFormat>) outputFormatClass,
                                                             getConfiguration())));
  }

  @Override
  public void destroy() {
    LOG.info("Entering OracleSink destroy for table {}.", oracleSinkConfig.getTableName());
    super.destroy();
    LOG.info("Exiting OracleSink destroy for table {}.", oracleSinkConfig.getTableName());
  }

  /**
   * Oracle sink configuration.
   */
@@ -119,15 +158,42 @@ public static class OracleSinkConfig extends AbstractDBSpecificSinkConfig {
    @Nullable
    public Integer defaultBatchValue;

    @Name("operation")
    @Description("The write operation to perform. 'insert', 'update', or 'upsert'.")
    @Macro
    @Nullable
    public String operation;

    @Name("mergeKeys")
    @Description("Comma-separated list of columns to use as keys for the MERGE operation. " +
      "Required when 'operation' is 'upsert'.")
    @Macro
    @Nullable
    public String mergeKeys;

    @Override
    public void validate(FailureCollector collector) {
      super.validate(collector);
      ConfigUtil.validateConnection(this, useConnection, connection, collector);
      if ("upsert".equalsIgnoreCase(operation)) {
        if (mergeKeys == null || mergeKeys.trim().isEmpty()) {
          collector.addFailure("Merge keys must be specified when operation is 'upsert'.", null)
            .withConfigProperty("mergeKeys");
        }
      }
    }

    @Override
    protected Map<String, String> getDBSpecificArguments() {
      return ImmutableMap.of(OracleConstants.DEFAULT_BATCH_VALUE, String.valueOf(defaultBatchValue));
      ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
      builder.put(OracleConstants.DEFAULT_BATCH_VALUE, String.valueOf(defaultBatchValue));
      if (operation != null) {
        builder.put("operation", operation);
      }
      if (mergeKeys != null) {
        builder.put("mergeKeys", mergeKeys);
      }
      return builder.build();
    }

@Override
@@ -140,14 +206,26 @@ public String getEscapedTableName() {
      return ESCAPE_CHAR + getTableName() + ESCAPE_CHAR;
    }

    @Override
    public String getEscapedDbSchemaName() {
      return ESCAPE_CHAR + getDBSchemaName() + ESCAPE_CHAR;
      String schemaName = getDBSchemaName();
      return schemaName == null ? null : ESCAPE_CHAR + schemaName + ESCAPE_CHAR;
    }

    @Override
    protected OracleConnectorConfig getConnection() {
      return connection;
    }

    public List<String> getMergeKeys() {
      if (mergeKeys == null || mergeKeys.trim().isEmpty()) {
        return Collections.emptyList();
      }
      return Arrays.stream(mergeKeys.split(","))
        .map(String::trim)
        .collect(Collectors.toList());
    }

    public String getOperation() {
      return operation == null ? "insert" : operation.toLowerCase();
    }
  }
}