Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,24 @@
<td>String</td>
<td>The Variant shredding schema for writing.</td>
</tr>
<tr>
<td><h5>vector-field</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specifies column names that should be stored as vector type. This is used when you want to treat an ARRAY column as a VECTOR.</td>
</tr>
<tr>
<td><h5>vector.file.format</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specify the vector store file format.</td>
</tr>
<tr>
<td><h5>vector.target-file-size</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>MemorySize</td>
<td>Target size of a vector-store file. Default is 10 * TARGET_FILE_SIZE.</td>
</tr>
<tr>
<td><h5>visibility-callback.check-interval</h5></td>
<td style="word-wrap: break-word;">10 s</td>
Expand Down
52 changes: 52 additions & 0 deletions paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -2299,6 +2299,31 @@ public InlineElement getDescription() {
.withDescription(
"The interval for checking visibility when visibility-callback enabled.");

/**
 * File format identifier for vector-store files. When unset, writers fall back to the
 * table's default file format (see {@code FileFormatFactoryUtils#vectorFileFormat}).
 */
public static final ConfigOption<String> VECTOR_FILE_FORMAT =
        key("vector.file.format")
                .stringType()
                .noDefaultValue()
                .withDescription("Specify the vector store file format.");

/**
 * Comma-separated column names that should be stored as vector type. Parsed by {@code
 * vectorField()} into a set of names.
 */
public static final ConfigOption<String> VECTOR_FIELD =
        key("vector-field")
                .stringType()
                .noDefaultValue()
                .withDescription(
                        "Specifies column names that should be stored as vector type. "
                                // Fixed grammar: "an ARRAY", not "a ARRAY".
                                + "This is used when you want to treat an ARRAY column as a VECTOR.");

/**
 * Target size of a vector-store file. When unset, {@code vectorTargetFileSize()} falls back
 * to 10 times the regular target file size.
 */
public static final ConfigOption<MemorySize> VECTOR_TARGET_FILE_SIZE =
        key("vector.target-file-size")
                .memoryType()
                .noDefaultValue()
                .withDescription(
                        Description.builder()
                                .text(
                                        "Target size of a vector-store file."
                                                + " Default is 10 * TARGET_FILE_SIZE.")
                                .build());

private final Options options;

public CoreOptions(Map<String, String> options) {
Expand Down Expand Up @@ -3614,6 +3639,33 @@ public Duration visibilityCallbackCheckInterval() {
return options.get(VISIBILITY_CALLBACK_CHECK_INTERVAL);
}

/**
 * Returns the configured vector-store file format identifier, passed through {@code
 * normalizeFileFormat}.
 */
public String vectorFileFormatString() {
    String configured = options.get(VECTOR_FILE_FORMAT);
    return normalizeFileFormat(configured);
}

/**
 * Parses {@link #VECTOR_FIELD} into the set of column names to store as vectors.
 *
 * @return the configured vector column names; an empty set when the option is unset or
 *     blank. Each entry is trimmed and empty entries (e.g. from trailing commas) are
 *     ignored.
 */
public Set<String> vectorField() {
    String vectorFields = options.get(CoreOptions.VECTOR_FIELD);
    if (vectorFields == null || vectorFields.trim().isEmpty()) {
        return Collections.emptySet();
    }
    // Trim each token so "a, b" yields {"a", "b"} rather than {"a", " b"},
    // and drop empty tokens produced by stray commas.
    return Arrays.stream(vectorFields.split(","))
            .map(String::trim)
            .filter(s -> !s.isEmpty())
            .collect(Collectors.toSet());
}

/**
 * Static variant of {@link #vectorField()} for raw option maps, usable before a {@code
 * CoreOptions} instance exists.
 *
 * @param options raw table options
 * @return the configured vector column names; an empty set when the option is unset or
 *     blank. Each entry is trimmed and empty entries are ignored.
 */
public static Set<String> vectorField(Map<String, String> options) {
    // Map.get already returns null for a missing key; getOrDefault(key, null) was redundant.
    String vectorFields = options.get(CoreOptions.VECTOR_FIELD.key());
    if (vectorFields == null || vectorFields.trim().isEmpty()) {
        return Collections.emptySet();
    }
    // Trim each token and drop empties, mirroring the instance method's parsing.
    return Arrays.stream(vectorFields.split(","))
            .map(String::trim)
            .filter(s -> !s.isEmpty())
            .collect(Collectors.toSet());
}

/**
 * Target size for vector-store files.
 *
 * @return the value of {@link #VECTOR_TARGET_FILE_SIZE} if set; otherwise 10 times the
 *     regular target file size, since vector columns are typically much larger.
 */
public long vectorTargetFileSize() {
    // Since vectors are large, it would be better to set a larger target size for vectors.
    // orElseGet (not orElse) so the fallback is only computed when the option is absent.
    return options.getOptional(VECTOR_TARGET_FILE_SIZE)
            .map(MemorySize::getBytes)
            .orElseGet(() -> 10 * targetFileSize(false));
}

/** Specifies the merge engine for table with primary key. */
public enum MergeEngine implements DescribedEnum {
DEDUPLICATE("deduplicate", "De-duplicate and keep the last row."),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ public static FileFormat fileFormat(CoreOptions options) {
return FileFormat.fromIdentifier(options.fileFormatString(), options.toConfiguration());
}

/**
 * Creates the {@link FileFormat} for vector-store files, falling back to the table's
 * default file format when no vector file format is configured.
 */
public static FileFormat vectorFileFormat(CoreOptions options) {
    String identifier = options.vectorFileFormatString();
    return identifier == null
            ? fileFormat(options)
            : FileFormat.fromIdentifier(identifier, options.toConfiguration());
}

/** Creates the {@link FileFormat} used for manifest files, from the configured identifier. */
public static FileFormat manifestFormat(CoreOptions options) {
    return FileFormat.fromIdentifier(options.manifestFormatString(), options.toConfiguration());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.paimon.operation.BlobFileContext;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.reader.RecordReaderIterator;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.BatchRecordWriter;
import org.apache.paimon.utils.CommitIncrement;
Expand All @@ -52,15 +53,20 @@
import org.apache.paimon.utils.SinkWriter.BufferedSinkWriter;
import org.apache.paimon.utils.SinkWriter.DirectSinkWriter;
import org.apache.paimon.utils.StatsCollectorFactories;
import org.apache.paimon.utils.VectorStoreUtils;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import static org.apache.paimon.types.BlobType.fieldsInBlobFile;

/**
* A {@link RecordWriter} implementation that only accepts records which are always insert
Expand All @@ -71,8 +77,10 @@ public class AppendOnlyWriter implements BatchRecordWriter, MemoryOwner {
private final FileIO fileIO;
private final long schemaId;
private final FileFormat fileFormat;
private final FileFormat vectorFileFormat;
private final long targetFileSize;
private final long blobTargetFileSize;
private final long vectorTargetFileSize;
private final RowType writeSchema;
@Nullable private final List<String> writeCols;
private final DataFilePathFactory pathFactory;
Expand Down Expand Up @@ -103,8 +111,10 @@ public AppendOnlyWriter(
@Nullable IOManager ioManager,
long schemaId,
FileFormat fileFormat,
FileFormat vectorFileFormat,
long targetFileSize,
long blobTargetFileSize,
long vectorTargetFileSize,
RowType writeSchema,
@Nullable List<String> writeCols,
long maxSequenceNumber,
Expand All @@ -127,8 +137,10 @@ public AppendOnlyWriter(
this.fileIO = fileIO;
this.schemaId = schemaId;
this.fileFormat = fileFormat;
this.vectorFileFormat = vectorFileFormat;
this.targetFileSize = targetFileSize;
this.blobTargetFileSize = blobTargetFileSize;
this.vectorTargetFileSize = vectorTargetFileSize;
this.writeSchema = writeSchema;
this.writeCols = writeCols;
this.pathFactory = pathFactory;
Expand Down Expand Up @@ -302,13 +314,38 @@ public void toBufferedWriter() throws Exception {
}

private RollingFileWriter<InternalRow, DataFileMeta> createRollingRowWriter() {
if (blobContext != null) {
return new RollingBlobFileWriter(
boolean hasNormal, hasBlob, hasVectorStore;
{
hasBlob = (blobContext != null);

List<DataField> fieldsInVectorFile =
VectorStoreUtils.fieldsInVectorFile(writeSchema, fileFormat, vectorFileFormat);
Set<String> vectorFieldNames =
fieldsInVectorFile.stream().map(DataField::name).collect(Collectors.toSet());
hasVectorStore = !fieldsInVectorFile.isEmpty();

List<DataField> fieldsInBlobFile =
hasBlob
? fieldsInBlobFile(writeSchema, blobContext.blobDescriptorFields())
: Collections.emptyList();
Set<String> blobFieldNames =
fieldsInBlobFile.stream().map(DataField::name).collect(Collectors.toSet());
hasNormal =
writeSchema.getFields().stream()
.anyMatch(
f ->
!blobFieldNames.contains(f.name())
&& !vectorFieldNames.contains(f.name()));
}
if (hasBlob || (hasNormal && hasVectorStore)) {
return new DataEvolutionRollingFileWriter(
fileIO,
schemaId,
fileFormat,
vectorFileFormat,
targetFileSize,
blobTargetFileSize,
vectorTargetFileSize,
writeSchema,
pathFactory,
seqNumCounterProvider,
Expand All @@ -319,13 +356,20 @@ private RollingFileWriter<InternalRow, DataFileMeta> createRollingRowWriter() {
statsDenseStore,
blobContext);
}
FileFormat realFileFormat = hasNormal ? fileFormat : vectorFileFormat;
long realTargetFileSize = hasNormal ? targetFileSize : vectorTargetFileSize;
DataFilePathFactory realPathFactory =
hasNormal
? pathFactory
: pathFactory.vectorStorePathFactory(
vectorFileFormat.getFormatIdentifier());
return new RowDataRollingFileWriter(
fileIO,
schemaId,
fileFormat,
targetFileSize,
realFileFormat,
realTargetFileSize,
writeSchema,
pathFactory,
realPathFactory,
seqNumCounterProvider,
fileCompression,
statsCollectorFactories.statsCollectors(writeSchema.getFieldNames()),
Expand Down
Loading
Loading