diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml index e62ed77..56bdfb3 100644 --- a/.github/workflows/build_image.yml +++ b/.github/workflows/build_image.yml @@ -1,7 +1,6 @@ name: build_image permissions: - id-token: write contents: read on: @@ -10,131 +9,215 @@ on: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 + DOCKER_REPO: slggamer/vector jobs: - generate-build-meta: - name: Generate Build Meta + build-and-push-images: + name: Build Images & Push To Docker Hub runs-on: ubuntu-latest + permissions: + contents: read steps: - - uses: actions/checkout@v2 - - name: Generate Build Meta - id: build_meta - run: | - echo "vector_build_desc=$(git rev-parse --short HEAD) $(date +%Y-%m-%d)" >> $GITHUB_OUTPUT - echo "vector_build_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT - outputs: - vector_build_desc: ${{ steps.build_meta.outputs.vector_build_desc }} - vector_build_sha: ${{ steps.build_meta.outputs.vector_build_sha }} - - build-x86_64-unknown-linux-gnu-binary: - name: Build x86_64-unknown-linux-gnu - runs-on: ubuntu-latest - needs: - - generate-build-meta - env: - VECTOR_BUILD_DESC: ${{ needs.generate-build-meta.outputs.vector_build_desc }} - VECTOR_BUILD_SHA: ${{ needs.generate-build-meta.outputs.vector_build_sha }} - steps: - - uses: actions/checkout@v2 - - name: Set up toolchains - uses: actions-rs/toolchain@v1 - - name: Install cross - run: cargo install cross - - name: Build x86_64-unknown-linux-gnu - run: make build-x86_64-unknown-linux-gnu - - name: Upload x86_64-unknown-linux-gnu - uses: actions/upload-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-x86_64-unknown-linux-gnu - path: target/x86_64-unknown-linux-gnu/release/vector - - build-aarch64-unknown-linux-gnu-binary: - name: Build aarch64-unknown-linux-gnu - runs-on: ubuntu-latest - needs: - - generate-build-meta - env: - VECTOR_BUILD_DESC: ${{ needs.generate-build-meta.outputs.vector_build_desc }} - VECTOR_BUILD_SHA: ${{ needs.generate-build-meta.outputs.vector_build_sha }} - JEMALLOC_SYS_WITH_LG_PAGE: 16 - steps: - - uses: actions/checkout@v2 - - name: Set up toolchains - uses: actions-rs/toolchain@v1 - - name: Install cross - run: cargo install cross - - name: Build aarch64-unknown-linux-gnu - run: make build-aarch64-unknown-linux-gnu - - name: Upload aarch64-unknown-linux-gnu - uses: actions/upload-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-aarch64-unknown-linux-gnu - path: target/aarch64-unknown-linux-gnu/release/vector - - build-armv7-unknown-linux-gnueabihf-binary: - name: Build armv7-unknown-linux-gnueabihf - runs-on: ubuntu-latest - needs: - - generate-build-meta - env: - VECTOR_BUILD_DESC: ${{ needs.generate-build-meta.outputs.vector_build_desc }} - VECTOR_BUILD_SHA: ${{ needs.generate-build-meta.outputs.vector_build_sha }} - JEMALLOC_SYS_WITH_LG_PAGE: 16 - steps: - - uses: actions/checkout@v2 - - name: Set up toolchains - uses: actions-rs/toolchain@v1 - - name: Install cross - run: cargo install cross - - name: Build armv7-unknown-linux-gnueabihf - run: make build-armv7-unknown-linux-gnueabihf - - name: Upload armv7-unknown-linux-gnueabihf - uses: actions/upload-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-armv7-unknown-linux-gnueabihf - path: target/armv7-unknown-linux-gnueabihf/release/vector - - push-image: - name: Push Docker Image - runs-on: ubuntu-latest - needs: - - generate-build-meta - - build-x86_64-unknown-linux-gnu-binary - - build-aarch64-unknown-linux-gnu-binary - - build-armv7-unknown-linux-gnueabihf-binary - env: - VECTOR_BUILD_DESC: ${{ needs.generate-build-meta.outputs.vector_build_desc }} - VECTOR_BUILD_SHA: ${{ needs.generate-build-meta.outputs.vector_build_sha }} - steps: - - uses: actions/checkout@v2 - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Configure AWS Credentials for DBaaS Dev - uses: aws-actions/configure-aws-credentials@v1 - with: - role-to-assume: arn:aws:iam::385595570414:role/vector-cicd - aws-region: us-west-2 - - name: Download staged binary (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-x86_64-unknown-linux-gnu - path: target/x86_64-unknown-linux-gnu/release - - name: Download staged binary (aarch64-unknown-linux-gnu) - uses: actions/download-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-aarch64-unknown-linux-gnu - path: target/aarch64-unknown-linux-gnu/release - - name: Download staged binary (armv7-unknown-linux-gnueabihf) - uses: actions/download-artifact@v2 - with: - name: vector-${{ env.VECTOR_BUILD_SHA }}-armv7-unknown-linux-gnueabihf - path: target/armv7-unknown-linux-gnueabihf/release - - name: Set Release Meta - run: | - echo "TAG=385595570414.dkr.ecr.us-west-2.amazonaws.com/tidbcloud/vector:nightly-$(git rev-parse --short HEAD)" >> $GITHUB_ENV - - name: Push to DBaaS Dev ECR - run: | - aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 385595570414.dkr.ecr.us-west-2.amazonaws.com - make release-docker + - name: Check out code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.inputs.git-ref || github.sha }} + + - name: Cache cargo dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-registry- + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + + - name: Set up Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + rustup component add rustfmt clippy + # Install lld linker for faster linking (optional, falls back to default if unavailable) + sudo apt-get update && sudo apt-get install -y lld || echo "lld not available, using default linker" + + - name: Check disk space + run: | + df -h + echo "Available disk space before build:" + df -h . | tail -1 + + - name: Install cross + run: cargo install cross + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set build date and tags + id: set_tags + run: | + BUILD_DATE=$(date +%Y%m%d) + SHA_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) + echo "build_date=${BUILD_DATE}" >> $GITHUB_OUTPUT + echo "sha_short=${SHA_SHORT}" >> $GITHUB_OUTPUT + echo "tag=${{ env.DOCKER_REPO }}:nightly-${SHA_SHORT}" >> $GITHUB_OUTPUT + echo "tag_ng=${{ env.DOCKER_REPO }}:nightly-${SHA_SHORT}-ng" >> $GITHUB_OUTPUT + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: slggamer + password: ${{ secrets.DOCKERHUBTOKEN }} + + - name: Build x86_64 binary (standard) + timeout-minutes: 60 + env: + CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 16 + CARGO_PROFILE_RELEASE_LTO: "thin" + CARGO_BUILD_JOBS: 4 + CARGO_INCREMENTAL: 0 + run: | + echo "Starting x86_64 build at $(date)" + make build-x86_64-unknown-linux-gnu + echo "Finished x86_64 build at $(date)" + # Clean up intermediate files to save disk space + find target/x86_64-unknown-linux-gnu/release/deps -name "*.rlib" -not -name "libvector*.rlib" -delete 2>/dev/null || true + find target/x86_64-unknown-linux-gnu/release/build -type f -name "*.o" -delete 2>/dev/null || true + + - name: Build aarch64 binary (standard) + timeout-minutes: 60 + env: + CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 16 + CARGO_PROFILE_RELEASE_LTO: "thin" + CARGO_BUILD_JOBS: 4 + CARGO_INCREMENTAL: 0 + run: | + echo "Starting aarch64 build at $(date)" + make build-aarch64-unknown-linux-gnu + echo "Finished aarch64 build at $(date)" + # Clean up intermediate files to save disk space + find target/aarch64-unknown-linux-gnu/release/deps -name "*.rlib" -not -name "libvector*.rlib" -delete 2>/dev/null || true + find target/aarch64-unknown-linux-gnu/release/build -type f -name "*.o" -delete 2>/dev/null || true + + - name: Build and push standard image + env: + REPO: ${{ env.DOCKER_REPO }} + TAG: ${{ steps.set_tags.outputs.tag }} + run: make release-docker + + - name: Clean up standard build artifacts + run: | + # Remove standard build artifacts after Docker image is built + rm -rf target/x86_64-unknown-linux-gnu/release/build + rm -rf target/aarch64-unknown-linux-gnu/release/build + find target/x86_64-unknown-linux-gnu/release/deps -type f ! -name "*.rlib" -delete 2>/dev/null || true + find target/aarch64-unknown-linux-gnu/release/deps -type f ! -name "*.rlib" -delete 2>/dev/null || true + # Keep only the final binaries + df -h + echo "Available disk space after Cleaned up intermediate build artifacts:" + df -h . | tail -1 + + - name: Build x86_64 binary (nextgen) + timeout-minutes: 60 + env: + CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 16 + CARGO_PROFILE_RELEASE_LTO: "thin" + CARGO_BUILD_JOBS: 4 + CARGO_INCREMENTAL: 0 + run: | + echo "Starting x86_64 nextgen build at $(date)" + make build-x86_64-unknown-linux-gnu-nextgen + echo "Finished x86_64 nextgen build at $(date)" + # Clean up intermediate files to save disk space + find target/x86_64-unknown-linux-gnu/release/deps -name "*.rlib" -not -name "libvector*.rlib" -delete 2>/dev/null || true + find target/x86_64-unknown-linux-gnu/release/build -type f -name "*.o" -delete 2>/dev/null || true + + - name: Build aarch64 binary (nextgen) + timeout-minutes: 60 + env: + CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 16 + CARGO_PROFILE_RELEASE_LTO: "thin" + CARGO_BUILD_JOBS: 4 + CARGO_INCREMENTAL: 0 + run: | + echo "Starting aarch64 nextgen build at $(date)" + make build-aarch64-unknown-linux-gnu-nextgen + echo "Finished aarch64 nextgen build at $(date)" + # Clean up intermediate files to save disk space + find target/aarch64-unknown-linux-gnu/release/deps -name "*.rlib" -not -name "libvector*.rlib" -delete 2>/dev/null || true + find target/aarch64-unknown-linux-gnu/release/build -type f -name "*.o" -delete 2>/dev/null || true + + - name: Check nextgen binaries before building image + run: | + echo "Checking nextgen binary files..." + echo "" + echo "x86_64 binary:" + if [ -f target/x86_64-unknown-linux-gnu/release/vector-nextgen ]; then + ls -lh target/x86_64-unknown-linux-gnu/release/vector-nextgen + echo " ✅ EXISTS" + else + echo " ❌ NOT FOUND" + fi + echo "" + echo "aarch64 binary:" + if [ -f target/aarch64-unknown-linux-gnu/release/vector-nextgen ]; then + ls -lh target/aarch64-unknown-linux-gnu/release/vector-nextgen + echo " ✅ EXISTS" + else + echo " ❌ NOT FOUND" + fi + echo "" + if [ -f target/x86_64-unknown-linux-gnu/release/vector-nextgen ] && [ -f target/aarch64-unknown-linux-gnu/release/vector-nextgen ]; then + echo "✅ Both nextgen binaries exist - Makefile should skip rebuild" + else + echo "⚠️ Some binaries missing - Makefile will trigger rebuild" + fi + + - name: Build and push nextgen image + env: + REPO: ${{ env.DOCKER_REPO }} + TAG: ${{ steps.set_tags.outputs.tag_ng }} + NEXTGEN: "true" + run: make release-docker-nextgen + + - name: Build Summary + run: | + echo "✅ Vector images built and pushed successfully!" + echo "" + echo "📦 **Built Images:**" + echo "- **Standard:** \`${{ steps.set_tags.outputs.tag }}\`" + echo "- **Nextgen:** \`${{ steps.set_tags.outputs.tag_ng }}\`" + echo "" + echo "🔍 **Git SHA:** ${{ github.sha }}" + echo "📅 **Build Date:** ${{ steps.set_tags.outputs.build_date }}" + + - name: Final cleanup and disk space check + if: always() + run: | + echo "Final disk space usage:" + df -h . + echo "" + echo "Target directory size:" + du -sh target 2>/dev/null || echo "target directory not found" + echo "" + echo "Cleaning up remaining build artifacts..." + # Remove all build artifacts except final binaries + find target -type d -name "build" -exec rm -rf {} + 2>/dev/null || true + find target -type f -name "*.o" -delete 2>/dev/null || true + find target -type f -name "*.d" -delete 2>/dev/null || true + find target -type f -name "*.rmeta" -delete 2>/dev/null || true + echo "Cleanup completed" + df -h . diff --git a/.gitignore b/.gitignore index 6893226..9482176 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ .DS_Store .specstory/ test_data/ +coverage/ +*.lcov +tarpaulin-report.html +cobertura.xml \ No newline at end of file diff --git a/Makefile b/Makefile index 961acc4..52e5564 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,21 @@ test: @cargo test --workspace --lib @echo "Done testing." +.PHONY: test-coverage +test-coverage: + @echo "Running tests with coverage..." + @cargo install cargo-tarpaulin --quiet 2>/dev/null || true + @cargo tarpaulin --workspace --lib --out Xml --out Html --output-dir coverage --timeout 120 || true + @echo "Coverage report generated in coverage/ directory" + @echo "Done testing with coverage." + +.PHONY: test-coverage-summary +test-coverage-summary: + @echo "Running tests with coverage summary..." + @cargo install cargo-tarpaulin --quiet 2>/dev/null || true + @cargo tarpaulin --workspace --lib --out Stdout --out Xml --output-dir coverage --timeout 120 || true + @echo "Done testing with coverage." + .PHONY: build build: @echo "Building..." @@ -157,6 +172,11 @@ target/%/vector-nextgen: cargo-install-cross --target ${TRIPLE} \ --no-default-features \ --features ${FEATURES} + @# Rename vector to vector-nextgen after build + @if [ -f target/${TRIPLE}/${PROFILE}/vector ] && [ ! -f target/${TRIPLE}/${PROFILE}/vector-nextgen ]; then \ + mv target/${TRIPLE}/${PROFILE}/vector target/${TRIPLE}/${PROFILE}/vector-nextgen; \ + echo "Renamed vector to vector-nextgen"; \ + fi .PHONY: cargo-install-% cargo-install-%: override TOOL = $(@:cargo-install-%=%) diff --git a/scripts/cross/aarch64-unknown-linux-gnu.dockerfile b/scripts/cross/aarch64-unknown-linux-gnu.dockerfile index 67e41c7..a3243b3 100644 --- a/scripts/cross/aarch64-unknown-linux-gnu.dockerfile +++ b/scripts/cross/aarch64-unknown-linux-gnu.dockerfile @@ -4,3 +4,8 @@ COPY bootstrap-ubuntu.sh . COPY install-protoc.sh . RUN ./bootstrap-ubuntu.sh RUN ./install-protoc.sh + +RUN apt-get update && \ + apt-get remove --assume-yes gcc-9 && \ + apt-get --assume-yes install clang && \ + rm -rf /var/lib/apt/lists/* diff --git a/scripts/release-docker.sh b/scripts/release-docker.sh index d04d482..41eb6be 100755 --- a/scripts/release-docker.sh +++ b/scripts/release-docker.sh @@ -31,9 +31,11 @@ trap cleanup EXIT # linux/amd64 -> amd64 # linux/arm64 -> arm64 # linux/arm/v7 -> arm -cp target/x86_64-unknown-linux-gnu/release/vector "$WORK_DIR"/vector-amd64 -cp target/aarch64-unknown-linux-gnu/release/vector "$WORK_DIR"/vector-arm64 -# cp target/armv7-unknown-linux-gnueabihf/release/vector "$WORK_DIR"/vector-arm +BINARY_NAME="${NEXTGEN:+vector-nextgen}" +BINARY_NAME="${BINARY_NAME:-vector}" +cp target/x86_64-unknown-linux-gnu/release/${BINARY_NAME} "$WORK_DIR"/vector-amd64 +cp target/aarch64-unknown-linux-gnu/release/${BINARY_NAME} "$WORK_DIR"/vector-arm64 +# cp target/armv7-unknown-linux-gnueabihf/release/${BINARY_NAME} "$WORK_DIR"/vector-arm # cp config/vector.toml "$WORK_DIR" VERSION="${VECTOR_VERSION:-"$(scripts/version.sh)"}" diff --git a/src/common/deltalake_writer/converter.rs b/src/common/deltalake_writer/converter.rs index 518b657..8b8f04c 100644 --- a/src/common/deltalake_writer/converter.rs +++ b/src/common/deltalake_writer/converter.rs @@ -18,6 +18,23 @@ impl EventConverter { Self } + /// Convert a Vector `Value` into a plain string for Arrow Utf8 columns. + /// + /// IMPORTANT: + /// - Prefer the raw string via `as_str()` to avoid JSON-style escaping (e.g. newline -> `\n`). + /// - Fall back to `to_string()` for non-string types. + fn log_value_to_plain_string(value: &LogValue) -> Option { + if let Some(s) = value.as_str() { + return Some(s.to_string()); + } + + match value { + LogValue::Null => None, + // For everything else, `to_string()` is fine (numbers/bools/objects, etc.). + _ => Some(value.to_string()), + } + } + /// Convert events to Arrow RecordBatch pub fn events_to_record_batch( schema_manager: &mut SchemaManager, @@ -140,14 +157,14 @@ impl EventConverter { // For data fields, try exact match first, then case-insensitive match let field_name = field.name(); if let Some(value) = log_event.get(field_name.as_str()) { - Some(value.to_string()) + Self::log_value_to_plain_string(value) } else { // Try case-insensitive match for data fields if let Some(iter) = log_event.all_event_fields() { let mut found_value = None; for (key, value) in iter { if key.as_ref().to_lowercase() == field_name.to_lowercase() { - found_value = Some(value.to_string()); + found_value = Self::log_value_to_plain_string(value); break; } } @@ -160,9 +177,7 @@ impl EventConverter { }; if let Some(s) = value_opt { - // Trim quotes from string values to avoid query issues - let trimmed = s.trim_matches('"'); - builder.append_value(trimmed); + builder.append_value(&s); } else { builder.append_null(); } @@ -586,11 +601,9 @@ impl EventConverter { .iter() .map(|event| { if let Event::Log(log_event) = event { - log_event.get(field.name().as_str()).map(|v| { - // Trim quotes from string values to avoid query issues - let s = v.to_string(); - s.trim_matches('"').to_string() - }) + log_event + .get(field.name().as_str()) + .and_then(Self::log_value_to_plain_string) } else { None } @@ -612,6 +625,7 @@ mod tests { use super::*; use std::collections::BTreeMap; use vector_lib::event::LogEvent; + use arrow::array::StringArray; fn create_test_log_event() -> LogEvent { let mut log = LogEvent::from(BTreeMap::new()); @@ -655,4 +669,25 @@ mod tests { let result = EventConverter::build_boolean_column(&field, &events); assert!(result.is_ok()); } + + #[test] + fn test_string_column_preserves_newlines() { + let mut log = create_test_log_event(); + let plan = "root\n└─ child\n".to_string(); + log.insert("normalized_plan", plan.clone()); + let events = vec![Event::Log(log)]; + let field = Field::new("normalized_plan", DataType::Utf8, true); + + let array = EventConverter::build_string_column(&field, &events, None).unwrap(); + let string_array = array + .as_any() + .downcast_ref::() + .expect("should be a StringArray"); + + let got = string_array.value(0); + assert_eq!(got, plan.as_str()); + assert!(got.contains('\n')); + // If we accidentally JSON-escaped the string, we'd store a literal backslash-n. + assert!(!got.contains("\\n")); + } } diff --git a/src/main.rs b/src/main.rs index 438b2f1..e0ab365 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,10 @@ mod utils; #[cfg(unix)] fn main() -> ExitCode { + // Initialize rustls CryptoProvider early, before any threads are spawned + // This prevents panics when rustls is used in worker threads + crate::utils::rustls::init_rustls(); + let exit_code = Application::run(ExtraContext::default()) .code() .unwrap_or(exitcode::UNAVAILABLE) as u8; diff --git a/src/sources/mocked_topsql/shutdown.rs b/src/sources/mocked_topsql/shutdown.rs index cf9ef12..8d6ea8b 100644 --- a/src/sources/mocked_topsql/shutdown.rs +++ b/src/sources/mocked_topsql/shutdown.rs @@ -48,6 +48,7 @@ impl ShutdownSubscriber { } } + #[allow(dead_code)] pub fn extend(&self) -> (ShutdownNotifier, ShutdownSubscriber) { let (tx, rx) = watch::channel(()); ( @@ -64,6 +65,7 @@ impl ShutdownSubscriber { self.done().await } + #[allow(dead_code)] pub fn subscribe(&self) -> watch::Receiver<()> { self.rx.clone() }