From e48fd5caaeda9ad2b109186fe38dfb6650bcfb45 Mon Sep 17 00:00:00 2001 From: Aaditya Srinivasan Date: Wed, 15 Apr 2026 01:54:05 +0530 Subject: [PATCH] Optimize base64_decode validation using lookup table --- cpp/src/arrow/vendored/base64.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/vendored/base64.cpp b/cpp/src/arrow/vendored/base64.cpp index db2f74ed98f..51b93ab5ffe 100644 --- a/cpp/src/arrow/vendored/base64.cpp +++ b/cpp/src/arrow/vendored/base64.cpp @@ -30,6 +30,7 @@ */ #include "arrow/util/base64.h" +#include #include namespace arrow { @@ -40,6 +41,17 @@ static const std::string base64_chars = "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; +static const std::array kBase64Lookup = [] { + std::array table{}; + table.fill(-1); + + for (size_t i = 0; i < base64_chars.size(); ++i) { + table[static_cast(base64_chars[i])] = i; + } + + return table; +}(); + static std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) { std::string ret; int i = 0; @@ -119,22 +131,16 @@ Result base64_decode(std::string_view encoded_string) { return Status::Invalid("Invalid base64 input: padding in wrong position"); } - if (base64_chars.find(c) == std::string::npos) { + if (kBase64Lookup[static_cast(c)] == -1) { return Status::Invalid("Invalid base64 input: character is not valid base64 character"); } - char_array_4[i++] = c; + char_array_4[i++] = kBase64Lookup[static_cast(c)]; } in_++; if (i == 4) { - for (i = 0; i < 4; i++) { - if (char_array_4[i] != 0) { - char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff; - } - } - char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];