ModelTC · shihaobai · May 14, 2026 · May 14, 2026 · May 14, 2026 · gemini-code-assist
diff --git a/docs/CN/source/tutorial/api_server_args.rst b/docs/CN/source/tutorial/api_server_args.rst
@@ -278,6 +278,12 @@ PD 分离模式参数
 
     当任意图片超过该阈值时，请求会被拒绝。
 
+.. option:: --max_image_pixels
+
+    单张图片在预处理缩放前允许的最大像素数量，默认为 ``8294400``（即 3840×2160，一张 4K UHD 图片的像素总量）。
+
+    当输入图片超过该阈值时，LightLLM 会先自动将其缩放到该像素预算内，再继续后续流程。
+
 .. option:: --visual_infer_batch_size
 
     每次推理批次中处理的图像数量，默认为 ``1``

diff --git a/docs/EN/source/tutorial/api_server_args.rst b/docs/EN/source/tutorial/api_server_args.rst
@@ -276,6 +276,12 @@ Multimodal Parameters
 
     Requests are rejected when any image exceeds this limit.
 
+.. option:: --max_image_pixels
+
+    Maximum number of pixels a single input image may contain before it is downscaled during preprocessing. Default is ``8294400`` (3840 x 2160, the pixel count of one 4K UHD image).
+
+    If an input image exceeds this threshold, LightLLM automatically resizes it down to this pixel budget before continuing.
+
 .. option:: --visual_infer_batch_size
 
     Number of images processed in each inference batch, default is ``1``

diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py
@@ -445,9 +445,15 @@ def make_argument_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--max_image_token_count",
         type=int,
-        default=6128,
+        default=8192,
         help="maximum allowed token count for one image after tokenization",
     )
+    parser.add_argument(
+        "--max_image_pixels",
+        type=int,
+        default=8294400,
+        help="maximum allowed pixel count for one image before resize preprocessing",
+    )
     parser.add_argument(
         "--embed_cache_storage_size",
         type=float,

diff --git a/lightllm/server/core/objs/start_args_type.py b/lightllm/server/core/objs/start_args_type.py
@@ -95,7 +95,8 @@ class StartArgs:
     enable_decode_microbatch_overlap: bool = field(default=False)
     enable_prefill_microbatch_overlap: bool = field(default=False)
     cache_capacity: int = field(default=200)
-    max_image_token_count: int = field(default=6128)
+    max_image_token_count: int = field(default=8192)
+    max_image_pixels: int = field(default=8294400)
     embed_cache_storage_size: float = field(default=4)
     data_type: Optional[str] = field(
         default=None, metadata={"choices": ["fp16", "float16", "bf16", "bfloat16", "fp32", "float32"]}

diff --git a/lightllm/server/multimodal_params.py b/lightllm/server/multimodal_params.py
@@ -4,7 +4,7 @@
 import librosa
 import base64
 import numpy as np
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 from io import BytesIO
 from concurrent.futures import ThreadPoolExecutor
 from PIL import Image, ImageFile
@@ -13,6 +13,7 @@
 from lightllm.utils.error_utils import ClientDisconnected
 from lightllm.utils.multimodal_utils import fetch_resource
 from lightllm.utils.log_utils import init_logger
+from lightllm.utils.envs_utils import get_env_start_args
 
 
 logger = init_logger(__name__)
@@ -131,6 +132,9 @@ def __init__(self, **kwargs):
         self.extra_params = {}
 
     async def preload(self, request: Request):
+
+        max_image_pixels = get_env_start_args().max_image_pixels
+
         try:
             if self._type == "url":
                 timeout = int(os.getenv("REQUEST_TIMEOUT", "5"))
@@ -141,8 +145,14 @@ async def preload(self, request: Request):
             elif self._type == "image_size":
                 # image_size 代表直接传入图片的 width，height，主要是用于一些场景
                 # 的 token 计数判断, 所以只需要图片长宽信息，不需要具体图片的内容信息
-                self.image_w = self._data[0]
-                self.image_h = self._data[1]
+                src_w = self._data[0]
+                src_h = self._data[1]
+                self.image_w, self.image_h = _resize_image_dimensions_if_needed(src_w, src_h, max_image_pixels)
+                if (self.image_w, self.image_h) != (src_w, src_h):
+                    logger.warning(
+                        f"image_size pixels {src_w * src_h} exceed max_image_pixels={max_image_pixels}, "
+                        f"resized to {self.image_w}x{self.image_h}"
+                    )
                 return
             else:
                 raise ValueError(f"cannot read image which type is {self._type}!")
@@ -151,7 +161,24 @@ async def preload(self, request: Request):
             # Decoding is mainly done in the C libraries (libjpeg/libpng/libwebp), which releases the GIL,
             # and multiple threads can achieve true parallelism.
             loop = asyncio.get_running_loop()
-            self.image_w, self.image_h = await loop.run_in_executor(_IMAGE_VERIFY_POOL, _verify_image_bytes, img_data)
+            # 1) Verify original input bytes first.
+            src_w, src_h = await loop.run_in_executor(_IMAGE_VERIFY_POOL, _verify_image_bytes, img_data)
+            # 2) Resize (or no-op) after verification.
+            img_data, resized_w, resized_h = await loop.run_in_executor(
+                _IMAGE_VERIFY_POOL,
+                _resize_image_bytes_if_needed,
+                img_data,
+                src_w,
+                src_h,
+                max_image_pixels,
+            )
+            self.image_w, self.image_h = resized_w, resized_h
+
+            if (resized_w, resized_h) != (src_w, src_h):
+                logger.warning(
+                    f"image pixels {src_w * src_h} exceed max_image_pixels={max_image_pixels},"
+                    f" resized to {self.image_w}x{self.image_h}"
+                )
 
             self._preload_data = img_data
             return
@@ -245,3 +272,45 @@ def _verify_image_bytes(img_data: bytes) -> Tuple[int, int]:
         w, h = image.size
         image.load()
     return w, h
+
+
+def _resize_image_bytes_if_needed(
+    img_data: bytes, src_w: int, src_h: int, max_image_pixels: int
+) -> Tuple[bytes, int, int]:
+    """
+    Downscale encoded image bytes so the image fits within a max pixel budget.
+
+    Args:
+        img_data: encoded image bytes readable by PIL.
+        src_w: width of the encoded image in pixels.
+        src_h: height of the encoded image in pixels.
+        max_image_pixels: upper bound for ``width * height`` of the result.
+
+    Returns:
+        ``(bytes, width, height)``. When the source already fits the budget the
+        input bytes are returned untouched together with ``(src_w, src_h)``.
+        Otherwise the image is resized with LANCZOS resampling, converted to RGB
+        (any alpha channel is discarded) and re-encoded as JPEG.
+    """
+    new_w, new_h = _resize_image_dimensions_if_needed(src_w, src_h, max_image_pixels)
+    if (new_w, new_h) == (src_w, src_h):
+        return img_data, src_w, src_h
+
+    with Image.open(BytesIO(img_data)) as image:
+        # Older Pillow releases expose the filter as Image.LANCZOS only.
+        resampling = Image.Resampling.LANCZOS if hasattr(Image, "Resampling") else Image.LANCZOS
+        # Pillow silently falls back to NEAREST for bilevel ("1") and palette ("P")
+        # images, so expand those to RGB first to actually get LANCZOS quality.
+        source = image.convert("RGB") if image.mode in ("1", "P") else image
+        resized_image = source.resize((new_w, new_h), resampling).convert("RGB")
+
+        buffer = BytesIO()
+        resized_image.save(buffer, format="JPEG", quality=96, optimize=True)
+        return buffer.getvalue(), new_w, new_h
+
+
+def _resize_image_dimensions_if_needed(src_w: int, src_h: int, max_image_pixels: int) -> Tuple[int, int]:
+    """
+    Compute resized (w, h) under a max pixel budget while preserving aspect ratio.
+
+    Args:
+        src_w: source width in pixels.
+        src_h: source height in pixels.
+        max_image_pixels: upper bound for ``w * h`` of the returned size.
+
+    Returns:
+        ``(src_w, src_h)`` unchanged when it already fits the budget, otherwise a
+        downscaled size with both sides >= 1 and ``w * h <= max_image_pixels``.
+
+    Raises:
+        ValueError: if a resize is required but ``max_image_pixels`` is not positive.
+    """
+    old_pixels = src_w * src_h
+    if old_pixels <= max_image_pixels:
+        return src_w, src_h
+    if max_image_pixels < 1:
+        # No size with positive sides fits a non-positive budget: a budget of 0 can
+        # never be met at 1x1, and a negative one makes the scale factor complex.
+        raise ValueError(f"max_image_pixels must be a positive integer, got {max_image_pixels}")
+
+    scale = (max_image_pixels / old_pixels) ** 0.5
+    new_w = max(1, int(src_w * scale))
+    new_h = max(1, int(src_h * scale))
+
+    # Clamping a side up to 1 (extreme aspect ratios) or float rounding can push the
+    # product over the budget. Shrink the longer side in one step rather than one
+    # pixel at a time, which could take millions of iterations.
+    if new_w * new_h > max_image_pixels:
+        if new_w >= new_h:
+            new_w = max(1, max_image_pixels // new_h)
+        else:
+            new_h = max(1, max_image_pixels // new_w)
+
+    return new_w, new_h