Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/CN/source/tutorial/api_server_args.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,12 @@ PD 分离模式参数

当任意图片超过该阈值时,请求会被拒绝。

.. option:: --max_image_pixels

单张图片在预处理缩放前允许的最大像素数量,默认为 ``8294400``(约等于 4K 图片像素总量)。

当输入图片超过该阈值时,LightLLM 会先自动将其缩放到该像素预算内,再继续后续流程。

.. option:: --visual_infer_batch_size

每次推理批次中处理的图像数量,默认为 ``1``
Expand Down
6 changes: 6 additions & 0 deletions docs/EN/source/tutorial/api_server_args.rst
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ Multimodal Parameters

Requests are rejected when any image exceeds this limit.

.. option:: --max_image_pixels

Maximum pixel count (width × height) allowed for a single image before preprocessing; the default is ``8294400`` (3840 × 2160, the pixel count of a 4K UHD image).

If an input image exceeds this threshold, LightLLM automatically resizes it down to this pixel budget before continuing.

.. option:: --visual_infer_batch_size

Number of images processed in each inference batch, default is ``1``
Expand Down
8 changes: 7 additions & 1 deletion lightllm/server/api_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,15 @@ def make_argument_parser() -> argparse.ArgumentParser:
parser.add_argument(
"--max_image_token_count",
type=int,
default=6128,
default=8192,
help="maximum allowed token count for one image after tokenization",
)
parser.add_argument(
"--max_image_pixels",
type=int,
default=8294400,
help="maximum allowed pixel count for one image before resize preprocessing",
)
parser.add_argument(
"--embed_cache_storage_size",
type=float,
Expand Down
3 changes: 2 additions & 1 deletion lightllm/server/core/objs/start_args_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ class StartArgs:
enable_decode_microbatch_overlap: bool = field(default=False)
enable_prefill_microbatch_overlap: bool = field(default=False)
cache_capacity: int = field(default=200)
max_image_token_count: int = field(default=6128)
max_image_token_count: int = field(default=8192)
max_image_pixels: int = field(default=8294400)
embed_cache_storage_size: float = field(default=4)
data_type: Optional[str] = field(
default=None, metadata={"choices": ["fp16", "float16", "bf16", "bfloat16", "fp32", "float32"]}
Expand Down
77 changes: 73 additions & 4 deletions lightllm/server/multimodal_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import librosa
import base64
import numpy as np
from typing import List, Tuple
from typing import List, Tuple, Optional
from io import BytesIO
from concurrent.futures import ThreadPoolExecutor
from PIL import Image, ImageFile
Expand All @@ -13,6 +13,7 @@
from lightllm.utils.error_utils import ClientDisconnected
from lightllm.utils.multimodal_utils import fetch_resource
from lightllm.utils.log_utils import init_logger
from lightllm.utils.envs_utils import get_env_start_args


logger = init_logger(__name__)
Expand Down Expand Up @@ -131,6 +132,9 @@ def __init__(self, **kwargs):
self.extra_params = {}

async def preload(self, request: Request):

max_image_pixels = get_env_start_args().max_image_pixels

try:
if self._type == "url":
timeout = int(os.getenv("REQUEST_TIMEOUT", "5"))
Expand All @@ -141,8 +145,14 @@ async def preload(self, request: Request):
elif self._type == "image_size":
# image_size 代表直接传入图片的 width,height,主要是用于一些场景
# 的 token 计数判断, 所以只需要图片长宽信息,不需要具体图片的内容信息
self.image_w = self._data[0]
self.image_h = self._data[1]
src_w = self._data[0]
src_h = self._data[1]
self.image_w, self.image_h = _resize_image_dimensions_if_needed(src_w, src_h, max_image_pixels)
if (self.image_w, self.image_h) != (src_w, src_h):
logger.warning(
f"image_size pixels {src_w * src_h} exceed max_image_pixels={max_image_pixels}, "
f"resized to {self.image_w}x{self.image_h}"
)
return
else:
raise ValueError(f"cannot read image which type is {self._type}!")
Expand All @@ -151,7 +161,24 @@ async def preload(self, request: Request):
# Decoding is mainly done in the C libraries (libjpeg/libpng/libwebp), which releases the GIL,
# and multiple threads can achieve true parallelism.
loop = asyncio.get_running_loop()
self.image_w, self.image_h = await loop.run_in_executor(_IMAGE_VERIFY_POOL, _verify_image_bytes, img_data)
# 1) Verify original input bytes first.
src_w, src_h = await loop.run_in_executor(_IMAGE_VERIFY_POOL, _verify_image_bytes, img_data)
# 2) Resize (or no-op) after verification.
img_data, resized_w, resized_h = await loop.run_in_executor(
_IMAGE_VERIFY_POOL,
_resize_image_bytes_if_needed,
img_data,
src_w,
src_h,
max_image_pixels,
)
Comment on lines +165 to +174
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current implementation performs image verification and resizing in two separate run_in_executor calls. This is inefficient because _verify_image_bytes already decodes the image (via image.load()), and _resize_image_bytes_if_needed decodes it again (via Image.open()). Additionally, large image bytes are passed between the event loop and the thread pool twice.

Consider combining these operations into a single helper function to avoid redundant decoding and overhead.

self.image_w, self.image_h = resized_w, resized_h

if (resized_w, resized_h) != (src_w, src_h):
logger.warning(
f"image pixels {src_w * src_h} exceed max_image_pixels={max_image_pixels},"
f" resized to {self.image_w}x{self.image_h}"
)

self._preload_data = img_data
return
Expand Down Expand Up @@ -245,3 +272,45 @@ def _verify_image_bytes(img_data: bytes) -> Tuple[int, int]:
w, h = image.size
image.load()
return w, h


def _resize_image_bytes_if_needed(
    img_data: bytes, src_w: int, src_h: int, max_image_pixels: int
) -> Tuple[bytes, int, int]:
    """
    Shrink encoded image bytes so the image fits within a max pixel budget.

    Args:
        img_data: encoded image bytes (any format Pillow can open).
        src_w: width of the image encoded in ``img_data``.
        src_h: height of the image encoded in ``img_data``.
        max_image_pixels: maximum allowed value of ``width * height``.

    Returns:
        ``(bytes, width, height)``. When the image is already within budget the
        original ``img_data`` is returned untouched together with ``src_w`` and
        ``src_h``. Otherwise the image is downscaled with LANCZOS resampling and
        re-encoded as an RGB JPEG (quality 96), and the new size is returned.
    """
    new_w, new_h = _resize_image_dimensions_if_needed(src_w, src_h, max_image_pixels)
    if (new_w, new_h) == (src_w, src_h):
        # Within budget: hand back the caller's bytes as-is, no decode/re-encode.
        return img_data, src_w, src_h

    # Pillow >= 9.1 exposes filters under Image.Resampling; older releases only
    # have the module-level constant.
    resampling = Image.Resampling.LANCZOS if hasattr(Image, "Resampling") else Image.LANCZOS

    with Image.open(BytesIO(img_data)) as image:
        # Pillow ignores the requested filter and always uses NEAREST when the
        # image mode is "1" or "P", so paletted / bilevel inputs must be expanded
        # to RGB first for LANCZOS to actually take effect.
        source = image.convert("RGB") if image.mode in ("1", "P") else image
        # NOTE: JPEG has no alpha channel, so transparency is dropped by the final
        # RGB conversion; fully transparent areas keep whatever RGB values they
        # carried in the source image.
        resized_image = source.resize((new_w, new_h), resampling).convert("RGB")

    buffer = BytesIO()
    resized_image.save(buffer, format="JPEG", quality=96, optimize=True)
    return buffer.getvalue(), new_w, new_h


def _resize_image_dimensions_if_needed(src_w: int, src_h: int, max_image_pixels: int) -> Tuple[int, int]:
"""
Compute resized (w, h) under a max pixel budget while preserving aspect ratio.
"""
old_pixels = src_w * src_h
if old_pixels <= max_image_pixels:
return src_w, src_h

scale = (max_image_pixels / old_pixels) ** 0.5
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Potential ZeroDivisionError if old_pixels is 0. While _verify_image_bytes should catch invalid images, src_w and src_h can be 0 if provided via the image_size type (lines 148-149).

Suggested change
scale = (max_image_pixels / old_pixels) ** 0.5
if old_pixels <= max_image_pixels or old_pixels == 0:
return src_w, src_h

new_w = max(1, int(src_w * scale))
new_h = max(1, int(src_h * scale))

# Avoid overflow from integer rounding.
while new_w * new_h > max_image_pixels:
if new_w >= new_h:
new_w = max(1, new_w - 1)
else:
new_h = max(1, new_h - 1)
Comment on lines +309 to +313
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

If max_image_pixels is set to 0, this loop will become infinite because new_w * new_h (which will be 1 * 1) will always be greater than 0, and max(1, new_w - 1) will keep the value at 1. Ensure max_image_pixels is at least 1 before entering this logic.


assert new_w > 0 and new_h > 0, "resized image dimensions must be positive"
return new_w, new_h
Loading