diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 554b62704..90869a5e6 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -140,7 +140,9 @@ def __init__(self, *args, **kwargs): "--format", default="auto_round", type=str, - help="Output format for the quantized model." "'auto_round' is the recommended format", + help="Output format for the quantized model." + "'auto_round' is the recommended format" + "use command `auto_round list format` to show all supported formats with support scheme.", ) basic.add_argument( "--output_dir", @@ -454,14 +456,25 @@ def __init__(self, *args, **kwargs): ) -def setup_parser(recipe="default"): +def list_item(): + args = argparse.ArgumentParser() + args.add_argument("item", type=str, help="item to list, e.g., format") + args = args.parse_args() + if args.item == "format": + from auto_round.formats import OutputFormat + + print("AutoRound supported output formats and quantization scheme:") + print(OutputFormat.get_support_matrix()) + + +def start(recipe="default"): recipe = RECIPES[recipe] parser = BasicArgumentParser() args = parser.parse_args() for k, v in recipe.items(): if getattr(args, k) is None: setattr(args, k, v) - return args + tune(args) def tune(args): @@ -870,27 +883,33 @@ def run_eval(): def run(): - if "--eval" in sys.argv: - sys.argv.remove("--eval") + if "list" in sys.argv or "--list" in sys.argv: + if "list" in sys.argv: + sys.argv.remove("list") + if "--list" in sys.argv: + sys.argv.remove("--list") + list_item() + exit() + if "--eval" in sys.argv or "eval" in sys.argv: + if "--eval" in sys.argv: + sys.argv.remove("--eval") + if "eval" in sys.argv: + sys.argv.remove("eval") run_eval() else: - args = setup_parser() - tune(args) + start() def run_best(): - args = setup_parser("best") - tune(args) + start("best") def run_light(): - args = setup_parser("light") - tune(args) + start("light") def run_fast(): - args = setup_parser("fast") - tune(args) + start("fast") if __name__ == "__main__": diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 4a860ae10..69f3be72d 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -40,7 +40,6 @@ check_skippable_keywords, collect_best_params, get_shared_keys, - gguf_args_check, immediate_saving, infer_bits_by_data_type, init_cache, @@ -54,8 +53,8 @@ ) from auto_round.data_type import QUANT_FUNC_WITH_DTYPE from auto_round.data_type.utils import reshape_pad_tensor_by_group_size -from auto_round.export.export_to_autoround import AutoRoundFormat from auto_round.export.export_to_gguf.config import GGUF_INNER_CONFIG, ModelType +from auto_round.formats import OutputFormat, get_formats from auto_round.logger import logger from auto_round.schemes import ( SPECIAL_SCHEMES, @@ -69,7 +68,6 @@ from auto_round.utils import ( INNER_SUPPORTED_LAYER_TYPES, SUPPORTED_DTYPES, - SUPPORTED_FORMATS, SUPPORTED_LAYER_TYPES, TORCH_VERSION_AT_LEAST_2_6, CpuInfo, @@ -736,35 +734,6 @@ def _check_configs(self) -> None: def _check_compatibility(self) -> None: """Checks compatibility of the configurations and model.""" - # Check gguf and others - has_gguf = False - if hasattr(self, "formats"): - has_besides_gguf = False - for format_ in self.formats: - if "gguf" in format_: - has_gguf = True - elif format_ != "fake": - has_besides_gguf = True - if has_gguf and has_besides_gguf: - raise ValueError("GGUF format is not compatible with other formats, please choose only one of them") - if has_gguf: - from transformers.utils.versions import require_version - 
- require_version( - "sentencepiece", - "GGUF format requires SentencePiece to be installed. " - "Please install it with `pip install sentencepiece`", - ) - if has_gguf and self.iters != 0 and self.bits != 3 and not self.enable_alg_ext: - logger.warning( - "`iters=0` is recommended when exporting to current GGUF format" - " or add `enable_alg_ext` for better accuracy with much more tuning cost." - " Please refer to https://github.com/intel/auto-round/tree/main/docs/gguf_alg_ext_acc.md" - " for the accuracy results." - ) - elif self.bits >= 8 and self.iters != 0: - logger.warning("`iters=0` is recommended for bits>=8") - if ( self.seqlen is not None and hasattr(self.model, "config") @@ -789,230 +758,6 @@ def _check_compatibility(self) -> None: if self.group_size == 0 and "fp8" not in self.data_type: logger.warning("`group_size==0` is not supported for data_type other than fp8 ") - def _parse_format_to_list(self, format: str) -> list: - """Parses the format string into a list of formats. - - This method checks the requested format(s) against the model's - quantization settings and adjusts them if necessary. It ensures that - the formats are compatible with the model's data type, bit width, - and activation quantization settings. - - Args: - format (str): The requested format(s) for quantization, separated by commas. - - Returns: - list: A list of validated and updated formats. - """ - - # Remove duplicates from formats list - def remove_duplicates(lst): - seen = set() - return [x for x in lst if not (x in seen or seen.add(x))] - - formats = format.replace("q*_", f"q{self.bits}_").replace(" ", "").split(",") - formats = remove_duplicates(formats) # need the keep origin order - - gguf_format_name = get_gguf_scheme(self.scheme) - - if gguf_format_name: - for i in range(len(formats)): - if gguf_format_name.lower().endswith("mixed"): - gguf_format_name = gguf_format_name.lower().replace("_mixed", "_s") - if formats[i] != "fake" and formats[i] != gguf_format_name.lower(): - logger.warning( - f"reset format {formats[i]} to {gguf_format_name.lower()} " - f"since scheme {gguf_format_name} can only be exported to format {gguf_format_name.lower()}" - ) - formats[i] = gguf_format_name.lower() - - gguf_args_check(self, formats, model_type=ModelType.TEXT) - if self.mllm: - gguf_args_check(self, formats, model_type=ModelType.MMPROJ) - - for f in formats: - if f.startswith("gguf"): - self.scheme = f.upper() - break - - for format_ in formats: - if format_ not in SUPPORTED_FORMATS: - logger.error(f"Unsupported format {format_}, please choose from {SUPPORTED_FORMATS}") - exit(-1) - if self.scale_dtype != torch.float32: - only_gguf = True - for format_ in formats: - if not ("gguf" in format_ or "fake" in format_): - only_gguf = False - break - if len(formats) == 1 and "fake" == formats[0]: - only_gguf = False - if only_gguf: - self.scale_dtype = torch.float32 - logger.info("change `scale_dtype` to `torch.float32`") - - # Adjust format settings based on compatibility - for index in range(len(formats)): - format = formats[index] - if format == "auto_round": - if self.sym and "int" in self.data_type: - format = "auto_round:auto_gptq" - elif self.bits == 4 and not self.sym and "int" in self.data_type: - enable_awq = all( - config["bits"] == self.bits or config["bits"] >= 16 for config in self.layer_config.values() - ) - if enable_awq: - format = "auto_round:auto_awq" - elif is_nv_fp(self.data_type) or is_mx_fp(self.data_type): - format = f"auto_round:{self.data_type}" - elif is_static_wfp8afp8(self): # static 
wfp8afp8 - format = f"auto_round:{AutoRoundFormat.FP8_STATIC.value}" - elif self.data_type.startswith("fp") and self.bits == 8 and self.act_bits >= 16: # woq fp8 - format = f"auto_round:{AutoRoundFormat.FP8.value}" - elif self.act_bits < 16: - raise ValueError( - "AutoRound format does not support exporting " - "for the current quantization configuration, " - "please change to `fake` format for research purpose" - ) - formats[index] = format - elif format == "llm_compressor": - from auto_round.export.export_to_llmcompressor import check_compressed_tensors_supported - - if is_nv_fp(self.data_type) or is_mx_fp(self.data_type): - check_compressed_tensors_supported() - format = format.replace("llm_compressor", f"llm_compressor:{self.data_type}") - formats[index] = format - elif is_static_wfp8afp8(self): - format = f"llm_compressor:{AutoRoundFormat.FP8_STATIC.value}" - formats[index] = format - if self.act_group_size != 0: - logger.warning( - f"scheme FP8_STATIC export to llm_compressor format only support for act_group_size 0," - f" ,but got act_group_size={self.act_group_size}, reset = 0" - ) - self.act_group_size = 0 - if self.group_size > 0: - logger.warning( - f"please note that group_size={self.group_size}" - " may not be supported for llm_compressor format, and cannot be loaded in llm_compressor" - ) - elif not is_wfp8afp8(self): - logger.error( - "Currently, the llm_compressor format only supports MXFP/NVFP/FP8. " - "Please change format to fake or auto_round etc." - ) - elif "auto_awq" in format: - from auto_round.compressors.utils import check_awq_gemm_compatibility - - awq_supported, info = check_awq_gemm_compatibility( - self.model, self.bits, self.group_size, self.sym, self.layer_config - ) - if not awq_supported: - logger.warning(f"The AutoAWQ format may not be supported due to {info}") - else: - if (is_nv_fp(self.data_type) or is_mx_fp(self.data_type)) and format != "fake": - logger.warning(f"nv_fp and mx_fp dtypes are not supported for export format: {format}") - - formats = remove_duplicates(formats) - for i in range(len(formats)): - formats[i] = self._check_supported_format(formats[i]) - formats = remove_duplicates(formats) - return formats - - def _check_supported_format(self, format: str) -> bool: - """Checks if the specified format is supported. - - This method validates the requested format against the model's bit width, - group size, symmetry, and activation quantization settings. It raises an - error if the format is incompatible with the current model configuration. - - Args: - format (str): The requested format for quantization. - - Returns: - bool: True if the format is supported, False otherwise. - """ - if format == "fake": - return format - format = format.replace("q*_", f"q{self.bits}_") - - # format check for fp8 - w_fp8 = self.data_type.startswith("fp") and self.bits == 8 - act_fp8 = self.act_data_type.startswith("fp") and self.act_bits == 8 - if (w_fp8 or act_fp8) and re.search("^auto_round|^llm_compressor", format) is None: - error_msg = ( - f"is only supported to export auto_round or llm_compressor format," f" but got {format}, please check." 
- ) - error_msg = ("act_data_type " + error_msg) if act_fp8 else error_msg - error_msg = ("data_type " + error_msg) if w_fp8 else error_msg - logger.error(error_msg) - sys.exit(-1) - - # Only support to export afp8/nv_fp/mx_fp - if self.act_bits <= 8: - if not is_standard_fp(self.act_data_type) or self.act_dynamic: - if "llm_compressor" in format: - if (is_nv_fp(self.act_data_type) and "static_gs" in self.act_data_type) or ( - is_mx_fp(self.act_data_type) - ): - return format - bits, group_size, sym, act_bits = 8, -1, True, 8 - assert ( - self.bits == bits - and self.group_size == group_size - and self.sym == sym - and self.act_bits == act_bits - and self.act_dynamic - ), ( - f"Currently only support to export llm_compressor format for sym dynamic quantized" - f" W{self.bits}A{self.act_bits} model with group_size={group_size}," - f" but got bits={self.bits}, group_size={self.group_size}, sym={self.sym}," - f" act_bits={self.act_bits}" - ) - elif "auto_round" in format and ( - is_mx_fp(self.act_data_type) or (is_nv_fp(self.act_data_type) and "static_gs" in self.act_data_type) - ): - pass - elif format != "fake": - logger.warning( - "Currently only support to export auto_round format quantized model" - " with fp8, mx_fp and nv_fp4 dtype activation for activation quantization." - " Change format to fake and save." - ) - format = "fake" - else: - if format not in [ - "auto_round", - f"auto_round:{AutoRoundFormat.FP8_STATIC.value}", - f"llm_compressor:{AutoRoundFormat.FP8_STATIC.value}", - "auto_round:llm_compressor", - ]: - logger.warning( - f"Currently only support to export auto_round or fake format for static W{self.bits}AFP8 model," - f" change format {format} to auto_round" - ) - if is_static_wfp8afp8(self): - format = f"auto_round:{AutoRoundFormat.FP8_STATIC.value}" - else: - format = f"auto_round:{AutoRoundFormat.FP8.value}" - if ( - self.act_group_size != 0 - and not self.act_dynamic - and format == f"auto_round:{AutoRoundFormat.FP8.value}" - ): - logger.warning( - f"Please note that quantize activation with act_group_size={self.act_group_size}" - " may result in failure to export or import normally." - ) - if re.search(r"q\d_k", format) and not self.data_type.endswith("_dq"): - logger.error( - f"datatype<{self.data_type}> not support to export {format} format." - " Please change export format or `data_type`." - ) - sys.exit(-1) - - return format - def quantize_and_save( self, output_dir: str = "tmp_autoround", format: str = "auto_round", inplace: bool = True, **kwargs ) -> tuple[torch.nn.Module, dict[str, Any]]: @@ -1042,7 +787,7 @@ def quantize_and_save( self.orig_output_dir = output_dir # check and update the format based on the current configuration - format_list = self._parse_format_to_list(format) + format_list = get_formats(format, self) self.formats = format_list # If multiple formats are specified, enforce inplace=False @@ -1065,24 +810,12 @@ def quantize_and_save( else: model, _ = self.quantize() # Save the quantized model in the specified format_list - folders = [] - for format in format_list: - if "gptq" in format and not self.sym: - logger.warning( - "The asymmetrical kernel of the GPTQ format may result in a noticeable accuracy drop," - " particularly for 2-bit quantization and smaller models." - " We recommend exporting to either the AutoAWQ format ( only 4 bits) or " - "the AutoRound format(2/3/4/8 bits)." 
- ) - save_folder = self._get_save_folder_name(format) - self.save_quantized(save_folder, format=format, inplace=inplace, **kwargs) - - folders.append(save_folder) + model, folders = self.save_quantized(output_dir, format=format, inplace=inplace, return_folders=True, **kwargs) memory_monitor.log_summary() return model, folders - def _get_save_folder_name(self, format_str: str) -> str: + def _get_save_folder_name(self, format: OutputFormat) -> str: """Generates the save folder name based on the provided format string. If there are multiple formats to handle, the function creates a subfolder @@ -1096,7 +829,7 @@ def _get_save_folder_name(self, format_str: str) -> str: str: The path to the folder where results should be saved. """ # Replace special characters to make the folder name filesystem-safe - sanitized_format = format_str.replace(":", "-").replace("_", "-") + sanitized_format = format.get_backend_name().replace(":", "-").replace("_", "-") # Use a subfolder only if there are multiple formats if len(self.formats) > 1: @@ -1307,7 +1040,7 @@ def _quantize_layer_via_rtn(self, name: str, dtype: torch.dtype = None, to_cpu=T set_module(self.model, name, m) tuning_device = m.tuning_device if hasattr(m, "tuning_device") else self.device # Step 1: Try quantization on GPU first, fall back to CPU if OOM - if self.immediate_packing and self.iters == 0 and "gguf" in self.formats[0] and not self.disable_opt_rtn: + if self.immediate_packing and self.iters == 0 and self.formats[0].is_gguf() and not self.disable_opt_rtn: m = m.to(tuning_device) m.scale = None m.zp = None @@ -1367,8 +1100,8 @@ def _immediate_pack(self, name: str): return from auto_round.export import PACKING_LAYER_WITH_FORMAT - target_backend = self.formats[0].split(":")[0] if ":" in self.formats[0] else self.formats[0] - has_gguf = any("gguf" in fmt for fmt in self.formats) + target_backend = self.formats[0].output_format + has_gguf = any(fmt.is_gguf() for fmt in self.formats) if has_gguf: from auto_round.export.export_to_gguf.export import pack_gguf_layer @@ -1378,7 +1111,7 @@ def _immediate_pack(self, name: str): pack_gguf_layer( name, self.model, - self.formats[0], + self.formats[0].get_backend_name(), output_dir, self.layer_config, self.tokenizer, @@ -1388,7 +1121,9 @@ def _immediate_pack(self, name: str): device=self.device, ) else: - PACKING_LAYER_WITH_FORMAT[target_backend](name, self.model, self.formats[0], device=self.device) + PACKING_LAYER_WITH_FORMAT[target_backend]( + name, self.model, self.formats[0].get_backend_name(), device=self.device + ) @torch.inference_mode() def _quantize_rtn(self) -> tuple[torch.nn.Module, dict[str, Any]]: @@ -1423,7 +1158,7 @@ def _quantize_rtn(self) -> tuple[torch.nn.Module, dict[str, Any]]: update_fused_layer_global_scales(module) logger.info("Finished updating fused layer global scales.") - if not (any("gguf" in fmt for fmt in getattr(self, "formats", [])) or self.super_bits is not None): + if not (any(fmt.is_gguf() for fmt in getattr(self, "formats", [])) or self.super_bits is not None): self._quantize_embedding_layer() # leave to gguf itself to handle self.model.to("cpu") @@ -1433,7 +1168,8 @@ def _quantize_rtn(self) -> tuple[torch.nn.Module, dict[str, Any]]: enable_imatrix = False if not self.disable_opt_rtn: has_gguf_k = ( - any("gguf" in fmt and "k" in fmt for fmt in getattr(self, "formats", [])) or self.super_bits is not None + any(fmt.is_gguf() and "k" in fmt.output_format for fmt in getattr(self, "formats", [])) + or self.super_bits is not None ) if has_gguf_k: enable_imatrix = True @@ 
-1672,20 +1408,11 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: else: # Determine if immediate packing is required formats = self.formats - if ( - len(formats) == 1 - and ( - "awq" in formats[0] - or "gptq" in formats[0] - or "auto_round" in formats[0] - or "gguf" in formats[0] - or "llm_compressor" in formats[0] - ) - and self.inplace - ): + if len(formats) == 1 and not formats[0].is_fake() and self.inplace: self.immediate_packing = True - if "gguf" not in formats[0] and self.low_cpu_mem_usage: + if not formats[0].is_gguf() and self.low_cpu_mem_usage: self.immediate_saving = True + if self.immediate_saving and "int" not in self.data_type: logger.warning("immediate_saving is only supported for int quantization, set to False") self.immediate_saving = False @@ -1862,7 +1589,7 @@ def _quantize_layers(self, layer_names: list, layer_inputs: dict) -> None: has_gguf = False if hasattr(self, "formats"): - has_gguf = any("gguf" in format_ for format_ in self.formats) + has_gguf = any(format_.is_gguf() for format_ in self.formats) if has_gguf and self.immediate_packing: enable_quanted_input = False @@ -3133,7 +2860,12 @@ def _quantize_blocks( clear_memory(device_list=self.device_list) def save_quantized( - self, output_dir: str = None, format: str = "auto_round", inplace: bool = True, **kwargs + self, + output_dir: str = None, + format: Union[str, list[OutputFormat]] = "auto_round", + inplace: bool = True, + return_folders=False, + **kwargs, ) -> torch.nn.Module: """Save the quantized model to the specified output directory in the specified format. @@ -3146,122 +2878,118 @@ def save_quantized( Returns: object: The compressed model object. """ - format = self._check_supported_format(format) + self.orig_output_dir = output_dir + if isinstance(format, str): + formats = get_formats(format, self) + if not hasattr(self, "formats"): + self.formats = formats if not self.quantized: logger.warning("please run autoround.quantize first") return - if format == "fake" or format == "qdq": # TODO fix act quantization later - self.model = self.model.to("cpu") - self.model.save_pretrained(output_dir) - if self.tokenizer is not None and hasattr(self.tokenizer, "save_pretrained"): - self.tokenizer.save_pretrained(output_dir) - processor = kwargs.get("processor", None) - if processor is not None: - processor.save_pretrained(output_dir) - try: - copy_python_files_from_model_cache(self.model, output_dir) - except Exception as e: - logger.warning("Skipping source model Python file copy due to error: %s", e) - return - if self.act_bits <= 8 and format == "qdq": - logger.warning( - "Support for exporting activation quantization is limited. " - "Please ensure that your configuration is supported." 
- ) - # if format == "llm_compressor" and (is_nv_fp(self.data_type) or is_mx_fp(self.data_type)): - # format = format.replace("llm_compressor", f"llm_compressor:{self.data_type}") - if format == "llm_compressor" and (is_nv_fp(self.data_type) or is_mx_fp(self.data_type)): - format = format.replace("llm_compressor", f"llm_compressor:{self.data_type}") - if format == "llm_compressor" and is_static_wfp8afp8(self): - format = format.replace("llm_compressor", "llm_compressor:{AutoRoundFormat.FP8_STATIC.value}") - - from auto_round.export import EXPORT_FORMAT - - backend = format - format = format.split(":")[0] - if format not in EXPORT_FORMAT: - logger.error(f"export format only supports {EXPORT_FORMAT.keys()}") - raise ValueError(f"export format only supports {EXPORT_FORMAT.keys()}, but got {format}") - save_quantized_as_format = EXPORT_FORMAT.get(format) - if "gptq" in format and not self.sym: - logger.warning( - "the asymmetrical kernel of the GPTQ format may result in a noticeable accuracy drop," - " particularly for 2-bit quantization and smaller models." - " We recommend exporting to either the AutoAWQ format ( only 4 bits) or " - "the AutoRound format(2/3/4/8 bits)." + folders = [] + for format in formats: + save_folder = self._get_save_folder_name(format) + if format.is_fake(): # TODO fix act quantization later + self.model = self.model.to("cpu") + self.model.save_pretrained(output_dir) + if self.tokenizer is not None and hasattr(self.tokenizer, "save_pretrained"): + self.tokenizer.save_pretrained(output_dir) + processor = kwargs.get("processor", None) + if processor is not None: + processor.save_pretrained(output_dir) + try: + copy_python_files_from_model_cache(self.model, output_dir) + except Exception as e: + logger.warning("Skipping source model Python file copy due to error: %s", e) + compressed_model = self.model + continue + if self.act_bits <= 8 and format.is_fake(): + logger.warning( + "Support for exporting activation quantization is limited. " + "Please ensure that your configuration is supported." 
+ ) + from auto_round.export import EXPORT_FORMAT + + backend = format.get_backend_name() + output_format = format.output_format + if output_format not in EXPORT_FORMAT: + raise ValueError(f"export format only supports {EXPORT_FORMAT.keys()}, but got {output_format}") + save_quantized_as_format = EXPORT_FORMAT.get(output_format) + serialization_keys = [ + "bits", + "group_size", + "sym", + "data_type", + "enable_quanted_input", + "enable_minmax_tuning", + "seqlen", + "batch_size", + "scale_dtype", + "lr", + "minmax_lr", + "gradient_accumulate_steps", + "iters", + "amp", + "nsamples", + "low_gpu_mem_usage", + "to_quant_block_names", + "enable_norm_bias_tuning", + "act_bits", + "act_group_size", + "act_sym", + "act_dynamic", + "act_data_type", + "super_bits", + "super_group_size", + "regex_config", + "static_kv_dtype", + "static_attention_dtype", + ] + if isinstance(self.dataset, str): + serialization_keys.append("dataset") + serialization_dict = {} + for key in serialization_keys: + serialization_dict[key] = getattr(self, key) + from auto_round.version import __version__ + + serialization_dict["autoround_version"] = __version__ + if "scale_dtype" in serialization_dict.keys(): + serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) + compressed_model = save_quantized_as_format( # TODO refine the code + save_folder, + model=self.model, + layer_config=self.layer_config, + inplace=inplace, + bits=self.bits, + act_bits=self.act_bits, + group_size=self.group_size, + sym=self.sym, + iters=self.iters, + lr=self.lr, + minmax_lr=self.minmax_lr, + enable_minmax_tuning=self.enable_minmax_tuning, + enable_quanted_input=self.enable_quanted_input, + scale_dtype=self.scale_dtype, + tokenizer=self.tokenizer, + supported_types=self.supported_types, + data_type=self.data_type, + act_data_type=self.act_data_type, + serialization_dict=serialization_dict, + backend=backend, + to_quant_block_names=self.to_quant_block_names, + quant_block_list=self.quant_block_list, + device=self.device, + static_kv_dtype=self.static_kv_dtype, + static_attention_dtype=self.static_attention_dtype, + **kwargs, ) - if "awq" in format and not self.bits == 4: - raise ValueError("The AWQ format only supports W4 quantization ") - serialization_keys = [ - "bits", - "group_size", - "sym", - "data_type", - "enable_quanted_input", - "enable_minmax_tuning", - "seqlen", - "batch_size", - "scale_dtype", - "lr", - "minmax_lr", - "gradient_accumulate_steps", - "iters", - "amp", - "nsamples", - "low_gpu_mem_usage", - "to_quant_block_names", - "enable_norm_bias_tuning", - "act_bits", - "act_group_size", - "act_sym", - "act_dynamic", - "act_data_type", - "super_bits", - "super_group_size", - "regex_config", - "static_kv_dtype", - "static_attention_dtype", - ] - if isinstance(self.dataset, str): - serialization_keys.append("dataset") - serialization_dict = {} - for key in serialization_keys: - serialization_dict[key] = getattr(self, key) - from auto_round.version import __version__ - - serialization_dict["autoround_version"] = __version__ - if "scale_dtype" in serialization_dict.keys(): - serialization_dict["scale_dtype"] = str(serialization_dict["scale_dtype"]) - compressed_model = save_quantized_as_format( # TODO refine the code - output_dir, - model=self.model, - layer_config=self.layer_config, - inplace=inplace, - bits=self.bits, - act_bits=self.act_bits, - group_size=self.group_size, - sym=self.sym, - iters=self.iters, - lr=self.lr, - minmax_lr=self.minmax_lr, - enable_minmax_tuning=self.enable_minmax_tuning, - 
enable_quanted_input=self.enable_quanted_input, - scale_dtype=self.scale_dtype, - tokenizer=self.tokenizer, - supported_types=self.supported_types, - data_type=self.data_type, - act_data_type=self.act_data_type, - serialization_dict=serialization_dict, - backend=backend, - to_quant_block_names=self.to_quant_block_names, - quant_block_list=self.quant_block_list, - device=self.device, - static_kv_dtype=self.static_kv_dtype, - static_attention_dtype=self.static_attention_dtype, - **kwargs, - ) - return compressed_model + folders.append(save_folder) + + if return_folders: + return compressed_model, folders + else: + return compressed_model def _get_quantized_layer_names_outside_blocks(self) -> list: """Gets the names of quantized layers outside blocks in the model. diff --git a/auto_round/compressors/utils.py b/auto_round/compressors/utils.py index dd85f4ebd..268cbed5b 100644 --- a/auto_round/compressors/utils.py +++ b/auto_round/compressors/utils.py @@ -164,48 +164,6 @@ def check_need_act_calibration( return False -def check_awq_gemm_compatibility(model, bits, group_size, sym, layer_configs=None): - """Checks if a model is compatible with the AutoAWQ GEMM kernel. - - Args: - model: The model object to evaluate, typically a PyTorch model. - bits (int): The number of bits for quantization (must be 4 for compatibility). - group_size (int): The group size for quantization. - sym (bool): Whether symmetric quantization is used (not utilized in the current function logic). - layer_configs (dict, optional): A dictionary mapping layer names to configurations, where each - configuration can specify a custom number of bits for the layer. - - Returns: - tuple: A tuple containing: - - bool: `True` if the model is compatible, `False` otherwise. - - str: An error message describing why the model is incompatible, or an empty string if compatible. 
- """ - from auto_round.utils.model import get_layer_names_in_block, get_module - - if bits != 4: - return False, "AutoAWQ GEMM kernel only supports 4 bits" - for n, m in model.named_modules(): - if type(m) == transformers.pytorch_utils.Conv1D: - return False, "AutoAWQ GEMM kernel does not support conv1d" - - layer_names = get_layer_names_in_block(model) - for layer_name in layer_names: - if ( - layer_configs is not None - and layer_name in layer_configs.keys() - and layer_configs[layer_name].get("bits", bits) > 8 - ): - continue - - layer = get_module(model, layer_name) - if layer.in_features % group_size != 0: - return False, f"Layer {layer_name} in_features is not multiple of group_size {group_size}" - if layer.out_features % (32 // bits) != 0: - return False, f"Layer {layer_name} out_features is not multiple of 32 // bits" - - return True, "" - - def collect_best_params(block, cache_device="cpu"): """Collect the best parameters from the block to the specified device.""" params = {} @@ -513,117 +471,6 @@ def gguf_type_fallback(gguf_type: str) -> str: return gguf_type -def gguf_args_check(args_or_ar, formats: list[str] = None, model_type=ModelType.TEXT): - import argparse - - from auto_round.export.export_to_gguf.convert import download_convert_file - from auto_round.logger import logger - from auto_round.utils.model import download_or_get_path, get_gguf_architecture - - formats = sorted(formats, key=lambda x: len(x)) - export_gguf = False - for f in formats: - if f.startswith("gguf"): - export_gguf = True - - if f.startswith("gguf") and f not in GGUF_CONFIG: - logger.error(f"{f} is not supported, please check.") - - redownload = False - if export_gguf: - try: - from auto_round.export.export_to_gguf.convert_hf_to_gguf import ( # pylint: disable=E0401 - ModelBase, - ModelType, - get_model_architecture, - ) - - if isinstance(args_or_ar.model, str): - model_path = args_or_ar.model - else: - model_path = args_or_ar.model.name_or_path - if not os.path.isdir(model_path): - model_path = download_or_get_path(model_path, args_or_ar.platform) - model_architecture = get_gguf_architecture(model_path, model_type=ModelType.TEXT) - if model_architecture not in ModelBase._model_classes[ModelType.TEXT]: - logger.warning( - f"Current version of gguf export does not support for {model_architecture}," - " will re-download dependency file. Please restart the task." - ) - redownload = True - except ModuleNotFoundError as e: - if "convert_hf_to_gguf" in str(e): - logger.warning("GGUF export dependency file is not found, download from github.") - redownload = True - except AttributeError as e: - raise ImportError( - "Please use the latest gguf-py, you can use the following command to install it:\n" - "git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py &&" - " pip install . sentencepiece" - ) - download_convert_file(redownload) - - try: - from auto_round.export.export_to_gguf.convert_hf_to_gguf import ( # pylint: disable=E0401 - ModelBase, - ModelType, - ) - except ImportError as e: - raise ImportError( - "Please use the latest gguf-py, you can use the following command to install it:\n" - "git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py &&" - " pip install . 
sentencepiece" - ) - if isinstance(args_or_ar.model, str): - model_path = args_or_ar.model - else: - model_path = args_or_ar.model.name_or_path - if not os.path.isdir(model_path): - model_path = download_or_get_path(model_path, args_or_ar.platform) - model_architecture = get_gguf_architecture(model_path, model_type=ModelType.TEXT) - if model_architecture not in ModelBase._model_classes[ModelType.TEXT]: - logger.error(f"Model {model_architecture} is not supported to export gguf format.") - sys.exit(1) - - pattern = re.compile(r"q\d_k") - pre_dq_format = "" - unsupported_list, reset_list = [], [] - for format in GGUF_CONFIG: - if format in formats: - if format == "q6_k_s": - logger.warning("Please note that q6_k_s is q6_k.") - - if re.search(pattern, format): - if pre_dq_format and re.search(pattern, format).group() not in pre_dq_format: - logger.error(f"Cannot export {pre_dq_format} and {format} at the same time.") - sys.exit(-1) - else: - pre_dq_format = format - - unsupported_list, reset_list = [], [] - gguf_config = GGUF_CONFIG[format] - for k, v in gguf_config.items(): - if not hasattr(args_or_ar, k): - continue - if k == "data_type": - if re.search(r"q\d_1", format) and len(formats) > 1: - v = "int" - if k == "sym" and isinstance(args_or_ar, argparse.Namespace): - k = "asym" - v = not v - if getattr(args_or_ar, k) != v: - unsupported_list.append(f"{k}={getattr(args_or_ar, k)}") - reset_list.append(f"{k}={v}") - setattr(args_or_ar, k, v) - if len(unsupported_list) > 0: - logger.info( - f"format {format} does not support for {', '.join(unsupported_list)}," - f" reset to {', '.join(reset_list)}." - ) - # Removed obsolete commented-out block for improved readability and maintainability. - return args_or_ar - - def get_gguf_qtype_by_layer_config(layer_config): import gguf # pylint: disable=E0401 @@ -1110,7 +957,6 @@ def immediate_saving(rounder: object, m: torch.nn.Module, name: str = None, last writes shard index, renames shard files, copies source files, and releases temporary state. """ import json - import os from collections import OrderedDict from auto_round.utils import clear_memory, get_module diff --git a/auto_round/export/export_to_autoround/__init__.py b/auto_round/export/export_to_autoround/__init__.py index 6cdcd5aed..0c036d831 100644 --- a/auto_round/export/export_to_autoround/__init__.py +++ b/auto_round/export/export_to_autoround/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .export import save_quantized_as_autoround, AutoRoundFormat +from .export import save_quantized_as_autoround diff --git a/auto_round/export/export_to_autoround/export.py b/auto_round/export/export_to_autoround/export.py index ccc7780b3..2dfac16ef 100644 --- a/auto_round/export/export_to_autoround/export.py +++ b/auto_round/export/export_to_autoround/export.py @@ -35,6 +35,7 @@ release_layer_safely, save_model, ) +from auto_round.formats import AutoRoundExportFormat from auto_round.logger import logger from auto_round.schemes import QuantizationScheme from auto_round.utils import ( @@ -50,16 +51,6 @@ ) -class AutoRoundFormat(str, Enum): - # Weight: FP8, per-channel, may be extended to per-tensor in future - # Activation: FP8, per-tensor - FP8_STATIC = "fp8_static" - MXFP8 = "mxfp8" - MXFP4 = "mxfp4" - NVFP4 = "nvfp4" - FP8 = "fp8" - - def dynamic_import_quant_linear_for_packing(backend, bits, group_size, sym, act_bits=16): """ Dynamically imports and returns the appropriate QuantLinear class based on the specified backend and parameters. @@ -170,14 +161,14 @@ def pack_layer(layer_name, model, backend, device=None): return pack_layer(layer_name, model, backend, device) if ( - backend == f"auto_round:{AutoRoundFormat.FP8.value}" - or backend == f"auto_round:{AutoRoundFormat.FP8_STATIC.value}" + backend == f"auto_round:{AutoRoundExportFormat.FP8.value}" + or backend == f"auto_round:{AutoRoundExportFormat.FP8_STATIC.value}" ): from auto_round.export.export_to_autoround.export_to_fp8 import pack_layer return pack_layer(layer_name, model, backend, device) - if backend == "auto_round:llm_compressor": + if backend in ["auto_round:llm_compressor", f"auto_round:llm_compressor:{AutoRoundExportFormat.FP8_STATIC.value}"]: from auto_round.export.export_to_llmcompressor.export_to_static_fp import pack_layer return pack_layer(layer_name, model, backend, device) @@ -287,7 +278,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex return save_quantized_as_fp(output_dir, inplace=inplace, backend="auto_round:llm_compressor", **kwargs) - if backend == "auto_round:llm_compressor": + if backend in ["auto_round:llm_compressor", f"auto_round:llm_compressor:{AutoRoundExportFormat.FP8_STATIC.value}"]: from auto_round.export.export_to_llmcompressor.export_to_static_fp import save_quantized_as_static_fp return save_quantized_as_static_fp(output_dir, inplace=inplace, backend="auto_round:llm_compressor", **kwargs) @@ -301,7 +292,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex if ( (kwargs.get("sym") is None or kwargs.get("sym")) and ("gptq" not in backend and "awq" not in backend) - and (AutoRoundFormat.FP8_STATIC.value not in backend) + and (AutoRoundExportFormat.FP8_STATIC.value not in backend) ): backend = backend.replace("auto_round", "auto_round:auto_gptq") diff --git a/auto_round/formats.py b/auto_round/formats.py new file mode 100644 index 000000000..16592320a --- /dev/null +++ b/auto_round/formats.py @@ -0,0 +1,654 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import copy +import os +import re +import sys +from dataclasses import asdict +from enum import Enum +from typing import TYPE_CHECKING, Callable, Union + +import torch +import transformers + +from auto_round.compressors.utils import ( + is_mx_fp, + is_nv_fp, + is_standard_fp, + is_static_wfp8afp8, + is_wfp8afp8, +) +from auto_round.export.export_to_gguf.config import ModelType +from auto_round.schemes import ( + PRESET_SCHEMES, + QuantizationScheme, + get_gguf_scheme, +) +from auto_round.utils import SUPPORTED_FORMATS, logger + + +class AutoRoundExportFormat(str, Enum): + # Weight: FP8, per-channel, may be extended to per-tensor in future + # Activation: FP8, per-tensor + FP8_STATIC = "fp8_static" + MXFP8 = "mxfp8" + MXFP4 = "mxfp4" + NVFP4 = "nvfp4" + FP8 = "fp8" + MX_FP = "mx_fp" + NV_FP = "nv_fp" + MX_FP_RCEIL = "mx_fp_rceil" + NV_FP4_WITH_STATIC_GS = "nv_fp4_with_static_gs" + + +if TYPE_CHECKING: + from auto_round.compressors.base import BaseCompressor + + +def _check_compatibility(formats: list[str], ar: BaseCompressor): + if ( + any(["gguf" in f.lower() for f in formats]) + and len([f for f in formats if f.lower() != "fake" and not f.lower().startswith("gguf")]) > 1 + ): + raise ValueError( + f"GGUF format is not compatible with other formats, but got {formats}, please choose only one of them" + ) + gguf_format_name = get_gguf_scheme(ar.scheme) + if gguf_format_name: + if gguf_format_name.lower().endswith("mixed"): + gguf_format_name = gguf_format_name.lower().replace("_mixed", "_s") + if any([f.lower() not in ["fake", gguf_format_name.lower()] for f in formats]): + tmp_format_name = gguf_format_name.lower() if "fake" not in formats else f"{gguf_format_name.lower()},fake" + logger.warning( + f"reset format {','.join(formats)} to {tmp_format_name} " + f"since scheme {gguf_format_name} can only be exported to format {gguf_format_name.lower()} or fake" + ) + formats = tmp_format_name.split(",") + return formats + + +def get_formats( + format: str, + ar: BaseCompressor, +) -> list[OutputFormat]: + """Get the list of OutputFormat instances for the provided format string.""" + + def remove_duplicates(lst): + seen = set() + return [x for x in lst if not (x in seen or seen.add(x))] + + formats = format.replace("q*_", f"q{ar.bits}_").replace(" ", "").split(",") + formats = remove_duplicates(formats) # need to keep the original order + + formats = _check_compatibility(formats, ar) + + formats = remove_duplicates(formats) + + for i in range(len(formats)): + if formats[i].startswith("gguf:"): + formats[i] = GGUFFormat(formats[i], ar) + elif formats[i] not in OutputFormat._format_list: + raise KeyError(f"Unsupported format {formats[i]}, please choose from {SUPPORTED_FORMATS}") + else: + formats[i] = OutputFormat._format_list[formats[i]](formats[i], ar) + + new_format = formats[i].check_and_reset_format(ar) + if new_format is not None: + if new_format not in format: + formats[i] = OutputFormat._format_list[new_format](new_format, ar) + else: + formats[i] = None + + formats = [fmt for fmt in formats if fmt is not None] + + if len(formats) == 1 and formats[0].is_gguf() and ar.scale_dtype != torch.float32: + ar.scale_dtype = torch.float32 + logger.info("change `scale_dtype` to `torch.float32` for gguf format") + + return formats + + +def _check_divisible_by_32(ar): + from auto_round.schemes import preset_name_to_scheme + + if isinstance(ar.scheme, str): + default_dict
= asdict(preset_name_to_scheme(ar.scheme.upper())) + else: + default_dict = asdict(ar.scheme) + if default_dict["data_type"] == "int" and default_dict["act_bits"] >= 16: + for n, m in ar.model.named_modules(): + if type(m) in ar.supported_types or m.__class__.__name__ in ar.inner_supported_types: + if m.weight.shape[0] % 32 or m.weight.shape[1] % 32: + if ar.layer_config is None: + ar.layer_config = {} + if ar.layer_config.get(n) is not None and ar.layer_config[n]["bits"] >= 16: + continue + ar.layer_config.setdefault(n, copy.deepcopy(default_dict)) + ar.layer_config[n].update({"bits": 16, "data_type": "fp", "fixed_by_user": True}) + logger.warning_once(f"{n} skipped quantization (shape not divisible by 32).") + + +class OutputFormat: + """ "Base class for different output formats. + + format: determines which method from export module to use for exporting. + For example, auto_round, gguf, llmcompressor etc. + backend: determines the specific export process within the format. + For example, auto_round:fp8_static, auto_round:auto_awq etc. + """ + + support_schemes: list = [] + _format_list: dict[str, OutputFormat] = {} + format_name = "base" + + def __init__(self, format: str, ar: BaseCompressor): + """Initialize the OutputFormat class.""" + self.output_format = format + self.backend = None + + if not self.is_fake() and not self.is_support_scheme(ar.scheme): + logger.error( + f"Currently, the {self.format_name} format only supports {self.support_schemes}, " + f"but got scheme {ar.scheme}, please change to fake or auto_round etc." + ) + exit(-1) + + @classmethod + def register(cls, *names: str) -> Callable[[OutputFormat], OutputFormat]: + assert names + + def func(output_format: OutputFormat) -> OutputFormat: + for name in names: + cls._format_list[name] = output_format + return output_format + + return func + + @classmethod + def get_support_matrix(cls: OutputFormat) -> str: + output_str = "" + for k, v in cls._format_list.items(): + if k == "fake": + support_scheme = "All schemes" + else: + support_scheme = ", ".join(v.support_schemes).rstrip(",") + output_str += f"\x1b[31;1m{k}\x1b[0m support scheme:\n\t{support_scheme}\n" + return output_str + + def get_backend_name(self) -> str: + if self.backend is None: + return self.output_format + + # auto_round:llm_compressor:fp8_static + if self.backend.backend is not None: + return f"{self.output_format}:{self.backend.get_backend_name()}" + # auto_round:auto_awq, auto_round:auto_gptq + elif self.backend.get_backend_name() in self._format_list: + return f"{self.output_format}:{self.backend.get_backend_name()}" + # auto_round:fp8_static, llm_compressor:fp8_static + else: + return self.backend.get_backend_name() + + @classmethod + def is_support_scheme(cls: OutputFormat, scheme: Union[str, QuantizationScheme]) -> bool: + if isinstance(scheme, str) and scheme.upper() in cls.support_schemes: + return True + if isinstance(scheme, QuantizationScheme): + return True + return False + + def check_and_reset_format(self, ar: BaseCompressor) -> str: + if self.backend is not None: + new_format = self.backend.check_and_reset_format(ar) + self.backend = OutputFormat._format_list[new_format](new_format, ar) if new_format else self.backend + + w_fp8 = ar.data_type.startswith("fp") and ar.bits == 8 + act_fp8 = ar.act_data_type.startswith("fp") and ar.act_bits == 8 + if w_fp8 or act_fp8: + error_msg = ( + f"is only supported to export auto_round or llm_compressor format," + f" but got {self.format_name}, please check." 
+ ) + error_msg = ("act_data_type " + error_msg) if act_fp8 else error_msg + error_msg = ("data_type " + error_msg) if w_fp8 else error_msg + logger.error(error_msg) + sys.exit(-1) + + if ar.act_bits <= 8 and (not is_standard_fp(ar.act_data_type) or ar.act_dynamic): + logger.warning( + f"{self.format_name} format not support for current activation quantization configuration," + " reset to fake format and save." + ) + return "fake" + + return None + + def is_gguf(self) -> bool: + return "gguf" in self.output_format + + def is_fake(self) -> bool: + return self.output_format == "fake" + + def is_gptq(self) -> bool: + return "gptq" in self.output_format or (self.backend is not None and self.backend.is_gptq()) + + def is_awq(self) -> bool: + return "awq" in self.output_format or (self.backend is not None and self.backend.is_awq()) + + def is_llm_compressor(self) -> bool: + return "llm_compressor" in self.output_format or (self.backend is not None and self.backend.is_llm_compressor()) + + +@OutputFormat.register("fake") +class FakeFormat(OutputFormat): + support_schemes = None + format_name = "fake" + + def check_and_reset_format(self, ar: BaseCompressor) -> str: + return None + + +@OutputFormat.register("llm_compressor", "llmcompressor") +class LLMCompressorFormat(OutputFormat): + support_schemes = ["MXFP4", "MXFP8", "NVFP4", "FPW8A16", "FP8_STATIC"] + format_name = "llm_compressor" + + def __init__(self, format, ar): + if not self.is_support_scheme(ar.scheme): + logger.error( + f"Currently, the llm_compressor format only supports {self.support_schemes}, " + f"but got scheme {ar.scheme}, please change to fake or auto_round etc." + ) + exit(-1) + if format.startswith("llm_compressor"): + self.output_format = format + self.backend = None + if is_nv_fp(ar.data_type) or is_mx_fp(ar.data_type): + from auto_round.export.export_to_llmcompressor import check_compressed_tensors_supported + + check_compressed_tensors_supported() + self.backend = LLMCompressorFormat(ar.data_type, ar) + elif is_static_wfp8afp8(ar): + self.backend = LLMCompressorFormat(AutoRoundExportFormat.FP8_STATIC.value, ar) + if ar.act_group_size != 0: + logger.warning( + f"scheme FP8_STATIC export to llm_compressor format only support for act_group_size 0," + f" ,but got act_group_size={ar.act_group_size}, reset = 0" + ) + ar.act_group_size = 0 + if ar.group_size > 0: + logger.warning( + f"please note that group_size={ar.group_size}" + " may not be supported for llm_compressor format, and cannot be loaded in llm_compressor" + ) + else: + if format.upper() not in list(AutoRoundExportFormat.__members__.keys()): + raise KeyError(f"Unsupported backend format llm_compressor:{format}, please check") + self.output_format = f"llm_compressor:{format}" + self.backend = None + + def check_and_reset_format(self, ar: BaseCompressor) -> str: + if self.backend is not None: + new_format = self.backend.check_and_reset_format(ar) + self.backend = OutputFormat._format_list[new_format](new_format, ar) if new_format else self.backend + + if ar.act_bits <= 8 and (not is_standard_fp(ar.act_data_type) or ar.act_dynamic): + if (is_nv_fp(ar.act_data_type) and "static_gs" in ar.act_data_type) or (is_mx_fp(ar.act_data_type)): + return None + else: + bits, group_size, sym, act_bits = 8, -1, True, 8 + assert ( + ar.bits == bits + and ar.group_size == group_size + and ar.sym == sym + and ar.act_bits == act_bits + and ar.act_dynamic + ), ( + f"Currently only support to export llm_compressor format for sym dynamic quantized" + f" W{ar.bits}A{ar.act_bits} model with 
group_size={group_size}," + f" but got bits={ar.bits}, group_size={ar.group_size}, sym={ar.sym}," + f" act_bits={ar.act_bits}" + ) + return None + return None + + +@OutputFormat.register("auto_gptq", "gptqmodel") +class AutoGPTQFormat(OutputFormat): + support_schemes = ["W4A16", "W2A16", "W3A16", "W8A16", "BF16", "W2A16G64", "W2A16G32"] + format_name = "auto_gptq" + + def check_and_reset_format(self, ar): + if not ar.sym: + logger.warning( + "the asymmetrical kernel of the GPTQ format may result in a noticeable accuracy drop," + " particularly for 2-bit quantization and smaller models." + " We recommend exporting to either the AutoAWQ format ( only 4 bits) or " + "the AutoRound format(2/3/4/8 bits)." + ) + if self.backend is None: + _check_divisible_by_32(ar) + return super().check_and_reset_format(ar) + + +@OutputFormat.register("auto_awq") +class AutoAWQFormat(OutputFormat): + support_schemes = ["W4A16", "W2A16", "W3A16", "W8A16", "BF16", "W2A16G64", "W2A16G32"] + format_name = "auto_awq" + + @staticmethod + def check_awq_gemm_compatibility(model, bits, group_size, sym, layer_configs=None): + """Checks if a model is compatible with the AutoAWQ GEMM kernel. + + Args: + model: The model object to evaluate, typically a PyTorch model. + bits (int): The number of bits for quantization (must be 4 for compatibility). + group_size (int): The group size for quantization. + sym (bool): Whether symmetric quantization is used (not utilized in the current function logic). + layer_configs (dict, optional): A dictionary mapping layer names to configurations, where each + configuration can specify a custom number of bits for the layer. + + Returns: + tuple: A tuple containing: + - bool: `True` if the model is compatible, `False` otherwise. + - str: An error message describing why the model is incompatible, or an empty string if compatible. 
+ """ + from auto_round.utils.model import get_layer_names_in_block, get_module + + if bits != 4: + return False, "AutoAWQ GEMM kernel only supports 4 bits" + for n, m in model.named_modules(): + if type(m) == transformers.pytorch_utils.Conv1D: + return False, "AutoAWQ GEMM kernel does not support conv1d" + + layer_names = get_layer_names_in_block(model) + for layer_name in layer_names: + if ( + layer_configs is not None + and layer_name in layer_configs.keys() + and layer_configs[layer_name].get("bits", bits) > 8 + ): + continue + + layer = get_module(model, layer_name) + if layer.in_features % group_size != 0: + return False, f"Layer {layer_name} in_features is not multiple of group_size {group_size}" + if layer.out_features % (32 // bits) != 0: + return False, f"Layer {layer_name} out_features is not multiple of 32 // bits" + + return True, "" + + def check_and_reset_format(self, ar): + awq_supported, info = self.check_awq_gemm_compatibility( + ar.model, ar.bits, ar.group_size, ar.sym, ar.layer_config + ) + if not awq_supported: + logger.warning(f"The AutoAWQ format may not be supported due to {info}") + if ar.bits != 4: + raise ValueError("The AWQ format only supports W4 quantization ") + + if self.backend is None: + _check_divisible_by_32(ar) + + return super().check_and_reset_format(ar) + + +@OutputFormat.register("itrex") +@OutputFormat.register("itrex_xpu") +class ITREXFormat(OutputFormat): + support_schemes = ["W4A16", "W2A16", "W3A16", "W8A16", "BF16", "W2A16G64", "W2A16G32"] + format_name = "itrex" + + +@OutputFormat.register("gguf") +class GGUFFormat(OutputFormat): + support_schemes = [ + "GGUF:Q4_0", + "GGUF:Q4_1", + "GGUF:Q5_0", + "GGUF:Q5_1", + "GGUF:Q2_K_S", + "GGUF:Q3_K_S", + "GGUF:Q3_K_M", + "GGUF:Q3_K_L", + "GGUF:Q4_K_S", + "GGUF:Q4_K_M", + "GGUF:Q5_K_S", + "GGUF:Q5_K_M", + "GGUF:Q6_K", + "GGUF:Q8_0", + ] + format_name = "gguf" + + def __init__(self, format: str, ar: BaseCompressor): + if format.startswith("gguf:"): + self.gguf_args_check(ar, format, model_type=ModelType.TEXT) + if ar.mllm: + self.gguf_args_check(ar, format, model_type=ModelType.MMPROJ) + ar.scheme = format.upper() + + self.output_format = "gguf" + self.backend_cls = GGUFFormat + self.backend = GGUFFormat(format.split(":")[-1], ar) + else: + self.output_format = f"gguf:{format}" + self.backend = None + + def check_and_reset_format(self, ar): + if ar.iters != 0 and ar.bits != 3 and not ar.enable_alg_ext: + logger.warning_once( + "`iters=0` is recommended when exporting to current GGUF format" + " or add `enable_alg_ext` for better accuracy with much more tuning cost." + " Please refer to https://github.com/intel/auto-round/tree/main/docs/gguf_alg_ext_acc.md" + " for the accuracy results." 
+ ) + elif ar.bits >= 8 and ar.iters != 0: + logger.warning_once("`iters=0` is recommended for bits>=8") + + return super().check_and_reset_format(ar) + + @staticmethod + def gguf_args_check(args_or_ar, formats: Union[str, list[str]] = None, model_type=ModelType.TEXT): + import argparse + + from auto_round.export.export_to_gguf.config import GGUF_CONFIG + from auto_round.export.export_to_gguf.convert import download_convert_file + from auto_round.logger import logger + from auto_round.utils.model import download_or_get_path, get_gguf_architecture + + formats = [formats] if isinstance(formats, str) else formats + formats = sorted(formats, key=lambda x: len(x)) + export_gguf = False + for f in formats: + if f.startswith("gguf"): + export_gguf = True + + if f.startswith("gguf") and f not in GGUF_CONFIG: + logger.error(f"{f} is not supported, please check.") + + redownload = False + if export_gguf: + try: + from auto_round.export.export_to_gguf.convert_hf_to_gguf import ( # pylint: disable=E0401 + ModelBase, + ModelType, + get_model_architecture, + ) + + if isinstance(args_or_ar.model, str): + model_path = args_or_ar.model + else: + model_path = args_or_ar.model.name_or_path + if not os.path.isdir(model_path): + model_path = download_or_get_path(model_path, args_or_ar.platform) + model_architecture = get_gguf_architecture(model_path, model_type=ModelType.TEXT) + if model_architecture not in ModelBase._model_classes[ModelType.TEXT]: + logger.warning( + f"Current version of gguf export does not support for {model_architecture}," + " will re-download dependency file. Please restart the task." + ) + redownload = True + except ModuleNotFoundError as e: + if "convert_hf_to_gguf" in str(e): + logger.warning("GGUF export dependency file is not found, download from github.") + redownload = True + except AttributeError as e: + raise ImportError( + "Please use the latest gguf-py, you can use the following command to install it:\n" + "git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py" + " && pip install . sentencepiece" + ) + download_convert_file(redownload) + + try: + from auto_round.export.export_to_gguf.convert_hf_to_gguf import ( # pylint: disable=E0401 + ModelBase, + ModelType, + ) + except ImportError as e: + raise ImportError( + "Please use the latest gguf-py, you can use the following command to install it:\n" + "git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py" + " && pip install . 
sentencepiece" + ) + if isinstance(args_or_ar.model, str): + model_path = args_or_ar.model + else: + model_path = args_or_ar.model.name_or_path + if not os.path.isdir(model_path): + model_path = download_or_get_path(model_path, args_or_ar.platform) + model_architecture = get_gguf_architecture(model_path, model_type=ModelType.TEXT) + if model_architecture not in ModelBase._model_classes[ModelType.TEXT]: + logger.error(f"Model {model_architecture} is not supported to export gguf format.") + sys.exit(1) + + pattern = re.compile(r"q\d_k") + pre_dq_format = "" + unsupported_list, reset_list = [], [] + for format in GGUF_CONFIG: + if format in formats: + if format == "q6_k_s": + logger.warning("Please note that q6_k_s is q6_k.") + + if re.search(pattern, format): + if pre_dq_format and re.search(pattern, format).group() not in pre_dq_format: + logger.error(f"Cannot export {pre_dq_format} and {format} at the same time.") + sys.exit(-1) + else: + pre_dq_format = format + + unsupported_list, reset_list = [], [] + gguf_config = GGUF_CONFIG[format] + for k, v in gguf_config.items(): + if not hasattr(args_or_ar, k): + continue + if k == "data_type": + if re.search(r"q\d_1", format) and len(formats) > 1: + v = "int" + if k == "sym" and isinstance(args_or_ar, argparse.Namespace): + k = "asym" + v = not v + if getattr(args_or_ar, k) != v: + unsupported_list.append(f"{k}={getattr(args_or_ar, k)}") + reset_list.append(f"{k}={v}") + setattr(args_or_ar, k, v) + if len(unsupported_list) > 0: + logger.info( + f"format {format} does not support for {', '.join(unsupported_list)}," + f" reset to {', '.join(reset_list)}." + ) + # Removed obsolete commented-out block for improved readability and maintainability. + return args_or_ar + + +@OutputFormat.register("auto_round") +@OutputFormat.register("auto_round:auto_awq") +@OutputFormat.register("auto_round:llm_compressor") +@OutputFormat.register("auto_round:gptqmodel", "auto_round:auto_gptq") +class AutoRoundFormat(OutputFormat): + support_schemes = [ + "W4A16", + "W2A16", + "W3A16", + "W8A16", + "MXFP4", + "MXFP8", + "NVFP4", + "FPW8A16", + "W2A16G64", + "W2A16G32", + "FP8_STATIC", + "BF16", + ] + format_name = "auto_round" + + def __init__(self, format: str, ar: BaseCompressor): + self.output_format = "auto_round" + self.backend = None + + if format == "auto_round": + if ar.sym and "int" in ar.data_type: + self.backend = AutoGPTQFormat("auto_gptq", ar) + elif ar.bits == 4 and not ar.sym and "int" in ar.data_type: + enable_awq = all( + config["bits"] == ar.bits or config["bits"] >= 16 for config in ar.layer_config.values() + ) + if enable_awq: + self.backend = AutoAWQFormat("auto_awq", ar) + elif is_nv_fp(ar.data_type) or is_mx_fp(ar.data_type): + self.backend = AutoRoundFormat(ar.data_type, ar) + elif is_static_wfp8afp8(ar): # static wfp8afp8 + self.backend = AutoRoundFormat(AutoRoundExportFormat.FP8_STATIC.value, ar) + elif ar.data_type.startswith("fp") and ar.bits == 8 and ar.act_bits >= 16: # woq fp8 + self.backend = AutoRoundFormat(AutoRoundExportFormat.FP8.value, ar) + elif ar.act_bits < 16: + raise ValueError( + "AutoRound format does not support exporting " + "for the current quantization configuration, " + "please change to `fake` format for research purpose" + ) + elif not format.startswith("auto_round"): + if format.upper() not in list(AutoRoundExportFormat.__members__.keys()): + raise KeyError(f"Unsupported backend format auto_round:{format}, please check") + self.output_format = f"auto_round:{format}" + self.backend = None + else: + backend = 
format.split(":")[1] if ":" in format else None + self.backend = self._format_list.get(backend)(backend, ar) if backend else None + + if self.backend is not None: + self.support_schemes = self.backend.support_schemes + + def check_and_reset_format(self, ar): + if self.backend is not None: + new_format = self.backend.check_and_reset_format(ar) + self.backend = OutputFormat._format_list[new_format](new_format, ar) if new_format else self.backend + + if ar.act_bits <= 8: + if is_standard_fp(ar.act_data_type) and not ar.act_dynamic: + if ( + ar.act_group_size != 0 + and not ar.act_dynamic + and self.get_backend_name() == f"auto_round:{AutoRoundExportFormat.FP8.value}" + ): + logger.warning( + f"Please note that quantize activation with act_group_size={ar.act_group_size}" + " may result in failure to export or import normally." + ) + if self.backend is None: + _check_divisible_by_32(ar) + return None diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index f4cd1c527..4cd47d7e5 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -20,7 +20,7 @@ from tqdm import tqdm from transformers.pytorch_utils import Conv1D -from auto_round.export.export_to_autoround import AutoRoundFormat +from auto_round.formats import AutoRoundExportFormat from auto_round.inference.backend import ( BackendInfos, dynamic_import_inference_linear, @@ -414,10 +414,10 @@ def _create_quant_layer(layer, layer_backend, config, in_features, out_features) bias=bias, ) elif ( - AutoRoundFormat.FP8_STATIC.value in layer_backend - or AutoRoundFormat.MXFP8.value in layer_backend - or AutoRoundFormat.MXFP4.value in layer_backend - or AutoRoundFormat.NVFP4.value in layer_backend + AutoRoundExportFormat.FP8_STATIC.value in layer_backend + or AutoRoundExportFormat.MXFP8.value in layer_backend + or AutoRoundExportFormat.MXFP4.value in layer_backend + or AutoRoundExportFormat.NVFP4.value in layer_backend ): return QuantLinear.from_original(config, layer) diff --git a/auto_round/special_model_handler.py b/auto_round/special_model_handler.py index fe3865b85..ef688634f 100644 --- a/auto_round/special_model_handler.py +++ b/auto_round/special_model_handler.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py
index f4cd1c527..4cd47d7e5 100644
--- a/auto_round/inference/convert_model.py
+++ b/auto_round/inference/convert_model.py
@@ -20,7 +20,7 @@
 from tqdm import tqdm
 from transformers.pytorch_utils import Conv1D
 
-from auto_round.export.export_to_autoround import AutoRoundFormat
+from auto_round.formats import AutoRoundExportFormat
 from auto_round.inference.backend import (
     BackendInfos,
     dynamic_import_inference_linear,
@@ -414,10 +414,10 @@ def _create_quant_layer(layer, layer_backend, config, in_features, out_features)
             bias=bias,
         )
     elif (
-        AutoRoundFormat.FP8_STATIC.value in layer_backend
-        or AutoRoundFormat.MXFP8.value in layer_backend
-        or AutoRoundFormat.MXFP4.value in layer_backend
-        or AutoRoundFormat.NVFP4.value in layer_backend
+        AutoRoundExportFormat.FP8_STATIC.value in layer_backend
+        or AutoRoundExportFormat.MXFP8.value in layer_backend
+        or AutoRoundExportFormat.MXFP4.value in layer_backend
+        or AutoRoundExportFormat.NVFP4.value in layer_backend
     ):
         return QuantLinear.from_original(config, layer)
diff --git a/auto_round/special_model_handler.py b/auto_round/special_model_handler.py
index fe3865b85..ef688634f 100644
--- a/auto_round/special_model_handler.py
+++ b/auto_round/special_model_handler.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import auto_round.modelling as auto_round_modelling
+from auto_round.formats import OutputFormat
 from auto_round.utils import LazyImport, logger, unsupported_meta_device
 
 mllms_with_limited_bs = ("llava", "qwen2_vl", "phi3_v", "mllama")  # Limitations on batch_size
@@ -67,8 +68,8 @@ def _handle_special_model(model):
     return model
 
 
-def _handle_moe_model(model, formats=None):
-    if formats is not None and any(["gguf" in format_ for format_ in formats]):
+def _handle_moe_model(model, formats: list[OutputFormat] = None):
+    if formats is not None and any([format_.is_gguf() for format_ in formats]):
         return model
     if hasattr(model.config, "model_type") and model.config.model_type in CONVERT_EXPERT_TO_LINEAR_MODELS:
         from tqdm import tqdm
diff --git a/auto_round_extension/vllm_ext/moe_impl_mxfp4.py b/auto_round_extension/vllm_ext/moe_impl_mxfp4.py
index 0ff5241ab..75e298acd 100644
--- a/auto_round_extension/vllm_ext/moe_impl_mxfp4.py
+++ b/auto_round_extension/vllm_ext/moe_impl_mxfp4.py
@@ -283,7 +283,6 @@ def revert_interleaved_bias(bias):
             return revert_bias
 
-        # breakpoint()
         if self.has_bias:
             if envs.VLLM_AR_POST_PROCESS_GPTOSS:
                 w13_bias_swapped = revert_interleaved_bias(layer.w13_bias)
diff --git a/test/test_cpu/test_export.py b/test/test_cpu/test_export.py
index 57b30354b..d5faa737a 100644
--- a/test/test_cpu/test_export.py
+++ b/test/test_cpu/test_export.py
@@ -402,6 +402,32 @@ def test_gptq_lmhead_export(self):
         print(res)
         shutil.rmtree(quantized_model_path, ignore_errors=True)
 
+    def test_export_format(self):
+        from auto_round.formats import get_formats
+
+        autoround = AutoRound(
+            self.model_name,
+            scheme="FP8_STATIC",
+        )
+        format_list = get_formats("auto_round, llm_compressor, auto_round:llm_compressor", autoround)
+        self.assertEqual(len(format_list), 3)
+        self.assertEqual(format_list[0].output_format, "auto_round")
+        self.assertEqual(format_list[0].get_backend_name(), "auto_round:fp8_static")
+        self.assertEqual(format_list[1].output_format, "llm_compressor")
+        self.assertEqual(format_list[1].get_backend_name(), "llm_compressor:fp8_static")
+        self.assertEqual(format_list[2].output_format, "auto_round")
+        self.assertEqual(format_list[2].get_backend_name(), "auto_round:llm_compressor:fp8_static")
+
+        autoround = AutoRound(
+            self.model_name,
+            scheme="W4A16",
+        )
+        format_list = get_formats("auto_round:auto_awq, auto_gptq", autoround)
+        self.assertEqual(format_list[0].output_format, "auto_round")
+        self.assertEqual(format_list[0].get_backend_name(), "auto_round:auto_awq")
+        self.assertEqual(format_list[1].output_format, "auto_gptq")
+        self.assertEqual(format_list[1].get_backend_name(), "auto_gptq")
+
 
 if __name__ == "__main__":
     unittest.main()
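The new `test_export_format` case drives `get_formats` directly with comma-separated format strings. At the user level the same strings are what would be passed at export time; below is a rough usage sketch, assuming the existing `AutoRound.quantize_and_save` API accepts the same comma-separated format list — model name, scheme, and output path are placeholders.

```python
from auto_round import AutoRound

# Hypothetical end-to-end sketch; adjust model, scheme, and output path as needed.
ar = AutoRound("facebook/opt-125m", scheme="W4A16")

# Export the same quantized model to two formats in one call,
# mirroring the comma-separated strings parsed by get_formats.
ar.quantize_and_save(output_dir="./opt-125m-w4a16", format="auto_round,auto_gptq")
```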
diff --git a/test/test_cpu/test_mx_quant_linear.py b/test/test_cpu/test_mx_quant_linear.py
index e8e18c3bb..c2e9a3c00 100644
--- a/test/test_cpu/test_mx_quant_linear.py
+++ b/test/test_cpu/test_mx_quant_linear.py
@@ -3,14 +3,14 @@
 from auto_round.data_type.utils import get_quant_func
 from auto_round.experimental import qmodules as ar_qmodules
-from auto_round.export.export_to_autoround import AutoRoundFormat
 from auto_round.export.export_to_autoround.qlinear_fp import QuantLinear as _MXFPLinear
+from auto_round.formats import AutoRoundExportFormat
 from auto_round.schemes import PRESET_SCHEMES
 
-mx_schemes = [AutoRoundFormat.MXFP8.value, AutoRoundFormat.MXFP4.value]
+mx_schemes = [AutoRoundExportFormat.MXFP8.value, AutoRoundExportFormat.MXFP4.value]
 QMODULE_MAPPING = {
-    AutoRoundFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
-    AutoRoundFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
+    AutoRoundExportFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
+    AutoRoundExportFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
 }
diff --git a/test/test_cpu/test_mxfp_save_load.py b/test/test_cpu/test_mxfp_save_load.py
index aca5c7592..5331949a0 100644
--- a/test/test_cpu/test_mxfp_save_load.py
+++ b/test/test_cpu/test_mxfp_save_load.py
@@ -9,22 +9,22 @@
 from auto_round import AutoRound
 from auto_round import schemes as ar_schemes
 from auto_round.experimental import qmodules as ar_qmodules
-from auto_round.export.export_to_autoround import AutoRoundFormat
 from auto_round.export.export_to_autoround import qlinear_fp as ar_qlinear_fp
+from auto_round.formats import AutoRoundExportFormat
 from auto_round.inference.backend import MX_TENSOR_DATA_TYPES
 from auto_round.testing_utils import has_module
 
 testing_scheme_name_lst = [
-    AutoRoundFormat.MXFP8.value,
-    AutoRoundFormat.MXFP4.value,
+    AutoRoundExportFormat.MXFP8.value,
+    AutoRoundExportFormat.MXFP4.value,
 ]
 QMODULE_MAPPING = {
-    AutoRoundFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
-    AutoRoundFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
+    AutoRoundExportFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
+    AutoRoundExportFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
 }
 SCHEMES_MAPPING = {
-    AutoRoundFormat.MXFP8.value: ar_schemes.MXFP8,
-    AutoRoundFormat.MXFP4.value: ar_schemes.MXFP4,
+    AutoRoundExportFormat.MXFP8.value: ar_schemes.MXFP8,
+    AutoRoundExportFormat.MXFP4.value: ar_schemes.MXFP4,
 }
diff --git a/test/test_cpu/test_nvfp4_quant_linear.py b/test/test_cpu/test_nvfp4_quant_linear.py
index 0a42f009a..80a289b70 100644
--- a/test/test_cpu/test_nvfp4_quant_linear.py
+++ b/test/test_cpu/test_nvfp4_quant_linear.py
@@ -4,12 +4,12 @@
 from auto_round.data_type.nvfp import calculate_gparam
 from auto_round.data_type.utils import get_quant_func
 from auto_round.experimental import qmodules as ar_qmodules
-from auto_round.export.export_to_autoround import AutoRoundFormat
 from auto_round.export.export_to_autoround.qlinear_fp import QuantLinear as _FPLinear
+from auto_round.formats import AutoRoundExportFormat
 from auto_round.schemes import PRESET_SCHEMES
 
 QMODULE_MAPPING = {
-    AutoRoundFormat.NVFP4.value: ar_qmodules.NVFP4QuantLinear,
+    AutoRoundExportFormat.NVFP4.value: ar_qmodules.NVFP4QuantLinear,
 }
@@ -26,7 +26,7 @@ def fixed_seed():
     # (Optional) cleanup or reset after test
 
 
-@pytest.mark.parametrize("scheme", [AutoRoundFormat.NVFP4.value])
+@pytest.mark.parametrize("scheme", [AutoRoundExportFormat.NVFP4.value])
 @torch.inference_mode()
 def test_nvfp4_quantlinear_from_original_and_forward(scheme):
     """
diff --git a/test/test_cuda/test_mxfp_and_nvfp_quant.py b/test/test_cuda/test_mxfp_and_nvfp_quant.py
index 0dc43b093..4954b2320 100644
--- a/test/test_cuda/test_mxfp_and_nvfp_quant.py
+++ b/test/test_cuda/test_mxfp_and_nvfp_quant.py
@@ -8,15 +8,19 @@
 from auto_round import AutoRound
 from auto_round import schemes as ar_schemes
 from auto_round.experimental import qmodules as ar_qmodules
-from auto_round.export.export_to_autoround import AutoRoundFormat
 from auto_round.export.export_to_autoround import qlinear_fp as ar_qlinear_fp
+from auto_round.formats import AutoRoundExportFormat
 from auto_round.testing_utils import has_module
 
-testing_schemes = [AutoRoundFormat.MXFP8.value, AutoRoundFormat.MXFP4.value, AutoRoundFormat.NVFP4.value]
+testing_schemes = [
+    AutoRoundExportFormat.MXFP8.value,
+    AutoRoundExportFormat.MXFP4.value,
+    AutoRoundExportFormat.NVFP4.value,
+]
 QMODULE_MAPPING = {
-    AutoRoundFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
-    AutoRoundFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
-    AutoRoundFormat.NVFP4.value: ar_qmodules.NVFP4QuantLinear,
+    AutoRoundExportFormat.MXFP8.value: ar_qmodules.MXFP8QuantLinear,
+    AutoRoundExportFormat.MXFP4.value: ar_qmodules.MXFP4QuantLinear,
+    AutoRoundExportFormat.NVFP4.value: ar_qmodules.NVFP4QuantLinear,
 }
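Taken together, the test updates are a mechanical import migration. Downstream code that referenced the old enum can switch with a one-line change; the sketch below assumes the member names carry over unchanged, as the hunks above show for MXFP8, MXFP4, NVFP4, and FP8_STATIC.

```python
# Before this change:
# from auto_round.export.export_to_autoround import AutoRoundFormat
# scheme_name = AutoRoundFormat.MXFP8.value

# After this change:
from auto_round.formats import AutoRoundExportFormat

scheme_name = AutoRoundExportFormat.MXFP8.value
```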