Make certain GPTQ options customizable via model-specific mapping
This is to avoid hardcoding these parameters in config files for models that
aren't yet officially supported by auto-gptq (like phi3).
shaahji committed Sep 18, 2024
1 parent 9fa2604 commit d2bc1c4
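
In effect, each of the affected options now resolves in a fixed order: an explicit value in the pass config wins; otherwise, for model types auto-gptq does not already handle, the model-specific mapping is consulted; if neither yields a value, quantization fails with an error. A minimal sketch of that order, using a hypothetical resolve_field helper (not an Olive or auto-gptq API):

# Minimal sketch of the resolution order; resolve_field is a hypothetical
# helper for illustration, not part of Olive or auto-gptq.
MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}

def resolve_field(config_value, model_type, mapping, field_name):
    if config_value:  # 1. an explicit config value always wins
        return config_value
    if model_type in mapping:  # 2. fall back to the model-specific mapping
        return mapping[model_type]
    # 3. otherwise the caller must supply the value explicitly
    raise ValueError(f"Can't get {field_name} to quantize automatically, please provide it in config.")

assert resolve_field(None, "phi3", MODEL_LAYERS_BLOCK_NAME, "layers_block_name") == "model.layers"
assert resolve_field("transformer.h", "gptj", MODEL_LAYERS_BLOCK_NAME, "layers_block_name") == "transformer.h"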
Showing 2 changed files with 35 additions and 15 deletions.
15 changes: 15 additions & 0 deletions olive/common/hf/mappings.py
@@ -70,3 +70,18 @@
"llama": "gpt2",
"roberta": "bert",
}

MODEL_OUTSIDE_LAYER_MODULES = {
"phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
}

MODEL_INSIDE_LAYER_MODULES = {
"phi3": [
["self_attn.qkv_proj"],
["self_attn.o_proj"],
["mlp.gate_up_proj"],
["mlp.down_proj"],
]
}

MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}
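
The tables are keyed by the Hugging Face model_type, so callers can look up a model's entries directly. A quick illustrative lookup against the new mappings (import path as above):

# Illustrative lookup; values are exactly the phi3 entries defined above.
from olive.common.hf.mappings import MODEL_LAYERS_BLOCK_NAME, MODEL_OUTSIDE_LAYER_MODULES

model_type = "phi3"
print(MODEL_LAYERS_BLOCK_NAME.get(model_type))      # model.layers
print(MODEL_OUTSIDE_LAYER_MODULES.get(model_type))  # ['model.embed_tokens', 'embed_dropout', 'model.norm']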
35 changes: 20 additions & 15 deletions olive/passes/pytorch/gptq.py
@@ -11,6 +11,7 @@
 import torch

 from olive.common.config_utils import validate_config
+from olive.common.hf.mappings import MODEL_INSIDE_LAYER_MODULES, MODEL_LAYERS_BLOCK_NAME, MODEL_OUTSIDE_LAYER_MODULES
 from olive.data.config import DataConfig
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.model import HfModelHandler, PyTorchModelHandler
@@ -41,7 +42,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam]:
             ),
             "layers_block_name": PassConfigParam(
                 type_=str,
-                default_value="model.layers",
+                default_value=None,
                 description=(
                     "Block name to quantize. Default value is model.layers. "
                     "For models can't be auto filled, you can refer this link to fill these parameters.\n"
@@ -166,20 +167,24 @@ def _run_for_config(
         def get_onnx_quant_linear(*args, **kwargs):
             return QuantLinear

-        if hasattr(pytorch_model, "config") and pytorch_model.config.model_type in GPTQ_CAUSAL_LM_MODEL_MAP:
-            model_type = pytorch_model.config.model_type
-            model_class = GPTQ_CAUSAL_LM_MODEL_MAP[model_type]
-            quantized_model = model_class(pytorch_model, False, quantize_config)
-        else:
-            quantized_model = BaseGPTQForCausalLM(pytorch_model, False, quantize_config)
-            if not (config["layers_block_name"] and config["outside_layer_modules"] and config["inside_layer_modules"]):
-                raise ValueError(
-                    "Can't get layers_block_name to quantize automatically, "
-                    "please set layers_block_name, outside_layer_modules and inside_layer_modules in config."
-                )
-            quantized_model.layers_block_name = config["layers_block_name"]
-            quantized_model.outside_layer_modules = config["outside_layer_modules"]
-            quantized_model.inside_layer_modules = config["inside_layer_modules"]
+        model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
+        model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
+        quantized_model = model_class(pytorch_model, False, quantize_config)
+
+        fields_to_set = {
+            "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,
+            "inside_layer_modules": MODEL_INSIDE_LAYER_MODULES,
+            "layers_block_name": MODEL_LAYERS_BLOCK_NAME,
+        }
+
+        for key, value in fields_to_set.items():
+            if config[key]:
+                setattr(quantized_model, key, config[key])
+            elif model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+                if model_type in value:
+                    setattr(quantized_model, key, value[model_type])
+                else:
+                    raise ValueError(f"Can't get {key} to quantize automatically, please provide it in config.")

         import auto_gptq

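For a model type covered neither by auto-gptq's GPTQ_CAUSAL_LM_MODEL_MAP nor by the new mappings, the three options must still be supplied through the pass config. A hypothetical config fragment, expressed as a Python dict (the pass name GptqQuantizer and all module names are assumptions for illustration):

# Hypothetical pass-config fragment; the module names below are
# placeholders, not taken from a real model.
gptq_pass = {
    "type": "GptqQuantizer",
    "config": {
        "layers_block_name": "transformer.h",
        "outside_layer_modules": ["transformer.wte", "transformer.ln_f"],
        "inside_layer_modules": [
            ["attn.qkv_proj"],
            ["attn.out_proj"],
            ["mlp.fc_in"],
            ["mlp.fc_out"],
        ],
    },
}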
