Make certain gptq options customizable via model specific mapping
This is to avoid hardcoding these parameters in config files for models (like phi3) that are not yet officially supported by auto-gptq.
shaahji committed Sep 18, 2024
1 parent 9fa2604 commit 188873a
Showing 2 changed files with 40 additions and 11 deletions.
13 changes: 13 additions & 0 deletions olive/common/hf/mappings.py
@@ -70,3 +70,16 @@
     "llama": "gpt2",
     "roberta": "bert",
 }
+
+MODEL_OUTSIDE_LAYER_MODULES = {
+    "phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
+}
+
+MODEL_INSIDE_LAYER_MODULES = {
+    "phi3": [
+        ["self_attn.qkv_proj"],
+        ["self_attn.o_proj"],
+        ["mlp.gate_up_proj"],
+        ["mlp.down_proj"],
+    ]
+}
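
For context, these two mappings spare users from spelling the phi3 module layout out in every pass config. Before this change, a config for a model not covered by auto-gptq had to carry roughly the following. This is a hedged sketch: the three option names match gptq.py below, but the surrounding pass-config shape and the "GptqQuantizer" type are assumed for illustration, not an exact Olive schema.

    # Illustrative only: option names come from gptq.py; the pass name and dict shape are assumed.
    gptq_pass_config = {
        "type": "GptqQuantizer",  # assumed pass name
        "layers_block_name": "model.layers",
        "outside_layer_modules": ["model.embed_tokens", "embed_dropout", "model.norm"],
        "inside_layer_modules": [
            ["self_attn.qkv_proj"],
            ["self_attn.o_proj"],
            ["mlp.gate_up_proj"],
            ["mlp.down_proj"],
        ],
    }

With the mappings in place, the two module lists can be resolved from the model type instead; layers_block_name still has to come from the config for models outside GPTQ_CAUSAL_LM_MODEL_MAP (see gptq.py below).
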
38 changes: 27 additions & 11 deletions olive/passes/pytorch/gptq.py
@@ -11,6 +11,7 @@
 import torch
 
 from olive.common.config_utils import validate_config
+from olive.common.hf.mappings import MODEL_INSIDE_LAYER_MODULES, MODEL_OUTSIDE_LAYER_MODULES
 from olive.data.config import DataConfig
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.model import HfModelHandler, PyTorchModelHandler
@@ -166,20 +167,35 @@ def _run_for_config(
         def get_onnx_quant_linear(*args, **kwargs):
             return QuantLinear
 
-        if hasattr(pytorch_model, "config") and pytorch_model.config.model_type in GPTQ_CAUSAL_LM_MODEL_MAP:
-            model_type = pytorch_model.config.model_type
-            model_class = GPTQ_CAUSAL_LM_MODEL_MAP[model_type]
-            quantized_model = model_class(pytorch_model, False, quantize_config)
-        else:
-            quantized_model = BaseGPTQForCausalLM(pytorch_model, False, quantize_config)
-            if not (config["layers_block_name"] and config["outside_layer_modules"] and config["inside_layer_modules"]):
+        model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
+        model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
+        quantized_model = model_class(pytorch_model, False, quantize_config)
+
+        if config["outside_layer_modules"]:
+            quantized_model.outside_layer_modules = config["outside_layer_modules"]
+        elif model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+            if model_type in MODEL_OUTSIDE_LAYER_MODULES:
+                quantized_model.outside_layer_modules = MODEL_OUTSIDE_LAYER_MODULES[model_type]
+            else:
                 raise ValueError(
-                    "Can't get layers_block_name to quantize automatically, "
-                    "please set layers_block_name, outside_layer_modules and inside_layer_modules in config."
+                    "Can't get outside_layer_modules to quantize automatically, please provide it in config."
                 )
-            quantized_model.layers_block_name = config["layers_block_name"]
-            quantized_model.outside_layer_modules = config["outside_layer_modules"]
+
+        if config["inside_layer_modules"]:
             quantized_model.inside_layer_modules = config["inside_layer_modules"]
+        elif model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+            if model_type in MODEL_INSIDE_LAYER_MODULES:
+                quantized_model.inside_layer_modules = MODEL_INSIDE_LAYER_MODULES[model_type]
+            else:
+                raise ValueError(
+                    "Can't get inside_layer_modules to quantize automatically, please provide it in config."
+                )
+
+        if model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+            if config["layers_block_name"]:
+                quantized_model.layers_block_name = config["layers_block_name"]
+            else:
+                raise ValueError("Can't get layers_block_name to quantize automatically, please provide it in config.")
 
         import auto_gptq
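To summarize the logic introduced above: each option is resolved by taking an explicit config value first, skipping the override when auto-gptq already supports the model natively, falling back to Olive's model-type mapping otherwise, and raising an error if none of those apply. The helper below is a self-contained sketch of that fallback order, not a function that exists in Olive; the sample mapping entry mirrors the phi3 values added in mappings.py.

    # Self-contained sketch of the new per-option fallback order (illustrative, not Olive's API).
    MODEL_OUTSIDE_LAYER_MODULES = {"phi3": ["model.embed_tokens", "embed_dropout", "model.norm"]}
    GPTQ_CAUSAL_LM_MODEL_MAP = {"llama": object()}  # stand-in for model types auto-gptq supports natively


    def resolve_option(config_value, model_type, mapping, option_name):
        """Mirror the per-option fallback from _run_for_config above (illustrative)."""
        if config_value:
            return config_value  # 1. an explicit value in the pass config always wins
        if model_type in GPTQ_CAUSAL_LM_MODEL_MAP:
            return None  # 2. auto-gptq's own model class already defines the layout
        if model_type in mapping:
            return mapping[model_type]  # 3. fall back to Olive's model-specific mapping
        raise ValueError(f"Can't get {option_name} to quantize automatically, please provide it in config.")


    # phi3 is not natively supported, so the mapping supplies the value:
    print(resolve_option(None, "phi3", MODEL_OUTSIDE_LAYER_MODULES, "outside_layer_modules"))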
