Make certain GPTQ options customizable via model-specific mapping
This is to avoid hardcoding these parameters in config files for models that
aren't yet officially supported by auto-gptq (like phi3).
shaahji committed Sep 18, 2024
1 parent 9fa2604 commit d2bc1c4
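
In effect, each of the affected options now resolves in a fixed order: an explicit value in the pass config wins; otherwise, for model types auto-gptq does not already handle, the model-specific mapping is consulted; if neither yields a value, quantization fails with an error. A minimal sketch of that order, using a hypothetical resolve_field helper (not an Olive or auto-gptq API):

# Minimal sketch of the resolution order; resolve_field is a hypothetical
# helper for illustration, not part of Olive or auto-gptq.
MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}

def resolve_field(config_value, model_type, mapping, field_name):
    if config_value:  # 1. an explicit config value always wins
        return config_value
    if model_type in mapping:  # 2. fall back to the model-specific mapping
        return mapping[model_type]
    # 3. otherwise the caller must supply the value explicitly
    raise ValueError(f"Can't get {field_name} to quantize automatically, please provide it in config.")

assert resolve_field(None, "phi3", MODEL_LAYERS_BLOCK_NAME, "layers_block_name") == "model.layers"
assert resolve_field("transformer.h", "gptj", MODEL_LAYERS_BLOCK_NAME, "layers_block_name") == "transformer.h"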
Showing 2 changed files with 35 additions and 15 deletions.
15 changes: 15 additions & 0 deletions olive/common/hf/mappings.py
@@ -70,3 +70,18 @@
"llama": "gpt2",
"roberta": "bert",
}

MODEL_OUTSIDE_LAYER_MODULES = {
"phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
}

MODEL_INSIDE_LAYER_MODULES = {
"phi3": [
["self_attn.qkv_proj"],
["self_attn.o_proj"],
["mlp.gate_up_proj"],
["mlp.down_proj"],
]
}

MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}
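
The tables are keyed by the Hugging Face model_type, so callers can look up a model's entries directly. A quick illustrative lookup against the new mappings (import path as above):

# Illustrative lookup; values are exactly the phi3 entries defined above.
from olive.common.hf.mappings import MODEL_LAYERS_BLOCK_NAME, MODEL_OUTSIDE_LAYER_MODULES

model_type = "phi3"
print(MODEL_LAYERS_BLOCK_NAME.get(model_type))      # model.layers
print(MODEL_OUTSIDE_LAYER_MODULES.get(model_type))  # ['model.embed_tokens', 'embed_dropout', 'model.norm']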
35 changes: 20 additions & 15 deletions olive/passes/pytorch/gptq.py
@@ -11,6 +11,7 @@
 import torch

 from olive.common.config_utils import validate_config
+from olive.common.hf.mappings import MODEL_INSIDE_LAYER_MODULES, MODEL_LAYERS_BLOCK_NAME, MODEL_OUTSIDE_LAYER_MODULES
 from olive.data.config import DataConfig
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.model import HfModelHandler, PyTorchModelHandler
@@ -41,7 +42,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam]:
             ),
             "layers_block_name": PassConfigParam(
                 type_=str,
-                default_value="model.layers",
+                default_value=None,
                 description=(
                     "Block name to quantize. Default value is model.layers. "
                     "For models can't be auto filled, you can refer this link to fill these parameters.\n"
@@ -166,20 +167,24 @@ def _run_for_config(
         def get_onnx_quant_linear(*args, **kwargs):
             return QuantLinear

-        if hasattr(pytorch_model, "config") and pytorch_model.config.model_type in GPTQ_CAUSAL_LM_MODEL_MAP:
-            model_type = pytorch_model.config.model_type
-            model_class = GPTQ_CAUSAL_LM_MODEL_MAP[model_type]
-            quantized_model = model_class(pytorch_model, False, quantize_config)
-        else:
-            quantized_model = BaseGPTQForCausalLM(pytorch_model, False, quantize_config)
-            if not (config["layers_block_name"] and config["outside_layer_modules"] and config["inside_layer_modules"]):
-                raise ValueError(
-                    "Can't get layers_block_name to quantize automatically, "
-                    "please set layers_block_name, outside_layer_modules and inside_layer_modules in config."
-                )
-            quantized_model.layers_block_name = config["layers_block_name"]
-            quantized_model.outside_layer_modules = config["outside_layer_modules"]
-            quantized_model.inside_layer_modules = config["inside_layer_modules"]
+        model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
+        model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
+        quantized_model = model_class(pytorch_model, False, quantize_config)
+
+        fields_to_set = {
+            "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,
+            "inside_layer_modules": MODEL_INSIDE_LAYER_MODULES,
+            "layers_block_name": MODEL_LAYERS_BLOCK_NAME,
+        }
+
+        for key, value in fields_to_set.items():
+            if config[key]:
+                setattr(quantized_model, key, config[key])
+            elif model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+                if model_type in value:
+                    setattr(quantized_model, key, value[model_type])
+                else:
+                    raise ValueError(f"Can't get {key} to quantize automatically, please provide it in config.")

         import auto_gptq

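For a model type covered neither by auto-gptq's GPTQ_CAUSAL_LM_MODEL_MAP nor by the new mappings, the three options must still be supplied through the pass config. A hypothetical config fragment, expressed as a Python dict (the pass name GptqQuantizer and all module names are assumptions for illustration):

# Hypothetical pass-config fragment; the module names below are
# placeholders, not taken from a real model.
gptq_pass = {
    "type": "GptqQuantizer",
    "config": {
        "layers_block_name": "transformer.h",
        "outside_layer_modules": ["transformer.wte", "transformer.ln_f"],
        "inside_layer_modules": [
            ["attn.qkv_proj"],
            ["attn.out_proj"],
            ["mlp.fc_in"],
            ["mlp.fc_out"],
        ],
    },
}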
