Make certain gptq options customizable via model specific mapping #1363

Merged · 1 commit · Sep 19, 2024
15 changes: 15 additions & 0 deletions olive/common/hf/mappings.py
@@ -70,3 +70,18 @@
"llama": "gpt2",
"roberta": "bert",
}

MODEL_OUTSIDE_LAYER_MODULES = {
"phi3": ["model.embed_tokens", "embed_dropout", "model.norm"],
}

MODEL_INSIDE_LAYER_MODULES = {
"phi3": [
["self_attn.qkv_proj"],
["self_attn.o_proj"],
["mlp.gate_up_proj"],
["mlp.down_proj"],
]
}

MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}
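
For illustration, here is a minimal sketch of the lookup order these mappings enable, mirroring the precedence applied in gptq.py below: an explicit pass-config value wins, then the model-specific mapping, and otherwise the pass raises an error. The helper resolve_gptq_field and the gptj example are hypothetical, not part of this change, and the real pass also skips the mapping fallback when auto_gptq already knows the model type.

from typing import Any, Dict


def resolve_gptq_field(key: str, config: Dict[str, Any], model_type: str, mapping: Dict[str, Any]) -> Any:
    # An explicit config value takes precedence over the model-specific mapping.
    if config.get(key):
        return config[key]
    if model_type in mapping:
        return mapping[model_type]
    raise ValueError(f"Can't get {key} to quantize automatically, please provide it in config.")


MODEL_LAYERS_BLOCK_NAME = {"phi3": "model.layers"}

# phi3 is covered by the mapping, so an empty config still resolves:
assert resolve_gptq_field("layers_block_name", {}, "phi3", MODEL_LAYERS_BLOCK_NAME) == "model.layers"
# A model type outside the mapping must supply the value in config ("gptj" and
# "transformer.h" are illustrative here):
assert resolve_gptq_field("layers_block_name", {"layers_block_name": "transformer.h"}, "gptj", MODEL_LAYERS_BLOCK_NAME) == "transformer.h"

This is also why the first gptq.py hunk below changes the layers_block_name default from "model.layers" to None: a falsy default lets the mapping fallback engage instead of an explicit-looking default always winning.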
37 changes: 21 additions & 16 deletions olive/passes/pytorch/gptq.py
Expand Up @@ -11,6 +11,7 @@
 import torch
 
 from olive.common.config_utils import validate_config
+from olive.common.hf.mappings import MODEL_INSIDE_LAYER_MODULES, MODEL_LAYERS_BLOCK_NAME, MODEL_OUTSIDE_LAYER_MODULES
 from olive.data.config import DataConfig
 from olive.hardware.accelerator import AcceleratorSpec, Device
 from olive.model import HfModelHandler, PyTorchModelHandler
@@ -41,9 +42,9 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam]:
             ),
             "layers_block_name": PassConfigParam(
                 type_=str,
-                default_value="model.layers",
+                default_value=None,
                 description=(
-                    "Block name to quantize. Default value is model.layers. "
+                    "Block name to quantize. "
                     "For models can't be auto filled, you can refer this link to fill these parameters.\n"
                     "https://github.com/AutoGPTQ/AutoGPTQ/blob/896d8204bc89a7cfbda42bf3314e13cf4ce20b02/auto_gptq/modeling/llama.py#L19-L26"
                 ),
@@ -166,20 +167,24 @@ def _run_for_config(
         def get_onnx_quant_linear(*args, **kwargs):
             return QuantLinear
 
-        if hasattr(pytorch_model, "config") and pytorch_model.config.model_type in GPTQ_CAUSAL_LM_MODEL_MAP:
-            model_type = pytorch_model.config.model_type
-            model_class = GPTQ_CAUSAL_LM_MODEL_MAP[model_type]
-            quantized_model = model_class(pytorch_model, False, quantize_config)
-        else:
-            quantized_model = BaseGPTQForCausalLM(pytorch_model, False, quantize_config)
-            if not (config["layers_block_name"] and config["outside_layer_modules"] and config["inside_layer_modules"]):
-                raise ValueError(
-                    "Can't get layers_block_name to quantize automatically, "
-                    "please set layers_block_name, outside_layer_modules and inside_layer_modules in config."
-                )
-            quantized_model.layers_block_name = config["layers_block_name"]
-            quantized_model.outside_layer_modules = config["outside_layer_modules"]
-            quantized_model.inside_layer_modules = config["inside_layer_modules"]
+        model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
+        model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
+        quantized_model = model_class(pytorch_model, False, quantize_config)
+
+        fields_to_set = {
+            "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,
+            "inside_layer_modules": MODEL_INSIDE_LAYER_MODULES,
+            "layers_block_name": MODEL_LAYERS_BLOCK_NAME,
+        }
+
+        for key, value in fields_to_set.items():
+            if config[key]:
+                setattr(quantized_model, key, config[key])
+            elif model_type not in GPTQ_CAUSAL_LM_MODEL_MAP:
+                if model_type in value:
+                    setattr(quantized_model, key, value[model_type])
+                else:
+                    raise ValueError(f"Can't get {key} to quantize automatically, please provide it in config.")
 
         import auto_gptq

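
As a usage sketch of the new behavior, assuming the pass is registered under the GptqQuantizer class defined in olive/passes/pytorch/gptq.py: a phi3 model needs no extra options because the mappings fill them in, while a model type absent from both GPTQ_CAUSAL_LM_MODEL_MAP and the new mappings must still supply all three fields explicitly. The module names below are illustrative assumptions, not taken from this diff.

# Pass config for a phi3 model: the new mappings supply all three fields.
phi3_gptq_pass = {"type": "GptqQuantizer", "config": {}}

# Pass config for a hypothetical unmapped model type: all three fields must be
# provided, otherwise the pass raises
# "Can't get <field> to quantize automatically, please provide it in config."
custom_gptq_pass = {
    "type": "GptqQuantizer",
    "config": {
        "layers_block_name": "model.layers",
        "outside_layer_modules": ["model.embed_tokens", "model.norm"],
        "inside_layer_modules": [
            ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"],
            ["self_attn.o_proj"],
            ["mlp.gate_proj", "mlp.up_proj"],
            ["mlp.down_proj"],
        ],
    },
}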