🚘 Auto opt cli #1343

Open
wants to merge 13 commits into main
18 changes: 10 additions & 8 deletions olive/auto_optimizer/__init__.py
@@ -44,6 +44,8 @@ class AutoOptimizerConfig(ConfigBase):
# if fine_tune is True, we will not suggest the training related pass, like: QLora
# fine_tune: bool = False

excluded_passes: Optional[List[str]] = None

@validator("opt_level", pre=True)
def check_opt_level(cls, v):
if v != 0:
@@ -76,10 +78,7 @@ def _initialize(self):
# if user can tolerate accuracy drop, we can enable more optimization
default_precisions = [Precision.FP32]
if self.is_accuracy_drop_tolerance:
# ignore int4 for now as it is not supported very well in onnxruntime
# enable it only when user explicitly set it
# default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8, Precision.INT4]
default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8]
default_precisions = [Precision.FP32, Precision.FP16, Precision.INT8, Precision.INT4]
self.auto_optimizer_config.precisions = self.auto_optimizer_config.precisions or default_precisions

def suggest(self):
@@ -92,11 +91,11 @@ def suggest(self):
return self._regulate(self._suggest_pass_flows())

def _suggest_pass_flows(self):
pass_flows_by_precision = []
pass_flows = []
if self.auto_optimizer_config.opt_level == 0:
pass_flows_by_precision = self._suggest_pass_flows_from_template()
pass_flows = self._suggest_pass_flows_from_template()

return pass_flows_by_precision
return pass_flows

def _suggest_pass_flows_from_template(self):
from olive.auto_optimizer.template_mapping import get_pass_flows_by_accelerator_ep_precision
@@ -110,6 +109,7 @@ def _suggest_pass_flows_from_template(self):
self.accelerator_spec.accelerator_type.value,
self.accelerator_spec.execution_provider,
precision,
self.auto_optimizer_config.excluded_passes,
)
return pass_flows_by_precision

@@ -120,4 +120,6 @@ def _regulate(self, pass_flows_by_precision):
pass_config, pass_flows = self.regulate_pass_flows_dict(pass_flows_by_precision)

# step2: fill the data_config for the passes that need data_config
return self.regulate_data_config(pass_config, pass_flows)
pass_config, pass_flows = self.regulate_data_config(pass_config, pass_flows)

return pass_config, pass_flows
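The new `excluded_passes` option lets a caller prune specific passes from the search space before flows are suggested. A minimal usage sketch (the pass names chosen for exclusion here are illustrative assumptions, not a recommendation):

```python
# Hypothetical sketch: constructing the auto optimizer config with passes excluded.
# AutoOptimizerConfig and the excluded_passes field come from this diff; the
# specific exclusions below are illustrative only.
from olive.auto_optimizer import AutoOptimizerConfig

auto_opt_config = AutoOptimizerConfig(
    opt_level=0,  # opt_level 0 is the template-driven path used by _suggest_pass_flows
    excluded_passes=["OnnxMatMul4Quantizer", "IncQuantization"],
)
```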
4 changes: 2 additions & 2 deletions olive/auto_optimizer/config_template/opt_level_passes.yaml
@@ -14,7 +14,7 @@
# OnnxConversion -> OrtTransformersOptimization -> IncQuantization -> OrtPerfTuning
# and so on.

- [OnnxConversion]
- [OnnxConversion, ModelBuilder]
- [OrtTransformersOptimization]
- [OrtMixedPrecision, OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer]
- [OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer, OrtMixedPrecision]
- [OrtPerfTuning]
8 changes: 8 additions & 0 deletions olive/auto_optimizer/config_template/pass_capability.yaml
@@ -24,6 +24,14 @@ OnnxConversion:
EP: null
precision: null
accelerator: null
ModelBuilder:
EP:
- CPU
- CUDA
precision: null
accelerator:
- cpu
- gpu
OrtTransformersOptimization:
EP: null
precision: null
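Each entry in pass_capability.yaml lists the execution providers, precisions, and accelerators a pass supports, with `null` meaning unrestricted. A rough sketch of how such an entry could be matched against an accelerator/EP/precision triple (illustrative only; the real check is `_if_match_pass_capability` in template_mapping.py, whose body is not shown in full here):

```python
# Illustrative capability check; field names mirror pass_capability.yaml, but this is
# not the actual _if_match_pass_capability implementation.
def matches_capability(cap: dict, accelerator: str, ep: str, precision: str) -> bool:
    # a null (None) field means the pass has no restriction on that dimension
    if cap["EP"] is not None and ep not in [e.lower() for e in cap["EP"]]:
        return False
    if cap["precision"] is not None and precision not in cap["precision"]:
        return False
    if cap["accelerator"] is not None and accelerator not in cap["accelerator"]:
        return False
    return True


model_builder_cap = {"EP": ["CPU", "CUDA"], "precision": None, "accelerator": ["cpu", "gpu"]}
print(matches_capability(model_builder_cap, "gpu", "cuda", "int4"))  # True under these assumptions
```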
81 changes: 56 additions & 25 deletions olive/auto_optimizer/regulate_mixins.py
@@ -13,26 +13,26 @@ def regulate_pass_flows_dict(self, pass_flows_dict):
# remove passes that do not apply to the input model type; for example, if the input is
# already an ONNX model, the conversion passes are removed
if self.input_model_config.type.lower().endswith("onnxmodel"):
to_remove_passes = ["OnnxConversion", "ModelBuilder"]
for pfs in pass_flows_dict.values():
for pf in pfs:
pf.remove("OnnxConversion")
for p in to_remove_passes:
if p in pf:
pf.remove(p)

# special passes: OrtTransformerOptimization and OrtPerfTuning can be used for both fp16 and fp32
# we need assign different pass name for them
# special passes: ModelBuilder, OrtTransformerOptimization and OrtPerfTuning can
# be used for both fp16 and fp32, so we need to assign them distinct pass names
# for example, for gpu_cuda_fp16 we rename OrtTransformerOptimization to OrtTransformerOptimization_cuda_fp16
pass_flows_by_fp16 = pass_flows_dict.get("fp16", [])
pass_config, pass_flows_16 = self._regulate_fp16(None, pass_flows_by_fp16)
pass_config, pass_flows_dict = self._regulate_precision(None, pass_flows_dict)

# flatten pass_flows_dict to pass_flows and generate the default pass_configs
pass_flows = []
unique_pass_flows = set()
if pass_flows_16:
pass_flows_dict["fp16"] = pass_flows_16
for pfs in pass_flows_dict.values():
for pf in pfs:
if tuple(pf) not in unique_pass_flows:
unique_pass_flows.add(tuple(pf))
pass_flows.append(pf)
unique_pass_flows.add(tuple(pf))
for p in pf:
if p not in pass_config:
pass_config.update({p: {"type": p, "config": {}}})
Expand All @@ -42,15 +42,34 @@ def regulate_pass_flows_dict(self, pass_flows_dict):
pass_config[pass_name]["disable_search"] = True
return pass_config, pass_flows

def _regulate_fp16(self, pass_config, pass_flows):
def _fill_precision_for_model_builder(self, pass_config, pass_flows):
for precision, pfs in pass_flows.items():
for pass_flow in pfs:
for i, p in enumerate(pass_flow):
if p == "ModelBuilder":
pass_flow[i] = f"ModelBuilder_{precision}"
pass_config.update(
{
pass_flow[i]: {
"type": "ModelBuilder",
"config": {
"precision": precision,
},
}
}
)

def _regulate_precision(self, pass_config, pass_flows):
pass_config = pass_config or {}
# for ModelBuilder, add a precision suffix so the variants for different precisions can be distinguished
self._fill_precision_for_model_builder(pass_config, pass_flows)
is_gpu = self.accelerator_spec.accelerator_type == Device.GPU and self.accelerator_spec.execution_provider in [
"CUDAExecutionProvider",
"DmlExecutionProvider",
"TensorrtExecutionProvider",
]
if not is_gpu or not self.is_accuracy_drop_tolerance:
return {}, []
return pass_config, pass_flows

is_cuda_ep = self.accelerator_spec.execution_provider != "TensorrtExecutionProvider"
is_trt_ep = self.accelerator_spec.execution_provider == "TensorrtExecutionProvider"
Expand All @@ -66,8 +85,8 @@ def _regulate_fp16(self, pass_config, pass_flows):
perf_tuning = "OrtPerfTuning"
trans_opt_fp16 = "OrtTransformerOptimization_cuda_fp16"
perf_tuning_fp16 = "OrtPerfTuning_trt_fp16"

for i, pf in enumerate(pass_flows):
pass_flows_by_fp16 = pass_flows.get("fp16", [])
for i, pf in enumerate(pass_flows_by_fp16):
new_pf = deepcopy(pf)
if "OrtMixedPrecision" not in pf:
for j, p in enumerate(pf):
Expand Down Expand Up @@ -98,24 +117,36 @@ def _regulate_fp16(self, pass_config, pass_flows):
}
}
)

pass_flows[i] = new_pf

pass_flows_by_fp16[i] = new_pf
if pass_flows_by_fp16:
pass_flows["fp16"] = pass_flows_by_fp16
return pass_config, pass_flows

def regulate_data_config(self, pass_config, pass_flows):
if not self.data_configs or not self.auto_optimizer_config or self.auto_optimizer_config.disable_auto_optimizer:
if not self.auto_optimizer_config or self.auto_optimizer_config.disable_auto_optimizer:
return pass_config, pass_flows

if len(self.data_configs) != 1:
raise ValueError("AutoOptimizer expects exactly one data config.")

passes_require_data_config = ["OnnxQuantization", "OrtPerfTuning"]
for p in passes_require_data_config:
# TODO(anyone): support multi data_config for different passes, pass_flows
p_names = self._find_pass_name_in_pass_flow(p, pass_flows)
for pn in p_names:
pass_config[pn]["config"]["data_config"] = self.data_configs[0]
passes_require_data_config = ["OrtPerfTuning", "IncQuantization", "OnnxQuantization"]
if not self.data_configs:
# remove the passes which require data_config
for pass_flow in pass_flows:
for p in passes_require_data_config:
p_names = self._find_pass_name_in_pass_flow(p, [pass_flow])
for pn in p_names:
pass_flow.remove(pn)
pass_config.pop(pn, None)
for p in pass_flow:
if p.lower().startswith("onnxquantization"):
pass_config[p]["config"]["quant_mode"] = "dynamic"
else:
if len(self.data_configs) != 1:
raise ValueError("AutoOptimizer expects exactly one data config.")

for p in passes_require_data_config:
# TODO(anyone): support multi data_config for different passes, pass_flows
p_names = self._find_pass_name_in_pass_flow(p, pass_flows)
for pn in p_names:
pass_config[pn]["config"]["data_config"] = self.data_configs[0]

return pass_config, pass_flows
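The new `_fill_precision_for_model_builder` helper gives each precision its own ModelBuilder variant so that, for example, fp32 and fp16 flows do not collide on a single pass name. A small self-contained illustration of the renaming with toy data (it mirrors the loop in this diff without importing Olive):

```python
# Toy data showing how ModelBuilder is renamed per precision and how the
# corresponding pass_config entries are generated.
pass_flows = {
    "fp32": [["ModelBuilder", "OrtPerfTuning"]],
    "fp16": [["ModelBuilder", "OrtPerfTuning"]],
}
pass_config = {}
for precision, pfs in pass_flows.items():
    for pass_flow in pfs:
        for i, p in enumerate(pass_flow):
            if p == "ModelBuilder":
                pass_flow[i] = f"ModelBuilder_{precision}"
                pass_config[pass_flow[i]] = {"type": "ModelBuilder", "config": {"precision": precision}}

print(pass_flows["fp16"])                 # [['ModelBuilder_fp16', 'OrtPerfTuning']]
print(pass_config["ModelBuilder_fp16"])   # {'type': 'ModelBuilder', 'config': {'precision': 'fp16'}}
```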

58 changes: 50 additions & 8 deletions olive/auto_optimizer/template_mapping.py
@@ -30,47 +30,89 @@ def get_available_passes_by_opt_level(opt_level):
return opt_level_passes[str(opt_level)]


def get_pass_flows_by_accelerator_ep_precision(opt_level, accelerator, ep, precision):
def remove_incompatible_passes(pass_flows):
# some passes within the same suggested pass_flow may be incompatible with each other,
# e.g. ModelBuilder(int4) -> MatMul int4 quantization; in that case the MatMul int4 quantization can be dropped
# these constraints are defined manually by Olive

# rule 1: if the model is produced by ModelBuilder, remove the following incompatible downstream passes
incompatible_passes_with_model_builder = [
"OnnxQuantization",
"IncQuantization",
"VitisAIQuantization",
"OnnxMatMul4Quantizer",
"OrtTransformersOptimization",
"OrtMixedPrecision",
]
for pass_flow in pass_flows:
if "ModelBuilder" in pass_flow:
for p in incompatible_passes_with_model_builder:
if p in pass_flow:
pass_flow.remove(p)

# remove duplicated pass_flows
pass_flows_tuple = {tuple(pf) for pf in pass_flows}
return [list(pf) for pf in pass_flows_tuple]


def get_pass_flows_by_accelerator_ep_precision(opt_level, accelerator, ep, precision, excluded_passes=None):
ep_literal = "ExecutionProvider"
ep = ep[: -len(ep_literal)].lower() if ep.endswith(ep_literal) else ep.lower()
precision = precision.lower()

available_passes_tree = get_available_passes_by_opt_level(opt_level)
passes_tree = get_available_passes_by_opt_level(opt_level)
excluded_passes = excluded_passes or []
available_passes_tree = []
for pass_level in passes_tree:
filtered_passes = [p for p in pass_level if p not in excluded_passes]
if not filtered_passes:
continue
available_passes_tree.append(filtered_passes)

passes_cap = get_pass_capability()
pass_flows = []

# given available_passes_tree is [a] -> [b, c] -> [d, e, f], generate all possible pass flows
# [a, b, d], [a, b, e], [a, b, f], [a, c, d], [a, c, e], [a, c, f]

# as we need to step over some intermediate passes, we cannot use len(pass_flow_candidate) to
# indicate the current pass level, instead, we use the length of available_passes_tree to indicate
# item in pass stack is (pass, depth, pass_flow_candidate)
# indicate the current pass depth; instead, we track an explicit depth index into available_passes_tree.

# item in pass stack is (depth, pass_flow_candidate)
pass_deque = deque([(-1, [])])
max_depth = len(available_passes_tree)
while pass_deque:
depth, pf_candidate = pass_deque.popleft()

# stopping rule: once the last pass level has been reached, the candidate flow is complete
if depth == len(available_passes_tree) - 1:
if depth == max_depth - 1:
pass_flows.append(pf_candidate)
continue

# if we don't have any pass in next level, we cannot step over it
keep_try = True
for next_level in range(depth + 1, len(available_passes_tree)):
for next_level in range(depth + 1, max_depth):
if keep_try:
for p_next in available_passes_tree[next_level]:
if _if_match_pass_capability(p_next, passes_cap, accelerator, ep, precision):
pass_deque.append((next_level, [*pf_candidate, p_next]))
# if we find one pass in next_level, break the outer loop
keep_try = False
# did not find any pass in next level, we cannot step over it
if keep_try:
# push back and increase depth
pass_deque.append((next_level, pf_candidate))
# not `elif` here, as we need to check special case for fp16
if not keep_try:
if precision == "fp16" and len(available_passes_tree[next_level]) > 1:
if precision == "fp16" and len(pf_candidate) > 1 and pf_candidate[-1] == "OrtTransformersOptimization":
# for fp16, we can also step over to the next level + 1 even if we find a pass in the next level
# e.g. we need to suggest both convert -> transformers opt -> mixed precision -> perf tuning
# and convert -> transformers opt -> perf tuning
keep_try = True
else:
break

return pass_flows
return remove_incompatible_passes(pass_flows)


def _if_match_pass_capability(p, passes_cap, accelerator, ep, precision):
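The traversal above picks at most one pass per level of `available_passes_tree` and can step over levels where no pass matches the accelerator/EP/precision. A deliberately simplified sketch of that enumeration (it filters with a toy predicate and drops empty levels, and does not reproduce the fp16 special case, which additionally emits flows that skip the mixed-precision level):

```python
# Simplified illustration of one-pass-per-level flow generation; the real function
# uses a BFS over (depth, candidate) pairs and handles an extra fp16 branch.
def generate_flows(levels, is_capable):
    # keep only capable passes per level and drop levels that end up empty (i.e. step over them)
    usable_levels = [[p for p in level if is_capable(p)] for level in levels]
    usable_levels = [level for level in usable_levels if level]
    flows = [[]]
    for level in usable_levels:
        flows = [[*flow, p] for flow in flows for p in level]
    return flows


levels = [
    ["OnnxConversion", "ModelBuilder"],
    ["OrtTransformersOptimization"],
    ["OrtPerfTuning"],
]
print(generate_flows(levels, lambda p: p != "ModelBuilder"))
# [['OnnxConversion', 'OrtTransformersOptimization', 'OrtPerfTuning']]
```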