Skip to content

Commit

Permalink
Add llama-3 to list of supported DML LLM models (#1211)
Browse files — browse the repository at this point in the history
## Describe your changes

Adds `llama-3-8b-it` (`meta-llama/Meta-Llama-3-8B-Instruct`) to the supported DML LLM models, generalizes the quantization check in `llm.py` from AWQ-only (`quant_strategy == "awq"`) to any provided strategy (`quant_strategy is not None`, with `rtn` added to the `--quant_strategy` choices, making the unknown-strategy `sys.exit(1)` branch and the `sys` import unnecessary), and removes `neural-compressor` from `requirements.txt`.
## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
PatriceVignola committed Jun 26, 2024
1 parent 76d4646 commit 7fa2c41
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 7 deletions.
8 changes: 2 additions & 6 deletions examples/directml/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import json
import os
import shutil
import sys
import warnings
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -137,7 +136,7 @@ def optimize(
with Path.open(script_dir / "config_llm.json") as fin:
olive_config = json.load(fin)

if quant_strategy == "awq":
if quant_strategy is not None:
olive_config["passes"]["quantize"] = {
"type": "IncStaticQuantization",
"disable_search": True,
Expand All @@ -157,9 +156,6 @@ def optimize(
"user_script": "user_script.py",
},
}
elif quant_strategy is not None:
print(f"Unknown quantization strategy {quant_strategy}")
sys.exit(1)

olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = {
"dml": ["DmlExecutionProvider"],
Expand Down Expand Up @@ -320,7 +316,7 @@ def main():
)
parser.add_argument(
"--quant_strategy",
choices=["awq"],
choices=["awq", "rtn"],
help="Which quantization strategy to use. Defaults to None (no quantization).",
default=None,
type=str,
Expand Down
2 changes: 2 additions & 0 deletions examples/directml/llm/model_type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"phi-3-mini-4k": "microsoft/Phi-3-mini-4k-instruct",
"phi-3-mini-128k": "microsoft/Phi-3-mini-128k-instruct",
"gemma-7b-it": "google/gemma-7b-it",
"llama-3-8b-it": "meta-llama/Meta-Llama-3-8B-Instruct",
}


Expand Down Expand Up @@ -60,6 +61,7 @@ def get_supported_llm_models():
"phi-3-mini-4k",
"phi-3-mini-128k",
"gemma-7b-it",
"llama-3-8b-it",
]


Expand Down
1 change: 0 additions & 1 deletion examples/directml/llm/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
huggingface-hub
markdown
mdtex2html
neural-compressor
optimum
protobuf==3.20.3 # protobuf 4.x aborts with OOM when optimizing large models
Pygments
Expand Down

0 comments on commit 7fa2c41

Please sign in to comment.