Skip to content

Commit

Permalink
Add llama-3 to list of supported DML LLM models (#1211)
Browse files — browse the repository at this point in the history
## Describe your changes

Adds `llama-3-8b-it` (`meta-llama/Meta-Llama-3-8B-Instruct`) to the supported DML LLM models, generalizes the quantization check in `llm.py` from AWQ-only (`quant_strategy == "awq"`) to any provided strategy (`quant_strategy is not None`, with `rtn` added to the `--quant_strategy` choices, making the unknown-strategy `sys.exit(1)` branch and the `sys` import unnecessary), and removes `neural-compressor` from `requirements.txt`.
## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
PatriceVignola committed Jun 26, 2024
1 parent 76d4646 commit 7fa2c41
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 7 deletions.
8 changes: 2 additions & 6 deletions examples/directml/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import json
import os
import shutil
import sys
import warnings
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -137,7 +136,7 @@ def optimize(
with Path.open(script_dir / "config_llm.json") as fin:
olive_config = json.load(fin)

if quant_strategy == "awq":
if quant_strategy is not None:
olive_config["passes"]["quantize"] = {
"type": "IncStaticQuantization",
"disable_search": True,
Expand All @@ -157,9 +156,6 @@ def optimize(
"user_script": "user_script.py",
},
}
elif quant_strategy is not None:
print(f"Unknown quantization strategy {quant_strategy}")
sys.exit(1)

olive_config["systems"]["local_system"]["config"]["accelerators"][0]["execution_providers"] = {
"dml": ["DmlExecutionProvider"],
Expand Down Expand Up @@ -320,7 +316,7 @@ def main():
)
parser.add_argument(
"--quant_strategy",
choices=["awq"],
choices=["awq", "rtn"],
help="Which quantization strategy to use. Defaults to None (no quantization).",
default=None,
type=str,
Expand Down
2 changes: 2 additions & 0 deletions examples/directml/llm/model_type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"phi-3-mini-4k": "microsoft/Phi-3-mini-4k-instruct",
"phi-3-mini-128k": "microsoft/Phi-3-mini-128k-instruct",
"gemma-7b-it": "google/gemma-7b-it",
"llama-3-8b-it": "meta-llama/Meta-Llama-3-8B-Instruct",
}


Expand Down Expand Up @@ -60,6 +61,7 @@ def get_supported_llm_models():
"phi-3-mini-4k",
"phi-3-mini-128k",
"gemma-7b-it",
"llama-3-8b-it",
]


Expand Down
1 change: 0 additions & 1 deletion examples/directml/llm/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
huggingface-hub
markdown
mdtex2html
neural-compressor
optimum
protobuf==3.20.3 # protobuf 4.x aborts with OOM when optimizing large models
Pygments
Expand Down

0 comments on commit 7fa2c41

Please sign in to comment.