diff --git a/charts/ai-stack/Chart.lock b/charts/ai-stack/Chart.lock
index fa59f6a..2fa2ecf 100644
--- a/charts/ai-stack/Chart.lock
+++ b/charts/ai-stack/Chart.lock
@@ -11,5 +11,8 @@ dependencies:
 - name: chromadb
   repository: https://infracloudio.github.io/charts
   version: 0.1.3
-digest: sha256:0febd220a71c6533c04a53affcfbeca2a77261acba6ded41f424cc34c2a056ff
-generated: "2024-08-19T19:46:03.544448+05:30"
+- name: vllm
+  repository: https://infracloudio.github.io/charts
+  version: 0.1.0
+digest: sha256:14b5e60e54b3618e5d950841fee42743eb9d50d2fed44d8d46484c97adbffde6
+generated: "2024-09-19T18:46:10.274968+05:30"
diff --git a/charts/ai-stack/Chart.yaml b/charts/ai-stack/Chart.yaml
index f98cab6..68b4e04 100644
--- a/charts/ai-stack/Chart.yaml
+++ b/charts/ai-stack/Chart.yaml
@@ -16,7 +16,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.3.8
+version: 0.4.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
@@ -49,6 +49,12 @@ dependencies:
     alias: vectordb
     condition: vectordb.enabled
 
+  - name: vllm
+    version: 0.1.0
+    repository: "https://infracloudio.github.io/charts"
+    alias: vllm
+    condition: vllm.enabled
+
 keywords:
   - ai-stack
   - ai-services
diff --git a/charts/ai-stack/README.md b/charts/ai-stack/README.md
index 0668837..9169a89 100644
--- a/charts/ai-stack/README.md
+++ b/charts/ai-stack/README.md
@@ -13,6 +13,7 @@ The AI stack consists of the following components:
 - [Text Generation Inference(TGI)](../text-generation-inference/)
 - [Grafana Dashboards](../infracloud-dashboards)
 - [ChromaDB](../chromadb)
+- [vLLM](../vllm)
 
 ## Setup Helm Repository
 
diff --git a/charts/ai-stack/charts/vllm-0.1.0.tgz b/charts/ai-stack/charts/vllm-0.1.0.tgz
new file mode 100644
index 0000000..13b09cd
Binary files /dev/null and b/charts/ai-stack/charts/vllm-0.1.0.tgz differ
diff --git a/charts/ai-stack/values.yaml b/charts/ai-stack/values.yaml
index 2e9d93b..fdcad0c 100644
--- a/charts/ai-stack/values.yaml
+++ b/charts/ai-stack/values.yaml
@@ -185,3 +185,53 @@ reranker:
     - name: hf-cache
       persistentVolumeClaim:
         claimName: hf-cache
+
+
+# Values for vllm: the vllm chart
+# Reference: https://artifacthub.io/packages/helm/infracloud-charts/vllm?modal=values
+vllm:
+  enabled: false
+
+  config:
+    model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+  env:
+    - name: HF_API_TOKEN
+      valueFrom:
+        secretKeyRef:
+          name: hf-api-token
+          key: HF_API_TOKEN
+    - name: HF_HUB_OFFLINE
+      value: "1"
+    - name: HF_HUB_CACHE
+      value: "/model"
+
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+    requests:
+      nvidia.com/gpu: 1
+
+  strategy:
+    type: Recreate
+
+  service:
+    type: LoadBalancer
+    port: 8000
+
+  volumeMounts:
+    - name: hf-cache
+      mountPath: /model
+    # Mount the memory-backed `shm` volume (declared under `volumes`) at /dev/shm so the
+    # container can use it. NOTE(review): confirm the vllm subchart adds no such mount itself.
+    - name: shm
+      mountPath: /dev/shm
+
+  volumes:
+    - name: hf-cache
+      persistentVolumeClaim:
+        claimName: hf-cache
+    - name: shm
+      emptyDir:
+        medium: Memory
+        sizeLimit: "1Gi"