diff --git a/charts/opea/common/guardrails-usvc/.helmignore b/charts/opea/common/guardrails-usvc/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/opea/common/guardrails-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/opea/common/guardrails-usvc/Chart.yaml b/charts/opea/common/guardrails-usvc/Chart.yaml new file mode 100644 index 0000000..86a4f27 --- /dev/null +++ b/charts/opea/common/guardrails-usvc/Chart.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: guardrails-usvc +description: The Helm chart for deploying guardrails-usvc as microservice +type: application +version: 1.0.0 +appVersion: "v1.0" +dependencies: + - name: tgi + version: 1.0.0 + repository: file://../tgi + condition: autodependency.enabled diff --git a/charts/opea/common/guardrails-usvc/README.md b/charts/opea/common/guardrails-usvc/README.md new file mode 100644 index 0000000..3e7f5dc --- /dev/null +++ b/charts/opea/common/guardrails-usvc/README.md @@ -0,0 +1,57 @@ +# guardrails-usvc + +Helm chart for deploying the guardrails microservice. + +guardrails-usvc depends on TGI; you should set SAFETY_GUARD_ENDPOINT as the tgi endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. Please use model `meta-llama/Meta-Llama-Guard-2-8B` during installation. + +After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`. 
+ +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/guardrails-usvc +export HFTOKEN="insert-your-huggingface-token-here" +export SAFETY_GUARD_ENDPOINT="http://tgi" +export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +helm dependency update +helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --wait +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/guardrails-usvc +export HFTOKEN="insert-your-huggingface-token-here" +helm dependency update +helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set autodependency.enabled=true --wait +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/guardrails-usvc 9090:9090` to expose the llm-uservice service for access. + +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:9090/v1/guardrails \ + -X POST \ + -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. 
The host path "modelUseHostPath" will be mounted to container as /data directory | +| image.repository | string | `"opea/guardrails-usvc"` | | +| service.port | string | `"9090"` | | +| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint | +| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using | diff --git a/charts/opea/common/guardrails-usvc/templates/_helpers.tpl b/charts/opea/common/guardrails-usvc/templates/_helpers.tpl new file mode 100644 index 0000000..088f884 --- /dev/null +++ b/charts/opea/common/guardrails-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "guardrails-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "guardrails-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "guardrails-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "guardrails-usvc.labels" -}} +helm.sh/chart: {{ include "guardrails-usvc.chart" . }} +{{ include "guardrails-usvc.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "guardrails-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "guardrails-usvc.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "guardrails-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "guardrails-usvc.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/opea/common/guardrails-usvc/templates/configmap.yaml b/charts/opea/common/guardrails-usvc/templates/configmap.yaml new file mode 100644 index 0000000..694bf4c --- /dev/null +++ b/charts/opea/common/guardrails-usvc/templates/configmap.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "guardrails-usvc.fullname" . }}-config + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.SAFETY_GUARD_ENDPOINT }} + SAFETY_GUARD_ENDPOINT: {{ tpl .Values.SAFETY_GUARD_ENDPOINT . 
| quote}} + {{- else }} + SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi" + {{- end }} + SAFETY_GUARD_MODEL_ID: {{ .Values.SAFETY_GUARD_MODEL_ID | quote }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + LOGFLAG: {{ .Values.LOGFLAG | quote }} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.SAFETY_GUARD_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} diff --git a/charts/opea/common/guardrails-usvc/templates/deployment.yaml b/charts/opea/common/guardrails-usvc/templates/deployment.yaml new file mode 100644 index 0000000..bb6d396 --- /dev/null +++ b/charts/opea/common/guardrails-usvc/templates/deployment.yaml @@ -0,0 +1,88 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "guardrails-usvc.fullname" . }} + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "guardrails-usvc.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: guardrails-usvc + containerPort: 9090 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/charts/opea/common/guardrails-usvc/templates/service.yaml b/charts/opea/common/guardrails-usvc/templates/service.yaml new file mode 100644 index 0000000..594312f --- /dev/null +++ b/charts/opea/common/guardrails-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "guardrails-usvc.fullname" . }} + labels: + {{- include "guardrails-usvc.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9090 + protocol: TCP + name: guardrails-usvc + selector: + {{- include "guardrails-usvc.selectorLabels" . | nindent 4 }} diff --git a/charts/opea/common/guardrails-usvc/templates/tests/test-pod.yaml b/charts/opea/common/guardrails-usvc/templates/tests/test-pod.yaml new file mode 100644 index 0000000..ec077d4 --- /dev/null +++ b/charts/opea/common/guardrails-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "guardrails-usvc.fullname" . }}-testpod" + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "guardrails-usvc.fullname" . }}:{{ .Values.service.port }}/v1/guardrails -sS --fail-with-body \ + -X POST \ + -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/charts/opea/common/guardrails-usvc/values.yaml b/charts/opea/common/guardrails-usvc/values.yaml new file mode 100644 index 0000000..314791e --- /dev/null +++ b/charts/opea/common/guardrails-usvc/values.yaml @@ -0,0 +1,97 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for guardrails-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +autodependency: + enabled: false + +replicaCount: 1 + +# TGI service endpoint +SAFETY_GUARD_ENDPOINT: "" +# Guard Model Id +SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +tgi: + LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + +image: + repository: opea/guardrails-tgi + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 9090 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" diff --git a/charts/opea/common/tgi/.helmignore b/charts/opea/common/tgi/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/opea/common/tgi/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/opea/common/tgi/Chart.yaml b/charts/opea/common/tgi/Chart.yaml new file mode 100644 index 0000000..7ab58f8 --- /dev/null +++ b/charts/opea/common/tgi/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: tgi +description: The Helm chart for HuggingFace Text Generation Inference Server +type: application +version: 1.0.0 +# The HF TGI version +appVersion: "2.1.0" diff --git a/charts/opea/common/tgi/README.md b/charts/opea/common/tgi/README.md new file mode 100644 index 0000000..0100378 --- /dev/null +++ b/charts/opea/common/tgi/README.md @@ -0,0 +1,51 @@ +# tgi + +Helm chart for deploying Hugging Face Text Generation Inference service. 
+ +## Installing the Chart + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME="bigscience/bloom-560m" +export HFTOKEN="insert-your-huggingface-token-here" +helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +# To deploy on Gaudi enabled kubernetes cluster +# helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +``` + +By default, the tgi service will download the "bigscience/bloom-560m" model, which is about 1.1GB. + +If you already cached the model locally, you can pass it to the container like this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/models--bigscience--bloom-560m" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/tgi 2080:80` to expose the tgi service for access. 
+ +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:2080/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| image.tag | string | `"1.4"` | | +| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See HPA section in ../../README.md before enabling! | diff --git a/charts/opea/common/tgi/gaudi-values.yaml b/charts/opea/common/tgi/gaudi-values.yaml new file mode 100644 index 0000000..25546c4 --- /dev/null +++ b/charts/opea/common/tgi/gaudi-values.yaml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +accelDevice: "gaudi" + +image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.1" + +MAX_INPUT_LENGTH: "1024" +MAX_TOTAL_TOKENS: "2048" +CUDA_GRAPHS: "" + +resources: + limits: + habana.ai/gaudi: 1 diff --git a/charts/opea/common/tgi/nv-values.yaml b/charts/opea/common/tgi/nv-values.yaml new file mode 100644 index 0000000..798af89 --- /dev/null +++ b/charts/opea/common/tgi/nv-values.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +accelDevice: "nvidia" + +image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "2.2.0" + +resources: + limits: + nvidia.com/gpu: 1 + +CUDA_GRAPHS: "" diff --git a/charts/opea/common/tgi/templates/_helpers.tpl b/charts/opea/common/tgi/templates/_helpers.tpl new file mode 100644 index 0000000..b672e83 --- /dev/null +++ b/charts/opea/common/tgi/templates/_helpers.tpl @@ -0,0 +1,69 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tgi.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "tgi.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "tgi.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Convert chart name to a string suitable as metric prefix +*/}} +{{- define "tgi.metricPrefix" -}} +{{- include "tgi.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tgi.labels" -}} +helm.sh/chart: {{ include "tgi.chart" . }} +{{ include "tgi.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tgi.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tgi.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tgi.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tgi.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/opea/common/tgi/templates/configmap.yaml b/charts/opea/common/tgi/templates/configmap.yaml new file mode 100644 index 0000000..e44d8ee --- /dev/null +++ b/charts/opea/common/tgi/templates/configmap.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "tgi.fullname" . }}-config + labels: + {{- include "tgi.labels" . 
| nindent 4 }} +data: + MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }} + PORT: {{ .Values.port | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.MAX_INPUT_LENGTH }} + MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }} + {{- end }} + {{- if .Values.MAX_TOTAL_TOKENS }} + MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }} + {{- end }} + {{- if .Values.CUDA_GRAPHS }} + CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }} + {{- end }} diff --git a/charts/opea/common/tgi/templates/deployment.yaml b/charts/opea/common/tgi/templates/deployment.yaml new file mode 100644 index 0000000..511cead --- /dev/null +++ b/charts/opea/common/tgi/templates/deployment.yaml @@ -0,0 +1,124 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + # remove if replica count should not be reset on pod update with HPA + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "tgi.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "tgi.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "tgi.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.extraCmdArgs }} + args: + {{- range .Values.extraCmdArgs }} + - {{ . | quote }} + {{- end }} + {{- end }} + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: {{ .Values.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 120 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "tgi.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/charts/opea/common/tgi/templates/horizontal-pod-autoscaler.yaml b/charts/opea/common/tgi/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 0000000..646ea9c --- /dev/null +++ b/charts/opea/common/tgi/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,53 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.horizontalPodAutoscaler.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "tgi.fullname" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "tgi.fullname" . }} + minReplicas: 1 + maxReplicas: {{ .Values.horizontalPodAutoscaler.maxReplicas }} + metrics: + - type: Object + object: + metric: + # TGI time metrics are in seconds + name: {{ include "tgi.metricPrefix" . }}_request_latency + describedObject: + apiVersion: v1 + # get metric for named object of given type (in same namespace) + kind: Service + name: {{ include "tgi.fullname" . }} + target: + # tgi_request_latency is average for all the TGI pods. 
To avoid replica fluctuations when + # TGI startup + request processing takes longer than HPA evaluation period, this uses + # "Value" (replicas = metric.value / target.value), instead of "averageValue" type: + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details + type: Value + value: 4 + behavior: + scaleDown: + stabilizationWindowSeconds: 180 + policies: + - type: Percent + value: 25 + periodSeconds: 90 + scaleUp: + selectPolicy: Max + stabilizationWindowSeconds: 0 + policies: + # Slow linear rampup in case additional CPU pods go to same node + # (i.e. interfere with each other) + - type: Pods + value: 1 + periodSeconds: 90 + #- type: Percent + # value: 25 + # periodSeconds: 90 +{{- end }} diff --git a/charts/opea/common/tgi/templates/service.yaml b/charts/opea/common/tgi/templates/service.yaml new file mode 100644 index 0000000..011cc37 --- /dev/null +++ b/charts/opea/common/tgi/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: {{ .Values.port }} + protocol: TCP + name: tgi + selector: + {{- include "tgi.selectorLabels" . 
| nindent 4 }} diff --git a/charts/opea/common/tgi/templates/servicemonitor.yaml b/charts/opea/common/tgi/templates/servicemonitor.yaml new file mode 100644 index 0000000..fdb1159 --- /dev/null +++ b/charts/opea/common/tgi/templates/servicemonitor.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Dashboard for the exposed TGI metrics: +# - https://grafana.com/grafana/dashboards/19831-text-generation-inference-dashboard/ +# Metric descriptions: +# - https://github.com/huggingface/text-generation-inference/discussions/1127#discussioncomment-7240527 + +{{- if .Values.horizontalPodAutoscaler.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "tgi.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "tgi.selectorLabels" . | nindent 6 }} + endpoints: + - interval: 4s + port: tgi + scheme: http +{{- end }} diff --git a/charts/opea/common/tgi/templates/tests/test-pod.yaml b/charts/opea/common/tgi/templates/tests/test-pod.yaml new file mode 100644 index 0000000..948f238 --- /dev/null +++ b/charts/opea/common/tgi/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "tgi.fullname" . }}-testpod" + labels: + {{- include "tgi.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "tgi.fullname" . }}/generate -sS --fail-with-body \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/charts/opea/common/tgi/values.yaml b/charts/opea/common/tgi/values.yaml new file mode 100644 index 0000000..805df10 --- /dev/null +++ b/charts/opea/common/tgi/values.yaml @@ -0,0 +1,138 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +# Enabling HPA will: +# - Ignore above replica count, as it will be controlled by HPA +# - Add example HPA scaling rules with thresholds suitable for Xeon deployments +# - Require custom metrics ConfigMap available in the main application chart +horizontalPodAutoscaler: + maxReplicas: 4 + enabled: false + +port: 2080 +shmSize: 1Gi + +# Set extraCmdArgs if you need to pass additional parameters to TGI for performance +# Refer to https://huggingface.co/docs/text-generation-inference/en/reference/launcher for more options. +# extraCmdArgs: ["--dtype","bfloat16"] + +image: + repository: ghcr.io/huggingface/text-generation-inference + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "2.2.0" + +# empty for CPU +accelDevice: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# Use TCP probe instead of HTTP due to bug #483 +# https://github.com/opea-project/GenAIExamples/issues/483 +livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + tcpSocket: + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 +#livenessProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +# failureThreshold: 24 +#readinessProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +#startupProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +# failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + +MAX_INPUT_LENGTH: "" +MAX_TOTAL_TOKENS: "" +CUDA_GRAPHS: "0" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following var, the behavior is not defined is both are set. + # By default, both var are set to empty, the model will be downloaded and saved to a tmp volume. 
+ modelUseHostPath: "" + modelUsePVC: "" + + # Prometheus Helm installation info for serviceMonitor + prometheusRelease: prometheus-stack