diff --git a/codeblock/llama/llama3-cpu-8b.yaml b/codeblock/llama/llama3-cpu-8b.yaml
new file mode 100644
index 0000000..555cb4f
--- /dev/null
+++ b/codeblock/llama/llama3-cpu-8b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to become ready; ollama CLI commands only work once it is up
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "0" # to use an Nvidia GPU, declare the number of cards here
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/llama3-gpu-70b.yaml b/codeblock/llama/llama3-gpu-70b.yaml
new file mode 100644
index 0000000..a2f7b44
--- /dev/null
+++ b/codeblock/llama/llama3-gpu-70b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:70b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to become ready; ollama CLI commands only work once it is up
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "1" # declare one Nvidia GPU card
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/open-webui.yaml b/codeblock/llama/open-webui.yaml
new file mode 100644
index 0000000..f8e7962
--- /dev/null
+++ b/codeblock/llama/open-webui.yaml
@@ -0,0 +1,71 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: webui-pvc
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  accessModes: ["ReadWriteOnce"]
+  resources:
+    requests:
+      storage: 2Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: webui
+  namespace: llama
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: webui
+  template:
+    metadata:
+      labels:
+        app: webui
+    spec:
+      containers:
+      - name: webui
+        # highlight-next-line
+        image: imroc/open-webui:main # mirror image on Docker Hub, kept in sync automatically; safe to use
+        env:
+        - name: OLLAMA_BASE_URL
+          # highlight-next-line
+          value: http://ollama:11434 # address of the ollama service
+        tty: true
+        ports:
+        - containerPort: 8080
+        resources:
+          requests:
+            cpu: "500m"
+            memory: "500Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: webui-pvc
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: webui
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  type: ClusterIP
+  ports:
+  - port: 8080
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: webui
diff --git a/content/cases/llama3.md b/content/cases/llama3.md
index 21259fa..e6f41fd 100644
--- a/content/cases/llama3.md
+++ b/content/cases/llama3.md
@@ -10,168 +10,13 @@ kubectl create ns llama
## Deploy ollama
-```yaml showLineNumbers
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: ollama
-  namespace: llama
-spec:
-  serviceName: "ollama"
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ollama
-  template:
-    metadata:
-      labels:
-        app: ollama
-    spec:
-      initContainers:
-      - name: pull
-        image: ollama/ollama:latest
-        tty: true
-        stdin: true
-        command:
-        - bash
-        - -c
-        - |
-          # highlight-next-line
-          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
-          ollama serve &
-          sleep 5 # wait for the ollama server to become ready; ollama CLI commands only work once it is up
-          result=`ollama list | grep $model`
-          if [ "$result" == "" ]; then
-            echo "downloading model $model"
-            ollama pull $model
-          else
-            echo "model $model has already been downloaded"
-          fi
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-      containers:
-      - name: ollama
-        image: ollama/ollama:latest
-        ports:
-        - containerPort: 11434
-        resources:
-          requests:
-            cpu: "2000m"
-            memory: "2Gi"
-          limits:
-            cpu: "4000m"
-            memory: "4Gi"
-            # highlight-next-line
-            nvidia.com/gpu: "0" # to use an Nvidia GPU, declare the number of cards here
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-        tty: true
-  volumeClaimTemplates:
-  - metadata:
-      name: ollama-volume
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      resources:
-        requests:
-          # highlight-next-line
-          storage: 50Gi # make sure the volume is large enough to hold the model
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: ollama
-  namespace: llama
-  labels:
-    app: ollama
-spec:
-  type: ClusterIP
-  ports:
-  - port: 11434
-    protocol: TCP
-    targetPort: 11434
-  selector:
-    app: ollama
-```
+
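+After applying the manifest, the init container downloads the model before the pod becomes ready, which can take a while on the first run. A quick way to apply and verify, assuming the manifest is saved as `llama3-cpu-8b.yaml` (the StatefulSet pod is named `ollama-0`):
+
+```bash
+kubectl apply -f llama3-cpu-8b.yaml
+# wait until the pod is ready (the init container pulls the model first)
+kubectl -n llama rollout status statefulset/ollama
+# confirm the model has been downloaded
+kubectl -n llama exec -it ollama-0 -- ollama list
+```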
## Deploy open-webui
open-webui is a web UI for large language models. It supports the llama family of models and talks to ollama through its API. The official image is `ghcr.io/open-webui/open-webui`, which is very slow to pull from within China; you can swap in the Docker Hub mirror, which is kept in sync automatically: `docker.io/imroc/open-webui`:
-```yaml showLineNumbers
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: webui-pvc
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  accessModes: ["ReadWriteOnce"]
-  resources:
-    requests:
-      storage: 2Gi
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: webui
-  namespace: llama
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: webui
-  template:
-    metadata:
-      labels:
-        app: webui
-    spec:
-      containers:
-      - name: webui
-        # highlight-next-line
-        image: imroc/open-webui:main # mirror image on Docker Hub, kept in sync automatically; safe to use
-        env:
-        - name: OLLAMA_BASE_URL
-          # highlight-next-line
-          value: http://ollama:11434 # address of the ollama service
-        tty: true
-        ports:
-        - containerPort: 8080
-        resources:
-          requests:
-            cpu: "500m"
-            memory: "500Mi"
-          limits:
-            cpu: "1000m"
-            memory: "1Gi"
-        volumeMounts:
-        - name: webui-volume
-          mountPath: /app/backend/data
-      volumes:
-      - name: webui-volume
-        persistentVolumeClaim:
-          claimName: webui-pvc
-
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: webui
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  type: ClusterIP
-  ports:
-  - port: 8080
-    protocol: TCP
-    targetPort: 8080
-  selector:
-    app: webui
-```
+
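+Before opening the webui, you can check that the Deployment is up and that ollama is reachable at the address configured in `OLLAMA_BASE_URL`. A minimal check, assuming the manifest is saved as `open-webui.yaml` (`/api/tags` is ollama's endpoint for listing downloaded models):
+
+```bash
+kubectl apply -f open-webui.yaml
+kubectl -n llama rollout status deployment/webui
+# optional: verify the ollama API responds inside the cluster
+kubectl -n llama run api-check --rm -it --restart=Never \
+  --image=curlimages/curl --command -- curl -s http://ollama:11434/api/tags
+```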
## Open the webui
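+If no Ingress or LoadBalancer is configured yet, one simple way to reach the webui during testing is to port-forward the `webui` Service and browse to it locally, for example:
+
+```bash
+# forward local port 8080 to the webui Service
+kubectl -n llama port-forward svc/webui 8080:8080
+# then open http://localhost:8080 in a browser
+```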