From d3132f12d01f267d16744c4884d4bb82ede72ce9 Mon Sep 17 00:00:00 2001
From: roc
Date: Fri, 26 Apr 2024 15:40:36 +0800
Subject: [PATCH] update at 2024-04-26 15:40:36

---
 codeblock/llama/llama3-cpu-8b.yaml  |  82 ++++++++++++++
 codeblock/llama/llama3-gpu-70b.yaml |  82 ++++++++++++++
 codeblock/llama/open-webui.yaml     |  71 +++++++++++++
 content/cases/llama3.md             | 159 +---------------------------
 4 files changed, 237 insertions(+), 157 deletions(-)
 create mode 100644 codeblock/llama/llama3-cpu-8b.yaml
 create mode 100644 codeblock/llama/llama3-gpu-70b.yaml
 create mode 100644 codeblock/llama/open-webui.yaml

diff --git a/codeblock/llama/llama3-cpu-8b.yaml b/codeblock/llama/llama3-cpu-8b.yaml
new file mode 100644
index 0000000..555cb4f
--- /dev/null
+++ b/codeblock/llama/llama3-cpu-8b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to be ready; ollama CLI commands can only run once it is up
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "0" # set the number of GPU cards here if you want to use an Nvidia GPU
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/llama3-gpu-70b.yaml b/codeblock/llama/llama3-gpu-70b.yaml
new file mode 100644
index 0000000..a2f7b44
--- /dev/null
+++ b/codeblock/llama/llama3-gpu-70b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:70b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to be ready; ollama CLI commands can only run once it is up
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "1" # declare one Nvidia GPU card
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/open-webui.yaml b/codeblock/llama/open-webui.yaml
new file mode 100644
index 0000000..f8e7962
--- /dev/null
+++ b/codeblock/llama/open-webui.yaml
@@ -0,0 +1,71 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: webui-pvc
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  accessModes: ["ReadWriteOnce"]
+  resources:
+    requests:
+      storage: 2Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: webui
+  namespace: llama
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: webui
+  template:
+    metadata:
+      labels:
+        app: webui
+    spec:
+      containers:
+      - name: webui
+        # highlight-next-line
+        image: imroc/open-webui:main # mirror image on Docker Hub, kept in sync automatically, safe to use
+        env:
+        - name: OLLAMA_BASE_URL
+          # highlight-next-line
+          value: http://ollama:11434 # address of the ollama service
+        tty: true
+        ports:
+        - containerPort: 8080
+        resources:
+          requests:
+            cpu: "500m"
+            memory: "500Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: webui-pvc
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: webui
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  type: ClusterIP
+  ports:
+  - port: 8080
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: webui
diff --git a/content/cases/llama3.md b/content/cases/llama3.md
index 21259fa..e6f41fd 100644
--- a/content/cases/llama3.md
+++ b/content/cases/llama3.md
@@ -10,168 +10,13 @@ kubectl create ns llama
 
 ## Deploy ollama
 
-```yaml showLineNumbers
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: ollama
-  namespace: llama
-spec:
-  serviceName: "ollama"
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ollama
-  template:
-    metadata:
-      labels:
-        app: ollama
-    spec:
-      initContainers:
-      - name: pull
-        image: ollama/ollama:latest
-        tty: true
-        stdin: true
-        command:
-        - bash
-        - -c
-        - |
-          # highlight-next-line
-          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
-          ollama serve &
-          sleep 5 # wait for the ollama server to be ready; ollama CLI commands can only run once it is up
-          result=`ollama list | grep $model`
-          if [ "$result" == "" ]; then
-            echo "downloading model $model"
-            ollama pull $model
-          else
-            echo "model $model has already been downloaded"
-          fi
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-      containers:
-      - name: ollama
-        image: ollama/ollama:latest
-        ports:
-        - containerPort: 11434
-        resources:
-          requests:
-            cpu: "2000m"
-            memory: "2Gi"
-          limits:
-            cpu: "4000m"
-            memory: "4Gi"
-            # highlight-next-line
-            nvidia.com/gpu: "0" # set the number of GPU cards here if you want to use an Nvidia GPU
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-        tty: true
-  volumeClaimTemplates:
-  - metadata:
-      name: ollama-volume
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      resources:
-        requests:
-          # highlight-next-line
-          storage: 50Gi # make sure the volume is large enough to hold the model
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: ollama
-  namespace: llama
-  labels:
-    app: ollama
-spec:
-  type: ClusterIP
-  ports:
-  - port: 11434
-    protocol: TCP
-    targetPort: 11434
-  selector:
-    app: ollama
-```
+
 
 ## Deploy open-webui
 
 open-webui is the web UI for large language models. It supports the llama family of models and talks to ollama over its API. The official image is `ghcr.io/open-webui/open-webui`, which is very slow to pull from within China, so it can be replaced with the mirror image on Docker Hub that is automatically kept in sync: `docker.io/imroc/open-webui`:
 
-```yaml showLineNumbers
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: webui-pvc
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  accessModes: ["ReadWriteOnce"]
-  resources:
-    requests:
-      storage: 2Gi
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: webui
-  namespace: llama
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: webui
-  template:
-    metadata:
-      labels:
-        app: webui
-    spec:
-      containers:
-      - name: webui
-        # highlight-next-line
-        image: imroc/open-webui:main # mirror image on Docker Hub, kept in sync automatically, safe to use
-        env:
-        - name: OLLAMA_BASE_URL
-          # highlight-next-line
-          value: http://ollama:11434 # address of the ollama service
-        tty: true
-        ports:
-        - containerPort: 8080
-        resources:
-          requests:
-            cpu: "500m"
-            memory: "500Mi"
-          limits:
-            cpu: "1000m"
-            memory: "1Gi"
-        volumeMounts:
-        - name: webui-volume
-          mountPath: /app/backend/data
-      volumes:
-      - name: webui-volume
-        persistentVolumeClaim:
-          claimName: webui-pvc
-
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: webui
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  type: ClusterIP
-  ports:
-  - port: 8080
-    protocol: TCP
-    targetPort: 8080
-  selector:
-    app: webui
-```
+
 
 ## Open the webui
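
Once the patch is applied, a minimal sketch of how the added manifests might be deployed and accessed, assuming the `llama` namespace from the tutorial already exists and `kubectl` targets the right cluster; the choice of the 8B CPU variant and the local port are illustrative:

```bash
# Deploy ollama (pick the CPU 8B or GPU 70B manifest) and open-webui
kubectl apply -f codeblock/llama/llama3-cpu-8b.yaml
kubectl apply -f codeblock/llama/open-webui.yaml

# Wait for the pods to become ready; the init container downloads the model
# first, which can take a while depending on model size and bandwidth
kubectl -n llama rollout status statefulset/ollama
kubectl -n llama rollout status deployment/webui

# Expose the webui locally, then open http://localhost:8080 in a browser
kubectl -n llama port-forward svc/webui 8080:8080
```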