diff --git a/codeblock/llama/llama3-cpu-8b.yaml b/codeblock/llama/llama3-cpu-8b.yaml
new file mode 100644
index 0000000..555cb4f
--- /dev/null
+++ b/codeblock/llama/llama3-cpu-8b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to become ready before running ollama CLI commands
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "0" # declare the GPU card here if you want to use an Nvidia GPU
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/llama3-gpu-70b.yaml b/codeblock/llama/llama3-gpu-70b.yaml
new file mode 100644
index 0000000..a2f7b44
--- /dev/null
+++ b/codeblock/llama/llama3-gpu-70b.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: llama
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      initContainers:
+      - name: pull
+        image: ollama/ollama:latest
+        tty: true
+        stdin: true
+        command:
+        - bash
+        - -c
+        - |
+          # highlight-next-line
+          model="llama3:70b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
+          ollama serve &
+          sleep 5 # wait for the ollama server to become ready before running ollama CLI commands
+          result=`ollama list | grep $model`
+          if [ "$result" == "" ]; then
+            echo "downloading model $model"
+            ollama pull $model
+          else
+            echo "model $model has already been downloaded"
+          fi
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          requests:
+            cpu: "2000m"
+            memory: "2Gi"
+            # highlight-next-line
+            nvidia.com/gpu: "1" # request one Nvidia GPU card
+          limits:
+            cpu: "4000m"
+            memory: "4Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          # highlight-next-line
+          storage: 200Gi # make sure the volume is large enough to hold the model
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llama
+  labels:
+    app: ollama
+spec:
+  type: ClusterIP
+  ports:
+  - port: 11434
+    protocol: TCP
+    targetPort: 11434
+  selector:
+    app: ollama
diff --git a/codeblock/llama/open-webui.yaml b/codeblock/llama/open-webui.yaml
new file mode 100644
index 0000000..f8e7962
--- /dev/null
+++ b/codeblock/llama/open-webui.yaml
@@ -0,0 +1,71 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: webui-pvc
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  accessModes: ["ReadWriteOnce"]
+  resources:
+    requests:
+      storage: 2Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: webui
+  namespace: llama
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: webui
+  template:
+    metadata:
+      labels:
+        app: webui
+    spec:
+      containers:
+      - name: webui
+        # highlight-next-line
+        image: imroc/open-webui:main # mirror image on Docker Hub, auto-synced long term, safe to use
+        env:
+        - name: OLLAMA_BASE_URL
+          # highlight-next-line
+          value: http://ollama:11434 # address of the ollama service
+        tty: true
+        ports:
+        - containerPort: 8080
+        resources:
+          requests:
+            cpu: "500m"
+            memory: "500Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: webui-pvc
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: webui
+  namespace: llama
+  labels:
+    app: webui
+spec:
+  type: ClusterIP
+  ports:
+  - port: 8080
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: webui
diff --git a/content/cases/llama3.md b/content/cases/llama3.md
index 21259fa..e6f41fd 100644
--- a/content/cases/llama3.md
+++ b/content/cases/llama3.md
@@ -10,168 +10,13 @@ kubectl create ns llama
 
 ## Deploy ollama
 
-```yaml showLineNumbers
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: ollama
-  namespace: llama
-spec:
-  serviceName: "ollama"
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ollama
-  template:
-    metadata:
-      labels:
-        app: ollama
-    spec:
-      initContainers:
-      - name: pull
-        image: ollama/ollama:latest
-        tty: true
-        stdin: true
-        command:
-        - bash
-        - -c
-        - |
-          # highlight-next-line
-          model="llama3:8b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
-          ollama serve &
-          sleep 5 # wait for the ollama server to become ready before running ollama CLI commands
-          result=`ollama list | grep $model`
-          if [ "$result" == "" ]; then
-            echo "downloading model $model"
-            ollama pull $model
-          else
-            echo "model $model has already been downloaded"
-          fi
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-      containers:
-      - name: ollama
-        image: ollama/ollama:latest
-        ports:
-        - containerPort: 11434
-        resources:
-          requests:
-            cpu: "2000m"
-            memory: "2Gi"
-          limits:
-            cpu: "4000m"
-            memory: "4Gi"
-            # highlight-next-line
-            nvidia.com/gpu: "0" # declare the GPU card here if you want to use an Nvidia GPU
-        volumeMounts:
-        - name: ollama-volume
-          mountPath: /root/.ollama
-        tty: true
-  volumeClaimTemplates:
-  - metadata:
-      name: ollama-volume
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      resources:
-        requests:
-          # highlight-next-line
-          storage: 50Gi # make sure the volume is large enough to hold the model
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: ollama
-  namespace: llama
-  labels:
-    app: ollama
-spec:
-  type: ClusterIP
-  ports:
-  - port: 11434
-    protocol: TCP
-    targetPort: 11434
-  selector:
-    app: ollama
-```
+
 
 ## Deploy open-webui
 
 open-webui is the web UI for large language models. It supports the llama family of models and talks to ollama over its API. The official image is `ghcr.io/open-webui/open-webui`, which is very slow to pull from within mainland China, so you can replace it with the long-term auto-synced mirror image on Docker Hub: `docker.io/imroc/open-webui`:
 
-```yaml showLineNumbers
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: webui-pvc
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  accessModes: ["ReadWriteOnce"]
-  resources:
-    requests:
-      storage: 2Gi
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: webui
-  namespace: llama
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: webui
-  template:
-    metadata:
-      labels:
-        app: webui
-    spec:
-      containers:
-      - name: webui
-        # highlight-next-line
-        image: imroc/open-webui:main # mirror image on Docker Hub, auto-synced long term, safe to use
-        env:
-        - name: OLLAMA_BASE_URL
-          # highlight-next-line
-          value: http://ollama:11434 # address of the ollama service
-        tty: true
-        ports:
-        - containerPort: 8080
-        resources:
-          requests:
-            cpu: "500m"
-            memory: "500Mi"
-          limits:
-            cpu: "1000m"
-            memory: "1Gi"
-        volumeMounts:
-        - name: webui-volume
-          mountPath: /app/backend/data
-      volumes:
-      - name: webui-volume
-        persistentVolumeClaim:
-          claimName: webui-pvc
-
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: webui
-  namespace: llama
-  labels:
-    app: webui
-spec:
-  type: ClusterIP
-  ports:
-  - port: 8080
-    protocol: TCP
-    targetPort: 8080
-  selector:
-    app: webui
-```
+
 
 ## Open the webui
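
For a quick sanity check of the manifests added in this PR, something like the flow below can be used. This is a minimal sketch, not part of the change itself: the file paths are the ones introduced above, the 8B CPU variant is picked only as an example, and `/api/tags` is ollama's endpoint for listing locally available models.

```bash
# Create the namespace and deploy one ollama variant plus the web UI
kubectl create ns llama
kubectl apply -f codeblock/llama/llama3-cpu-8b.yaml
kubectl apply -f codeblock/llama/open-webui.yaml

# Wait until the init container has pulled the model and the pod is ready
kubectl -n llama rollout status statefulset/ollama

# Confirm the ollama API responds and lists the downloaded model
kubectl -n llama port-forward svc/ollama 11434:11434 &
curl http://localhost:11434/api/tags

# Then reach the web UI locally on port 8080
kubectl -n llama port-forward svc/webui 8080:8080
```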