update at 2024-04-26 15:40:36
parent b66257a7c1
commit d3132f12d0

llama/llama3-cpu-8b.yaml (new file, @@ -0,0 +1,82 @@):

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ollama
  namespace: llama
spec:
  serviceName: "ollama"
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      initContainers:
      - name: pull
        image: ollama/ollama:latest
        tty: true
        stdin: true
        command:
        - bash
        - -c
        - |
          # highlight-next-line
          model="llama3:8b" # replace with the model you need; model library: https://ollama.com/library/llama3
          ollama serve &
          sleep 5 # wait for the ollama server to be ready; the ollama CLI only works once it is up
          result=`ollama list | grep $model`
          if [ "$result" == "" ]; then
            echo "downloading model $model"
            ollama pull $model
          else
            echo "model $model has already been downloaded"
          fi
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
      containers:
      - name: ollama
        image: ollama/ollama:latest
        ports:
        - containerPort: 11434
        resources:
          requests:
            cpu: "2000m"
            memory: "2Gi"
            # highlight-next-line
            nvidia.com/gpu: "0" # to use an Nvidia GPU, declare the card here
          limits:
            cpu: "4000m"
            memory: "4Gi"
            nvidia.com/gpu: "0" # extended resources must be set equally in requests and limits
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
        tty: true
  volumeClaimTemplates:
  - metadata:
      name: ollama-volume
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          # highlight-next-line
          storage: 200Gi # make sure the volume is large enough to hold the model
---
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llama
  labels:
    app: ollama
spec:
  type: ClusterIP
  ports:
  - port: 11434
    protocol: TCP
    targetPort: 11434
  selector:
    app: ollama
```
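A minimal rollout sequence for this manifest, assuming it is saved as `llama/llama3-cpu-8b.yaml` (the path the article references via FileBlock) and that the `llama` namespace already exists:

```bash
kubectl apply -f llama/llama3-cpu-8b.yaml

# the init container blocks pod startup until the model is pulled;
# follow its download progress (pod name ollama-0 comes from the StatefulSet)
kubectl -n llama logs -f ollama-0 -c pull

# wait for the StatefulSet to settle
kubectl -n llama rollout status statefulset/ollama
```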

A second new file (@@ -0,0 +1,82 @@), the GPU variant of the same StatefulSet, running llama3:70b on one Nvidia GPU:

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ollama
  namespace: llama
spec:
  serviceName: "ollama"
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      initContainers:
      - name: pull
        image: ollama/ollama:latest
        tty: true
        stdin: true
        command:
        - bash
        - -c
        - |
          # highlight-next-line
          model="llama3:70b" # replace with the model you need; model library: https://ollama.com/library/llama3
          ollama serve &
          sleep 5 # wait for the ollama server to be ready; the ollama CLI only works once it is up
          result=`ollama list | grep $model`
          if [ "$result" == "" ]; then
            echo "downloading model $model"
            ollama pull $model
          else
            echo "model $model has already been downloaded"
          fi
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
      containers:
      - name: ollama
        image: ollama/ollama:latest
        ports:
        - containerPort: 11434
        resources:
          requests:
            cpu: "2000m"
            memory: "2Gi"
            # highlight-next-line
            nvidia.com/gpu: "1" # request one Nvidia GPU card
          limits:
            cpu: "4000m"
            memory: "4Gi"
            nvidia.com/gpu: "1" # extended resources must be set equally in requests and limits
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
        tty: true
  volumeClaimTemplates:
  - metadata:
      name: ollama-volume
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          # highlight-next-line
          storage: 200Gi # make sure the volume is large enough to hold the model
---
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llama
  labels:
    app: ollama
spec:
  type: ClusterIP
  ports:
  - port: 11434
    protocol: TCP
    targetPort: 11434
  selector:
    app: ollama
```
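This GPU variant only schedules if nodes actually expose the `nvidia.com/gpu` resource, which requires the NVIDIA device plugin and container runtime on those nodes. Two quick sanity checks, sketched under the assumption that `kubectl` points at the right cluster:

```bash
# confirm at least one node advertises the nvidia.com/gpu resource
kubectl describe nodes | grep -i 'nvidia.com/gpu'

# once ollama-0 is running, confirm the container sees the card
# (nvidia-smi is made available by the NVIDIA container runtime)
kubectl -n llama exec -it ollama-0 -- nvidia-smi
```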

llama/open-webui.yaml (new file, @@ -0,0 +1,71 @@):

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: webui-pvc
  namespace: llama
  labels:
    app: webui
spec:
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 2Gi

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: webui
  namespace: llama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: webui
  template:
    metadata:
      labels:
        app: webui
    spec:
      containers:
      - name: webui
        # highlight-next-line
        image: imroc/open-webui:main # mirror image on Docker Hub, automatically kept in sync; safe to use
        env:
        - name: OLLAMA_BASE_URL
          # highlight-next-line
          value: http://ollama:11434 # address of the ollama service
        tty: true
        ports:
        - containerPort: 8080
        resources:
          requests:
            cpu: "500m"
            memory: "500Mi"
          limits:
            cpu: "1000m"
            memory: "1Gi"
        volumeMounts:
        - name: webui-volume
          mountPath: /app/backend/data
      volumes:
      - name: webui-volume
        persistentVolumeClaim:
          claimName: webui-pvc

---
apiVersion: v1
kind: Service
metadata:
  name: webui
  namespace: llama
  labels:
    app: webui
spec:
  type: ClusterIP
  ports:
  - port: 8080
    protocol: TCP
    targetPort: 8080
  selector:
    app: webui
```
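Applying and verifying this, assuming the manifest is saved as `llama/open-webui.yaml` (the path the article references via FileBlock):

```bash
kubectl apply -f llama/open-webui.yaml

# wait for the Deployment to become ready
kubectl -n llama rollout status deployment/webui
kubectl -n llama get pods -l app=webui
```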

@@ -10,168 +10,13 @@ kubectl create ns llama

## Deploy ollama

<FileBlock file="llama/llama3-cpu-8b.yaml" showLineNumbers />

## Deploy open-webui

open-webui is the web interface for the large models: it supports the llama family and talks to ollama over its API. The official image is `ghcr.io/open-webui/open-webui`, which is very slow to pull from within mainland China, so you can swap in the mirror image on Docker Hub that is automatically kept in sync: `docker.io/imroc/open-webui`:

<FileBlock file="llama/open-webui.yaml" showLineNumbers />

## Open the webui

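The webui Service is ClusterIP, so the simplest way in from a workstation is a port-forward (a minimal sketch; an Ingress or a LoadBalancer Service is the more durable option):

```bash
# forward the webui Service, then browse to http://localhost:8080
kubectl -n llama port-forward svc/webui 8080:8080
```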