```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ollama
  namespace: llama
spec:
  serviceName: "ollama"
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      initContainers:
      - name: pull
        image: ollama/ollama:latest
        tty: true
        stdin: true
        command:
        - bash
        - -c
        - |
          # highlight-next-line
          model="llama3:70b" # replace with the model you want to use; model library: https://ollama.com/library/llama3
          ollama serve &
          sleep 5 # wait for the ollama server to become ready; the ollama CLI only works once it is up
          result=$(ollama list | grep "$model")
          if [ -z "$result" ]; then
            echo "downloading model $model"
            ollama pull "$model"
          else
            echo "model $model has already been downloaded"
          fi
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
      containers:
      - name: ollama
        image: ollama/ollama:latest
        ports:
        - containerPort: 11434
        resources:
          requests:
            cpu: "2000m"
            memory: "2Gi"
            # highlight-next-line
            nvidia.com/gpu: "1" # request one NVIDIA GPU
          limits:
            cpu: "4000m"
            memory: "4Gi"
            nvidia.com/gpu: "1" # extended resources require limits equal to requests
        volumeMounts:
        - name: ollama-volume
          mountPath: /root/.ollama
        tty: true
  volumeClaimTemplates:
  - metadata:
      name: ollama-volume
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          # highlight-next-line
          storage: 200Gi # make sure the volume is large enough to hold the model
---
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llama
  labels:
    app: ollama
spec:
  type: ClusterIP
  ports:
  - port: 11434
    protocol: TCP
    targetPort: 11434
  selector:
    app: ollama
```
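
To deploy this, apply the manifest and then exercise the Ollama API through the Service. Below is a minimal sketch, assuming the manifest is saved as `ollama.yaml` and that the `llama` namespace (referenced by the manifest but not created in it) does not exist yet:

```bash
# Create the namespace referenced by the manifest (not defined in it).
kubectl create namespace llama

# Apply the StatefulSet and Service.
kubectl apply -f ollama.yaml

# Wait for the pod to become ready; the init container may take a long
# time on first start while it downloads the model.
kubectl -n llama rollout status statefulset/ollama

# Port-forward the ClusterIP Service and send a test prompt to the Ollama API.
kubectl -n llama port-forward svc/ollama 11434:11434 &
curl http://localhost:11434/api/generate -d '{
  "model": "llama3:70b",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```

In-cluster clients can skip the port-forward and reach the server directly via the Service DNS name, `http://ollama.llama.svc:11434`.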