11 Commits

Author SHA1 Message Date
Michael Thomson
b7e58b9ea3 ollama upgrade ctx size 2026-03-20 09:53:54 -04:00
Michael Thomson
e65f526e92 open webui 2026-03-20 01:42:02 -04:00
Michael Thomson
9e81176778 ollama kustomization interval 2026-03-20 01:21:42 -04:00
Michael Thomson
e90bcfeb6f ollama ingress 2026-03-20 01:18:26 -04:00
Michael Thomson
1062e511e1 ollama 2026-03-20 01:15:23 -04:00
Michael Thomson
6ba6b5d2d3 ollama 2026-03-20 01:13:59 -04:00
Michael Thomson
37db88f2d0 alloy push to loki 2026-03-20 00:20:10 -04:00
Michael Thomson
d11cfda191 loki remove auth 2026-03-20 00:00:59 -04:00
Michael Thomson
dd5d17c053 fix grafana community reference 2026-03-19 23:41:20 -04:00
Michael Thomson
f0bcb88596 add loki helm repo 2026-03-19 23:38:53 -04:00
Michael Thomson
027c79791f add loki and alloy 2026-03-19 23:34:12 -04:00
12 changed files with 377 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
# Flux HelmRelease that installs the `alloy` chart from the `grafana`
# HelmRepository into the kube-prometheus-stack namespace.
# The embedded Alloy pipeline tails pod logs and cluster events and pushes
# both to the Loki endpoint at http://loki:3100.
#
# NOTE(review): the pod discovery below is restricted to the local node, which
# suggests one Alloy instance per node. If so, every instance would also run
# loki.source.kubernetes_events and collect the same cluster events — TODO
# confirm whether events should move to a single-replica collector.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: alloy
  namespace: kube-prometheus-stack
spec:
  interval: 15m
  releaseName: alloy
  chart:
    spec:
      chart: alloy
      version: 1.x
      sourceRef:
        kind: HelmRepository
        name: grafana
  values:
    alloy:
      configMap:
        # Everything under `content` is the literal Alloy configuration that
        # ends up in the ConfigMap; YAML comments must stay outside of it.
        content: |-
          // Write your Alloy config here:
          loki.write "default" {
            endpoint {
              url = "http://loki:3100/loki/api/v1/push"
            }
          }

          // discovery.kubernetes allows you to find scrape targets from Kubernetes resources.
          // It watches cluster state and ensures targets are continually synced with what is currently running in your cluster.
          discovery.kubernetes "pod" {
            role = "pod"
            // Restrict to pods on the node to reduce cpu & memory usage
            selectors {
              role = "pod"
              field = "spec.nodeName=" + coalesce(sys.env("HOSTNAME"), constants.hostname)
            }
          }

          // discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules.
          // If no rules are defined, then the input targets are exported as-is.
          discovery.relabel "pod_logs" {
            targets = discovery.kubernetes.pod.targets
            // Label creation - "namespace" field from "__meta_kubernetes_namespace"
            rule {
              source_labels = ["__meta_kubernetes_namespace"]
              action = "replace"
              target_label = "namespace"
            }
            // Label creation - "pod" field from "__meta_kubernetes_pod_name"
            rule {
              source_labels = ["__meta_kubernetes_pod_name"]
              action = "replace"
              target_label = "pod"
            }
            // Label creation - "container" field from "__meta_kubernetes_pod_container_name"
            rule {
              source_labels = ["__meta_kubernetes_pod_container_name"]
              action = "replace"
              target_label = "container"
            }
            // Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name"
            rule {
              source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
              action = "replace"
              target_label = "app"
            }
            // Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name"
            // Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name
            rule {
              source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
              action = "replace"
              target_label = "job"
              separator = "/"
              replacement = "$1"
            }
            // Label creation - "__path__" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name"
            // Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log
            rule {
              source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
              action = "replace"
              target_label = "__path__"
              separator = "/"
              replacement = "/var/log/pods/*$1/*.log"
            }
            // Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id"
            rule {
              source_labels = ["__meta_kubernetes_pod_container_id"]
              action = "replace"
              target_label = "container_runtime"
              regex = `^(\S+):\/\/.+$`
              replacement = "$1"
            }
          }

          // loki.source.kubernetes tails logs from Kubernetes containers using the Kubernetes API.
          loki.source.kubernetes "pod_logs" {
            targets = discovery.relabel.pod_logs.output
            forward_to = [loki.process.pod_logs.receiver]
          }

          // loki.process receives log entries from other Loki components, applies one or more processing stages,
          // and forwards the results to the list of receivers in the component's arguments.
          loki.process "pod_logs" {
            stage.static_labels {
              values = {
                cluster = "server",
              }
            }
            forward_to = [loki.write.default.receiver]
          }

          // loki.source.kubernetes_events tails events from the Kubernetes API and converts them
          // into log lines to forward to other Loki components.
          loki.source.kubernetes_events "cluster_events" {
            job_name = "integrations/kubernetes/eventhandler"
            log_format = "logfmt"
            forward_to = [
              loki.process.cluster_events.receiver,
            ]
          }

          // loki.process receives log entries from other loki components, applies one or more processing stages,
          // and forwards the results to the list of receivers in the component's arguments.
          loki.process "cluster_events" {
            forward_to = [loki.write.default.receiver]
            stage.static_labels {
              values = {
                cluster = "server",
              }
            }
            stage.labels {
              values = {
                kubernetes_cluster_events = "job",
              }
            }
          }

View File

@@ -0,0 +1,71 @@
# Flux HelmRelease that installs the `loki` chart from the `grafana-community`
# HelmRepository. Loki runs in SingleBinary mode with three replicas and
# authentication disabled; every other deployment mode is scaled to zero.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: loki
  namespace: kube-prometheus-stack
spec:
  interval: 15m
  releaseName: loki
  chart:
    spec:
      chart: loki
      version: 6.x
      sourceRef:
        kind: HelmRepository
        name: grafana-community
  values:
    loki:
      # No tenant header is required from clients pushing to this instance.
      auth_enabled: false
      commonConfig:
        replication_factor: 3
      schemaConfig:
        configs:
          - from: "2024-04-01"
            store: tsdb
            object_store: s3
            schema: v13
            index:
              prefix: loki_index_
              period: 24h
      pattern_ingester:
        enabled: true
      limits_config:
        allow_structured_metadata: true
        volume_enabled: true
      ruler:
        enable_api: true

    # Bundled MinIO, presumably backing the `s3` object_store above — confirm.
    minio:
      enabled: true

    deploymentMode: SingleBinary
    singleBinary:
      replicas: 3

    # Zero out replica counts of other deployment modes
    backend: {replicas: 0}
    read: {replicas: 0}
    write: {replicas: 0}
    ingester: {replicas: 0}
    querier: {replicas: 0}
    queryFrontend: {replicas: 0}
    queryScheduler: {replicas: 0}
    distributor: {replicas: 0}
    compactor: {replicas: 0}
    indexGateway: {replicas: 0}
    bloomCompactor: {replicas: 0}
    bloomGateway: {replicas: 0}

View File

@@ -0,0 +1,8 @@
# Helm chart source referenced by the `loki` HelmRelease in this namespace.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: kube-prometheus-stack
  name: grafana-community
spec:
  url: https://grafana-community.github.io/helm-charts
  # Re-fetch the repository index every 15 minutes.
  interval: 15m

View File

@@ -0,0 +1,8 @@
# Helm chart source referenced by the `alloy` HelmRelease in this namespace.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: kube-prometheus-stack
  name: grafana
spec:
  url: https://grafana.github.io/helm-charts
  # Re-fetch the repository index every 15 minutes.
  interval: 15m

56
apps/ollama/release.yaml Normal file
View File

@@ -0,0 +1,56 @@
# Flux HelmRelease that installs the `ollama` chart from the `otwld`
# HelmRepository with one NVIDIA GPU, builds a 32k-context variant of
# qwen3.5:9b at startup, and publishes the API through a Traefik ingress.
#
# NOTE(review): the ingress below exposes the Ollama API on a public-looking
# hostname and no authentication is configured in this manifest — confirm the
# host is only reachable from trusted networks or add an auth middleware.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: ollama
  namespace: ollama
spec:
  chart:
    spec:
      chart: ollama
      version: 1.x
      sourceRef:
        kind: HelmRepository
        name: otwld
  interval: 15m
  releaseName: ollama
  values:
    runtimeClassName: nvidia
    ollama:
      gpu:
        # -- Enable GPU integration
        enabled: true
        # -- GPU type: 'nvidia' or 'amd'
        type: 'nvidia'
        # -- Specify the number of GPU to 1
        number: 1
        # Extended resource name requested for the GPU.
        nvidiaResource: "nvidia.com/gpu-all"
      models:
        # -- Models to build at container startup from an inline Modelfile
        create:
          - name: qwen3.5:9b-ctx32768
            template: |
              FROM qwen3.5:9b
              PARAMETER num_ctx 32768
        # -- Models to load at container startup
        run:
          - qwen3.5:9b-ctx32768
    ingress:
      enabled: true
      annotations:
        cert-manager.io/cluster-issuer: "letsencrypt-prod"
        traefik.ingress.kubernetes.io/router.tls: "true"
        traefik.ingress.kubernetes.io/router.entrypoints: websecure
      hosts:
        - host: ollama.michaelthomson.dev
          paths:
            # A second, stray `path: /` that followed this entry was removed:
            # the path is already declared here.
            - path: /
              pathType: Prefix
      tls:
        - secretName: ollama-tls
          hosts:
            - ollama.michaelthomson.dev

View File

@@ -0,0 +1,8 @@
# Helm chart source referenced by the `ollama` HelmRelease in this namespace.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: ollama
  name: otwld
spec:
  url: https://helm.otwld.com/
  # Re-fetch the repository index every 15 minutes.
  interval: 15m

View File

@@ -0,0 +1,27 @@
# Flux HelmRelease that installs the `open-webui` chart from the `open-webui`
# HelmRepository and publishes it at chat.michaelthomson.dev through Traefik
# with a cert-manager issued certificate.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: open-webui
  namespace: open-webui
spec:
  chart:
    spec:
      chart: open-webui
      version: 12.x
      sourceRef:
        kind: HelmRepository
        name: open-webui
  interval: 15m
  releaseName: open-webui
  values:
    # The chart's bundled Ollama is not deployed; the standalone `ollama`
    # release in the `ollama` namespace is used instead.
    ollama:
      enabled: false
    # Declare the external Ollama backend so the connection is configured from
    # Git rather than by hand in the UI.
    # Assumes the `ollama` release exposes a Service named `ollama` on port
    # 11434 — TODO confirm against the deployed Service.
    ollamaUrls:
      - http://ollama.ollama.svc.cluster.local:11434
    ingress:
      enabled: true
      annotations:
        cert-manager.io/cluster-issuer: "letsencrypt-prod"
        traefik.ingress.kubernetes.io/router.tls: "true"
        traefik.ingress.kubernetes.io/router.entrypoints: websecure
      host: "chat.michaelthomson.dev"
      tls: true
      # Secret that holds the TLS certificate for the host above.
      existingSecret: open-webui-tls

View File

@@ -0,0 +1,8 @@
# Helm chart source referenced by the `open-webui` HelmRelease in this namespace.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  namespace: open-webui
  name: open-webui
spec:
  url: https://helm.openwebui.com/
  # Re-fetch the repository index every 15 minutes.
  interval: 15m

View File

@@ -0,0 +1,19 @@
# Flux Kustomization that reconciles everything under ./apps/ollama from the
# flux-system GitRepository, after `infra-configs` is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: ollama
  namespace: flux-system
spec:
  interval: 1m
  # With `wait: true` the reconciliation blocks until the applied resources
  # report ready. Without an explicit timeout that window falls back to the
  # 1m interval, which is too short for a GPU workload that builds a model at
  # startup, so give the health check its own, longer budget.
  timeout: 10m
  path: ./apps/ollama
  prune: true # remove any elements later removed from the above path
  wait: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Decrypt SOPS-encrypted manifests with the key stored in `sops-age`.
  decryption:
    provider: sops
    secretRef:
      name: sops-age
  dependsOn:
    - name: infra-configs

View File

@@ -0,0 +1,19 @@
# Flux Kustomization that reconciles everything under ./apps/open-webui from
# the flux-system GitRepository, after `infra-configs` is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: open-webui
  namespace: flux-system
spec:
  interval: 1m
  # With `wait: true` the reconciliation blocks until the applied resources
  # report ready. Without an explicit timeout that window falls back to the
  # 1m interval, so give the Helm install and first image pull a longer
  # health-check budget.
  timeout: 10m
  path: ./apps/open-webui
  prune: true # remove any elements later removed from the above path
  wait: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Decrypt SOPS-encrypted manifests with the key stored in `sops-age`.
  decryption:
    provider: sops
    secretRef:
      name: sops-age
  dependsOn:
    - name: infra-configs

View File

@@ -0,0 +1,4 @@
# Namespace that holds the `ollama` HelmRelease and its `otwld` HelmRepository.
apiVersion: v1
kind: Namespace
metadata: {name: ollama}

View File

@@ -0,0 +1,4 @@
# Namespace that holds the `open-webui` HelmRelease and HelmRepository.
apiVersion: v1
kind: Namespace
metadata: {name: open-webui}