Skip to content

Commit

Permalink
Reduce minimal infra (#14)
Browse files Browse the repository at this point in the history
feat(Infra): Reduce minimal infrastructure required to get Rivet running
fix(Infra): Disabled K8s Dashboard by default
fix(Infra): Disabled Prometheus and friends (Vector, Loki, Promtail) by default
fix(Infra): Disabled Clickhouse by default
fix(Infra): Disabled NSFW Check API
fix(Infra): Disabled Image Resizing (via Imagor) by default
  • Loading branch information
AngelOnFira authored Feb 23, 2024
1 parent 2a15d47 commit 5897d8a
Show file tree
Hide file tree
Showing 40 changed files with 675 additions and 276 deletions.
1 change: 0 additions & 1 deletion .github/actions/pre-init-distributed/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ runs:
shell: bash -e {0}
run: rm -f target/debug/bolt target/release/bolt

# Run `bolt config generate` so that the `Check` job can start working sooner
- name: Generate Bolt Config
shell: bash -e {0}
run: nix-shell --pure --run "bolt config generate ci"
105 changes: 105 additions & 0 deletions docs/benchmarks/START_TIMES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Start times

## Low-end machine

> System
>
> - Debian GNU/Linux 11
> - Shared VM, 4 VCPUs (of AMD EPYC 7713 16-Core 2GHz)
> - 8GB memory

### `nix-shell` setup time (fresh)

- Before building `bolt`: 1m31s
- Building `bolt`: 2m15s

### Services (Minimal setup)

| step | up |
| ------------------ | ----- |
| k8s-cluster | 20s |
| k8s-infra | 2m31s |
| redis | 1s |
| cockroach | 1s |
| clickhouse | 1s |
| s3 | 24s |
| infra-artifacts | 50s |
| migrate | 1m2s |
| up (containerized) | 7s |
| total | 5m17s |

### `k8s-infra` breakdown

_Note: these times are not additive, as the services are provisioned in parallel._

_First loki, promtail, and prometheus are provisioned; then the rest follow._

| service | up |
| -------------- | ----- |
| promtail | 3s |
| prometheus | 43s |
| loki | 1m14s |
| k8s_dashboard | 3s |
| traefik tunnel | 20s |
| traefik | 20s |
| traffic_server | 26s |
| nats | 27s |
| imagor | 29s |
| minio | 35s |
| nomad_server | 46s |
| clickhouse | 47s |
| redis | 51s |
| nsfw_api | 56s |
| cockroachdb | 1m6s |

## Higher-end machine

> System
>
> - Debian GNU/Linux 11
> - AMD EPYC 7713 16-Core 2GHz
> - 32GB memory

### Services (Complex setup)

_This setup uses Postgres as the Terraform config storage method, which adds overhead to each step._

| step | up | destroy |
| ------------------ | -------- | -------- |
| k8s-cluster | 27s | 16s |
| k8s-infra | 2m34s | - |
| tls | 4m29s | 5s |
| redis | 11s | - |
| cockroach | 10s | - |
| clickhouse | 10s | - |
| vector | 19s | - |
| pools | 2m43s | 1m57s |
| dns | 2m48s | 9s |
| better uptime | untested | untested |
| cf-workers | 15s | 6s |
| cf-tunnels | 18s | 12s |
| s3 | 35s | - |
| infra-artifacts | 35s | - |
| migrate | 58s | - |
| up (containerized) | 7s | - |
| total | 16m39s | 2m45s |

### `k8s-infra` breakdown

| service | up |
| -------------- | ----- |
| promtail | 6s |
| prometheus | 48s |
| loki | 1m20s |
| k8s_dashboard | 6s |
| imagor | 8s |
| traefik | 12s |
| traefik tunnel | 14s |
| traffic_server | 16s |
| minio | 22s |
| nats | 28s |
| clickhouse | 30s |
| redis | 33s |
| nsfw_api | 36s |
| nomad_server | 46s |
| cockroachdb | 49s |
9 changes: 9 additions & 0 deletions errors/feature/disabled.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
name = "FEATURE_DISABLED"
description = "The given feature is disabled: {feature}"
http_status = 400
---

# Feature Disabled

A feature required to use/access this resource is disabled.
28 changes: 14 additions & 14 deletions infra/tf/k8s_infra/clickhouse.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
locals {
clickhouse_k8s = var.clickhouse_provider == "kubernetes"
clickhouse_enabled = var.clickhouse_enabled && var.clickhouse_provider == "kubernetes"
service_clickhouse = lookup(var.services, "clickhouse", {
count = 1
resources = {
Expand All @@ -10,7 +10,7 @@ locals {
}

module "clickhouse_secrets" {
count = local.clickhouse_k8s ? 1 : 0
count = local.clickhouse_enabled ? 1 : 0

source = "../modules/secrets"

Expand All @@ -20,14 +20,16 @@ module "clickhouse_secrets" {
}

# Kubernetes namespace named "clickhouse".
# `count` makes the namespace conditional: one instance when the gating local
# is true, zero otherwise.
# NOTE(review): this is a rendered diff, so the two `count` lines below appear
# to be the removed (`clickhouse_k8s`) and added (`clickhouse_enabled`)
# versions of the same attribute — only one exists in the real file.
resource "kubernetes_namespace" "clickhouse" {
count = local.clickhouse_k8s ? 1 : 0
count = local.clickhouse_enabled ? 1 : 0

metadata {
name = "clickhouse"
}
}

resource "kubernetes_priority_class" "clickhouse_priority" {
count = local.clickhouse_enabled ? 1 : 0

metadata {
name = "clickhouse-priority"
}
Expand All @@ -36,12 +38,11 @@ resource "kubernetes_priority_class" "clickhouse_priority" {
}

resource "helm_release" "clickhouse" {
count = local.clickhouse_enabled ? 1 : 0
depends_on = [null_resource.daemons]

count = local.clickhouse_k8s ? 1 : 0

name = "clickhouse"
namespace = kubernetes_namespace.clickhouse[0].metadata.0.name
namespace = kubernetes_namespace.clickhouse.0.metadata.0.name
chart = "../../helm/clickhouse"
# repository = "oci://registry-1.docker.io/bitnamicharts"
# chart = "clickhouse"
Expand All @@ -56,7 +57,7 @@ resource "helm_release" "clickhouse" {
replicaCount = 1
}

priorityClassName = kubernetes_priority_class.clickhouse_priority.metadata.0.name
priorityClassName = kubernetes_priority_class.clickhouse_priority.0.metadata.0.name
resources = var.limit_resources ? {
limits = {
memory = "${local.service_clickhouse.resources.memory}Mi"
Expand Down Expand Up @@ -121,15 +122,15 @@ resource "helm_release" "clickhouse" {
# Admin auth
auth = {
username = "default"
password = module.clickhouse_secrets[0].values["clickhouse/users/default/password"]
password = module.clickhouse_secrets.0.values["clickhouse/users/default/password"]
}

metrics = {
enabled = true

serviceMonitor = {
enabled = true
namespace = kubernetes_namespace.clickhouse[0].metadata.0.name
namespace = kubernetes_namespace.clickhouse.0.metadata.0.name
}

# TODO:
Expand All @@ -142,26 +143,25 @@ resource "helm_release" "clickhouse" {
}

# Reads the "clickhouse-crt" secret from the clickhouse namespace.
# The lookup is conditional on the gating local via `count`, and is ordered
# after the ClickHouse Helm release through `depends_on`.
# NOTE(review): this is a rendered diff, so the duplicated `count` and
# `namespace` lines appear to be the removed and added versions of the same
# attribute (`[0]` vs `.0` index syntax) — only one of each exists in the
# real file.
data "kubernetes_secret" "clickhouse_ca" {
count = local.clickhouse_k8s ? 1 : 0

count = local.clickhouse_enabled ? 1 : 0
depends_on = [helm_release.clickhouse]

metadata {
name = "clickhouse-crt"
# Namespace name is taken from the (counted) clickhouse namespace resource.
namespace = kubernetes_namespace.clickhouse[0].metadata.0.name
namespace = kubernetes_namespace.clickhouse.0.metadata.0.name
}
}

# Publishes the ClickHouse CA certificate as a "clickhouse-ca" config map in
# each of the "rivet-service", "bolt", and "vector" namespaces.
# When the gating local is false, `for_each` receives an empty set and no
# config maps are created.
# NOTE(review): this is a rendered diff, so the duplicated `for_each` and
# `"ca.crt"` lines appear to be the removed and added versions of the same
# attribute — only one of each exists in the real file.
resource "kubernetes_config_map" "clickhouse_ca" {
for_each = local.clickhouse_k8s ? toset(["rivet-service", "bolt", "vector"]) : toset([])
for_each = local.clickhouse_enabled ? toset(["rivet-service", "bolt", "vector"]) : toset([])

metadata {
name = "clickhouse-ca"
# Each set element is used directly as the target namespace.
namespace = each.value
}

data = {
# "ca.crt" is copied from the "clickhouse-crt" secret data source.
"ca.crt" = data.kubernetes_secret.clickhouse_ca[0].data["ca.crt"]
"ca.crt" = data.kubernetes_secret.clickhouse_ca.0.data["ca.crt"]
}
}

4 changes: 2 additions & 2 deletions infra/tf/k8s_infra/grafana.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ locals {
}

resource "kubernetes_config_map" "grafana_dashboard" {
for_each = local.grafana_dashboards
for_each = var.prometheus_enabled ? local.grafana_dashboards : {}

metadata {
namespace = kubernetes_namespace.prometheus.metadata.0.name
namespace = kubernetes_namespace.prometheus.0.metadata.0.name
name = "prometheus-rivet-${each.key}"
labels = {
grafana_dashboard = "1"
Expand Down
Loading

0 comments on commit 5897d8a

Please sign in to comment.