$ANSIBLE_VAULT;1.1;AES256
61306638363530396230303964306161653433636461666561666335623662643063313665343138
3235393533313735656230333333323465353034363035640a666139373665633361623431383539
37383534623334306561303031383966376339366661653739643737653566333634383764303166
6538643432646161390a396561343338653165663736653266616162333032373262376235653137
66383834316332633262353164346234323834646336373230396537636130623234613762333838
65393439303464623838343364396462303539666235643331663433316663386361303463366337
62343662633566336339666462376539333330356534613036333533346431316131656263633130
38343461303434333064616665316133633738356431653530336262366164353532323764643732
31613764306635323061393134316265363631326434626234313931356230396163396164333434
36383730393231383561313939356335306563363266623832313338313162323537626630383463
62326333373134303839346562316233663162616632643030313831393334366539656236626430
32363433666235336539373963363633343664633230336564663663333137653033623036333236
30393965373661306632623135626630646437393963346234613766386139646665366533613839
66616134386562663637626238623738393932393835633162306631386534613439313936616138
30383261326661353532303765313539316461363162343866613134623865363636323737613166
33363634663135326130643365613166343130633135643964316130393635333939343766373834
63653166313566363036333063656162336633333939303836316365653635393961366232383734
64316130356337636536666638363230383765376438343535646463353763386333666436363166
35396364306134643262663333663061363531633836373663383836326661333232636332316164
39613134383937373637663764626436376633353134653361633837336438653965313234393330
33623164396231636363343538353464613639643532396665396463376564663133386532353561
39636462326638366339313438646637313333643534386563633961313636376563353865333031
66653036633039373666353731363265363461636563313530353763343266636334613539616263
31393165346634383036636266303063306465386161643739616337383933343763656265643662
39323831353865363363346434363762633938356361663662323733343161316530356437356435
64376431303562383463333561383562373639613235353864326638383636363965373339316266
37303939386362636135323137643436316165663664373238633935653666356236353163353663
65346166643339633139343537383431353063386166306635656361666534393535366634626632
37666464333133303737613934336338383536663434323363636365356433656265373664333531
65633963373636633562303339646165643961346338656236313664666233303030343863313334
64346134633165366362643439343766623535633461373533626261336561323664303837353234
30313434346335636135366662633530623265653833663863326166616662346463653935376461
66356132613762373633646366386234633735393432346362383163366661656137373163353034
33633533643134366336653131303233666537353832646635616165353137373564393335613863
33613265616234396438306432303364656432323465666263656232323266646261383637376265
31363239373635316437643334336435306536663934386666396630656531383836373666653566
35306139623237626437323566386565653031333430623531623235636538393466333861306531
64346135396236663938343533353337343338326533376433636138393564393863376330653734
633836313564313232393934316539373035
/hepto
/local
*terraform*
image: golang:latest
stages:
  - build
  - deploy
  - test

compile:
  stage: build
  image: golang:alpine
  script:
    - source build.env
    - go build ./cmd/hepto.go
  artifacts:
    paths:
      - hepto
    expire_in: 1 week
  variables:
    GIT_FETCH_EXTRA_FLAGS: --tags
    GOMODCACHE: .go
  before_script:
    - apk add --no-cache git gcc g++ libseccomp-dev libseccomp-static
    - git fetch --tags
    - mkdir -p .go
  cache:
    paths:
      - .go/pkg/mod/

.before_script_template: &ansible-job
  image: docker.tedomum.net/acides/ansible-ci:main
  before_script:
    - chmod 750 ansible
    - echo "$VAULT_PASSWORD" > /tmp/vault
    - eval $(ssh-agent -s)
    - ssh-add <(echo "$SSH_KEY"|base64 -d)
  variables:
    # The ansible vault only contains secret variables; the cluster config itself
    # is set from the documentation inventory
    ANSIBLE_ARGS: -i ../.ci-vault -i inventories/sample-cloud.yaml --vault-password-file /tmp/vault -e $PROVIDER.prefix=ci-$CI_JOB_ID

upload:
  stage: test
  needs:
    - compile
  <<: *ansible-job
  script:
    - cd ansible/
    - ansible $ANSIBLE_ARGS -m include_tasks -a file=playbooks/ciupload.yaml localhost

test-cluster:
  stage: test
  needs:
    - upload
  dependencies: []
  <<: *ansible-job
  parallel:
    matrix:
      - PROVIDER: scaleway
        IMAGE:
          - debian12
      - PROVIDER: hcloud
        IMAGE:
          - debian12
  script:
    - cd ansible/
    - ansible-playbook $ANSIBLE_ARGS playbooks/deploy.yaml -e cloud_provider=$PROVIDER -e image=$IMAGE
    - ansible-playbook $ANSIBLE_ARGS playbooks/test_podinfo.yaml -e cloud_provider=$PROVIDER -e image=$IMAGE
  after_script:
    - cd ansible/
    - ansible-playbook $ANSIBLE_ARGS playbooks/cleanup.yaml -e cloud_provider=$PROVIDER -e image=$IMAGE
Bundling vanilla kubernetes, containerd, vanilla CNI plugins and a full-featured
CNI together requires some quirks, mostly due to conflicting dependencies.
We try to use the most up-to-date version of each dependency while upstreaming
the patches required for interoperability. The remaining quirks are listed below.
# Kubernetes beta release
Kubernetes 1.23 up until 1.25 requires a recent version of containerd (1.6+),
which pulls in otel 1.x, conflicting with both
kubernetes itself (which is funny, to be honest) and embedded etcd 3.5.
Fortunately etcd 3.5.6 upgraded its dependency, opening the path for kubernetes, though it is not released yet.
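When chasing this kind of conflict, the Go module graph shows which module pulls
which version; a minimal sketch (the grep pattern is only illustrative):

```sh
# List every module edge involving otel, to spot conflicting requirements
go mod graph | grep go.opentelemetry.io/otel
```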
# CNI plugins forked by Rancher
containernetworking/plugins does not expose its main functions for plugins,
which prevents using them as reexec targets and bundling them
in a single binary.
Rancher forked the project to build a single CNI binary, which we currently
reuse to bundle the plugins in the main binary instead.
The fork is located at github.com/rancher/plugins.
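For illustration, this is the reexec pattern: a single binary dispatches on the
name it was invoked under. A minimal sketch, where the plugin names and entry
points are hypothetical placeholders:

```go
// Dispatch on argv[0] so one binary can act as several CNI plugins.
// The plugin names and run functions below are hypothetical placeholders.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	switch filepath.Base(os.Args[0]) {
	case "bridge":
		runBridge()
	case "loopback":
		runLoopback()
	default:
		fmt.Fprintln(os.Stderr, "unknown plugin:", os.Args[0])
		os.Exit(1)
	}
}

func runBridge()   {} // would call into the bridge plugin main
func runLoopback() {} // would call into the loopback plugin main
```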
# Gojose pinned to v2.2.2
TODO (document why this is pinned exactly)
# Hepto is a kubernetes distribution for geodistributed deployments
**This is currently highly experimental and unusable as is.**
## Goals
Hepto is yet another kubernetes distribution. It is driven by the following goals:
- working out of the box over WAN without compromising security
- offering a very opinionated kubernetes experience
- hiding much of the complexity of setting up kubernetes
- running with the minimum resource footprint (mostly memory)
- bundling vanilla upstreams with as few forks as possible, ideally none
- compiling to a (true) single binary on both amd64 and arm64
# Rationale
Instead of optimizing for disk size or embedding all kinds of batteries, hepto
focuses on the minimal feature set for a kubernetes distribution. Here are some
dos and don'ts:
- *do* optimize for lower resident memory, this implies:
  - *do not* unpack multiple static binaries at runtime, which do not map well
    in the page cache
  - *do not* optimize for size by compressing part of the binary and inflating
    at runtime
  - *do not* embed external binaries and assume Internet access to load further
    resources
- *do* work out dependency issues, this implies:
  - *do* update packages as much as possible
  - *do* depend on Go modules from vanilla upstreams
  - *do not* depend on our own forks, if possible not on any fork
  - *do not* pin dependencies unless required for compatibility
  - *do* write down the rationale for every quirk
  - *do* upstream as many fixes as possible
- *do* offer an opinionated experience:
  - *do* make choices explicit and document their rationale
  - *do* hide complexity any time a user choice is not required
  - *do not* offer too many choices to the user
  - *do not* embed optional batteries
# Architecture
Hepto's architecture is still a moving target; here are some hints about our current
thinking:
- do not rely on service mesh or complex CNI features for securing traffic, simply
  run a mesh VPN underneath kubernetes
- do not try to make etcd work at scale over a geographic WAN; keep a central apiserver
  and make sure it can be backed up and restored with ease
- embed everything in a single binary for optimized resident memory usage
- run everything in namespaces, with cgroup configuration prepared during startup (see
  the sketch after this list)
- the namespace root should only contain the binary itself (for shims and CNI)
- if possible, drop privileges and run usermode containerd (this probably requires
  that shims are run separately, plus that kube-proxy and maybe volume managers are
  bundled instead of running in-cluster)
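To illustrate the namespace approach, a minimal sketch of a process re-executing
itself inside fresh Linux namespaces; the actual flags, cgroup and filesystem
setup in hepto are more involved:

```go
// Re-execute the current binary in new mount, PID, network and UTS
// namespaces (Linux only); cgroup preparation is omitted.
package main

import (
	"os"
	"os/exec"
	"syscall"
)

func main() {
	cmd := exec.Command("/proc/self/exe", "child")
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWPID |
			syscall.CLONE_NEWNET | syscall.CLONE_NEWUTS,
	}
	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
	if err := cmd.Run(); err != nil {
		panic(err)
	}
}
```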
# **Hepto** is a **Kubernetes** distribution for geo-deployments
## Features
**Hepto** is a *very* opinionated **Kubernetes** distribution. Here are some design choices,
which we think are its most interesting features:
- single binary, no dependency except for the kernel,
- automatic cluster discovery based on gossip protocols (see the sketch below),
- automatic cluster PKI setup,
- auto-containerization of the k8s stack,
- IPv6-only, from the ground up.
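As an illustration of gossip-based discovery, a minimal sketch using
hashicorp/memberlist (which the project TODO list mentions); hepto's actual
wiring and configuration are not shown:

```go
// Join a gossip cluster through a known anchor and list discovered nodes.
// The node name and anchor address are placeholders.
package main

import (
	"log"

	"github.com/hashicorp/memberlist"
)

func main() {
	conf := memberlist.DefaultWANConfig()
	conf.Name = "mynode1"
	list, err := memberlist.Create(conf)
	if err != nil {
		log.Fatal(err)
	}
	// Join the cluster through the anchor address
	if _, err := list.Join([]string{"2a00::dead:beef"}); err != nil {
		log.Fatal(err)
	}
	for _, member := range list.Members() {
		log.Println("discovered node:", member.Name, member.Addr)
	}
}
```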
## Getting started
⚠️ **Hepto** is still highly experimental, so a quick disclaimer: please do not host anything
critical on hepto at the moment, please back up your data, and please file issues
if you find bugs.
First prepare your node; you will need:
- a public network interface with an available IPv6 address and gateway (**Hepto** will start
  its own container with a network stack separate from the host, so that the **Kubernetes**
  control plane is isolated),
- a recent-enough kernel (tested with >= 5.19),
- a secret security key, generated for instance with `openssl rand -hex 24`, that will be
  shared among nodes and secure discovery.
In a general environment file, shared by all nodes, write the following config:
```sh
export HEPTO_CLUSTER=mycluster
export HEPTO_KEY=deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
```
Note that all **Hepto** options are available through environment variables, see
`hepto -help` for details.
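For instance, the flags used in the next section can presumably be set through
variables following the same `HEPTO_` pattern; the exact names below are
assumptions, `hepto -help` is authoritative:

```sh
# Assumed flag-to-variable mapping following the HEPTO_ pattern;
# check `hepto -help` for the authoritative names
export HEPTO_NAME=mymaster # assumed equivalent of -name mymaster
export HEPTO_ROLE=master   # assumed equivalent of -role master
```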
## Run your first single-node cluster
To start your first cluster, run `hepto` in `master` mode, which embeds both a
master (apiserver, controller manager, etc.) and a node (kubelet, containerd)
inside a single process.
```sh
source docs/example.env
# -name:  the node name, data will be stored in /var/lib/mycluster/mymaster
# -role:  the role, here a master
# -iface: the host main network interface
# -ip:    optional, IPv6 for the instance, autoconfigured from RA if unset
# -gw:    optional, IPv6 for the gateway, autoconfigured from RA if unset
# -dns:   optional, IPv6 of DNS servers, Cloudflare DNS is used if unset
hepto \
  -name mymaster \
  -role master \
  -iface eth0 \
  -ip 2a00::dead:beef/64 \
  -gw 2a00::1 \
  -dns 2a00:1234::1234
```
All data, including a usable `kubeconfig`, will be stored in `<data>/<cluster name>/<node name>`.
Once the cluster has stabilized, you may run kubectl against the apiserver:
```sh
export KUBECONFIG=/var/lib/mycluster/mymaster/kubeconfig
kubectl get all -A
```
## Multi-node cluster
When running a multi-node cluster, at least one node in the cluster must have a stable
IP address for discovery. We call that node an anchor: any other node is able to join the
cluster knowing the cluster name, the anchor IP address and the cluster security key.
It is customary but not mandatory for the anchor to also be the cluster master. Clusters
running over unstable networks may also have multiple anchors for safety (anchors are only
required when bootstrapping nodes).
Start by running a master node anywhere:
```sh
source docs/example.env
hepto -name mymaster -role master -iface eth0 -ip 2a00::dead:beef/64
```
Then on every other node, use the anchor IP address for discovery:
```sh
source docs/example.env
hepto -name mynode1 -role node -iface eth0 -anchor 2a00::dead:beef
```
## Bootstrapping the cluster
**Hepto** comes with no batteries included. After starting the cluster, there is not even a
CNI installed for setting up your pod networking. **Hepto** is tested with the following base
stack for bootstrapping:
- `kube-proxy` for routing services, using `iptables`
- `calico` as a CNI, in IPv6 `iptables` mode
- `coredns` as a cluster DNS
The easiest way to bootstrap the cluster is to deploy it using Ansible, as described in the
following section. If you do wish to bootstrap manually, however, you will find two kustomizations
in the `bootstrap` directory, from which you can infer how to deploy each component independently:
- first the `calico` kustomization is self-descriptive and deploys the Calico CNI and its CRDs,
- then the `networking` kustomization deploys networking components, including the Calico config
itself, kube-proxy and CoreDNS.
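As a sketch, a manual bootstrap boils down to applying both kustomizations in order,
mirroring what the Ansible playbook does; this assumes the kustomizations have been
rendered with your cluster settings, and the pause leaves time for CRD registration
to settle:

```sh
export KUBECONFIG=/var/lib/mycluster/mymaster/kubeconfig
kubectl apply -k bootstrap/calico
sleep 30 # the CRD registration controller runs on a fixed timer
kubectl apply -k bootstrap/networking
```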
## Deploying a cluster on many nodes using **Ansible**
This repository provides an ansible role to deploy **Hepto** on a node. Start with an
inventory file listing your nodes and providing some variables; also add nodes to
the **master**, **anchor** and **public** groups.
See `ansible/inventories/sample-deploy.yaml` for an example.
Then run **Ansible**:
```sh
cd ansible && ansible-playbook -i inventories/inventory.yaml playbooks/deploy.yaml
```
This will deploy **Hepto** on the given nodes and bootstrap the cluster using the
bootstrap helm chart.
If you wish to populate Cloud nodes automatically, see `ansible/inventories/sample-cloud.yaml`
instead for an example inventory.
## Development
**Hepto** is being developed as part of an ongoing effort to provide decent
infrastructure to small-scale geodistributed hosting providers. If your use case
fits our philosophy, any help in maintaining the project is welcome.
Development requirements are:
- `git` (both for cloning and build configuration)
- `go` >= 1.19
- `ld` for static linking
- `libseccomp` headers
```sh
# On Ubuntu/Mint
sudo apt-get install libseccomp-dev build-essential pkg-config
```
Start by cloning the repository, then build the project:
```sh
# Build configuration is declared as environment variables
source ./build.env
# Build a single binary
go build ./cmd/hepto.go
```
Or alternatively just run `make`.
For details about the architecture, see [design documentation](docs/design.md).
TODO: move this to Gitlab issues
- [ ] Work out log centralization, possibly reformatting, and log level filtering
- [x] Improve on the concept of wrapper, maybe embed the logging system in there
- [ ] Embed containerd shim and runc somehow (runc being the least trivial of all)
- [ ] Embed kube-proxy, which is currently unavailable
- [x] Auto-generate config files based on an opinionated setup
- [x] Generate a PKI
- [x] Setup a memberlist for cluster discovery
- [x] Use wesher or some kind of wireguard mesh VPN
- [x] Switch to a "container" (namespace set) before setting services up
ansible_collections/*
[defaults]
# Chick-Fil-A would like a word...
nocows = True
# Installs collections into [current dir]/ansible_collections/namespace/collection_name
collections_path = ./
# Installs roles into [current dir]/roles/namespace.rolename
roles_path = ./roles
### REQUIRED
# The namespace of the collection. This can be a company/brand/organization or product namespace under which all
# content lives. May only contain alphanumeric lowercase characters and underscores. Namespaces cannot start with
# underscores or numbers and cannot contain consecutive underscores
namespace: acides
# The name of the collection. Has the same character restrictions as 'namespace'
name: hepto
# The version of the collection. Must be compatible with semantic versioning
version: 0.0.1
# The path to the Markdown (.md) readme file. This path is relative to the root of the collection
readme: README.md
# A list of the collection's content authors. Can be just the name or in the format 'Full Name <email> (url)
# @nicks:irc/im.site#channel'
authors:
  - kaiyou
### OPTIONAL but strongly recommended
# A short summary description of the collection
# description: your collection description
# Either a single license or a list of licenses for content inside of a collection. Ansible Galaxy currently only
# accepts L(SPDX,https://spdx.org/licenses/) licenses. This key is mutually exclusive with 'license_file'
# license:
# - GPL-2.0-or-later
# The path to the license file for the collection. This path is relative to the root of the collection. This key is
# mutually exclusive with 'license'
# license_file: ''
# A list of tags you want to associate with the collection for indexing/searching. A tag name has the same character
# requirements as 'namespace' and 'name'
# tags: []
# Collections that this collection requires to be installed for it to be usable. The key of the dict is the
# collection label 'namespace.name'. The value is a version range
# L(specifiers,https://python-semanticversion.readthedocs.io/en/latest/#requirement-specification). Multiple version
# range specifiers can be set and are separated by ','
dependencies:
  "community.general": ">=7.5.1"
# The URL of the originating SCM repository
# repository: http://example.com/repository
# The URL to any online docs
# documentation: http://docs.example.com
# The URL to the homepage of the collection/project
# homepage: http://example.com
# The URL to the collection issue tracker
# issues: http://example.com/issue/tracker
# A list of file glob-like patterns used to filter any files or directories that should not be included in the build
# artifact. A pattern is matched from the relative path of the file or directory of the collection directory. This
# uses 'fnmatch' to match the files or directories. Some directories and files like 'galaxy.yml', '*.pyc', '*.retry',
# and '.git' are always filtered
# build_ignore: []
---
all:
  vars:
    # Access to cloud provider
    cloud_provider: hetzner
    node_prefix: node
    # hcloud_token: YourHetznerToken
    # hcloud_ssh_key: yourkey@host
    # Cluster settings
    cluster_name: hepto
    cluster_key: deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
    # Nodes to spawn, with additional groups (nodes is implied)
    nodes:
      ham: ["master", "anchor"]
      stram: ["public"]
      gram: []
master:
  vars:
    # Flux configuration
    #flux:
    #  source:
    #    url: https://my-repo
    #    branch: test
---
all:
  vars:
    cluster_key: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
# All nodes
nodes:
  hosts:
    riri:
      ansible_host: "2a01:4f9:c012:8136::1"
      node_ip: "2a01:4f9:c012:8136::101/64"
    fifi:
      ansible_host: "2a01:4f9:c010:9d94::1"
      node_ip: "2a01:4f9:c010:9d94::101/64"
    loulou:
      ansible_host: "2a01:4f9:c010:b591::1"
      node_ip: "2a01:4f9:c010:b591::101/64"
    donald:
      ansible_host: "2a01:4f9:c012:8136::1"
      node_ip: "2a01:4f9:c012:8136::1ff/64"
  vars:
    ansible_user: root
    node_gw: "fe80::1"
# Nodes that will act as masters, others will be kubelets
master:
  hosts:
    donald:
# Nodes that will act as anchors
anchor:
  hosts:
    donald:
# Nodes that are publicly exposed
public:
  hosts:
    riri:
- hosts: master
  roles:
    - role: hepto # Load variables from the deployment role
      when: false
  tasks:
    # This sets the host DNS at least temporarily to match the provided
    # DNS. This helps bootstrapping when hosts lack IPv4 and a DNS64 instance
    # is provided
    - name: Set the host DNS
      copy:
        dest: "/etc/resolv.conf"
        content: "nameserver {{ node_dns }}"
      when: node_dns is defined
    # This is required so that kubectl apply -k works with remote git
    - name: Install git for kustomize
      ansible.builtin.package:
        name: git
        state: present
        update_cache: yes
    - name: Wait for the cluster apiserver
      ansible.builtin.shell: kubectl get node
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
      retries: 10
      delay: 5
    # Hepto outputs a yaml that we load into variables for proper bootstrap
    - name: Dump cluster info
      ansible.builtin.shell: |
        export $(cat {{ config_file }})
        hepto -name {{ node_name }} -info
      register: info_dump
    - name: Load cluster info
      set_fact:
        hepto_info: "{{ info_dump.stdout | from_yaml }}"
    # We label each node with the standard node-role label, using
    # ansible groups
    - name: Label cluster nodes
      ansible.builtin.shell: "kubectl label node {{ hostvars[item.1].inventory_hostname }} node-role.kubernetes.io/{{ item.0.role }}=true"
      loop: "{{ groups | dict2items(key_name='role', value_name='nodes') | subelements('nodes') }}"
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
      retries: 10
      delay: 5
    - name: Taint master nodes
      ansible.builtin.shell: "kubectl taint --overwrite=true node {{ hostvars[item].inventory_hostname }} node-role.kubernetes.io/control-plane=\"\":NoSchedule"
      loop: "{{ groups['master'] }}"
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
    - name: Create default namespace
      ansible.builtin.shell: "kubectl create namespace default || true"
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
    - name: Create bootstrap directory
      file:
        path: "/tmp/hepto"
        state: directory
    - name: Prepare target directories
      file:
        path: "/tmp/hepto/{{ item.path }}"
        state: directory
      with_community.general.filetree: "{{ playbook_dir }}/../../bootstrap"
      when: item.state == 'directory'
    - name: Copy the kustomization
      template:
        src: "{{ item.src }}"
        dest: "/tmp/hepto/{{ item.path }}"
      with_community.general.filetree: "{{ playbook_dir }}/../../bootstrap"
      when: item.state == 'file'
    - name: Deploy bootstrap kustomizations
      # Sleep after applying, so that any CRD has time to settle down (the CRD
      # registration controller is on a fixed timer)
      ansible.builtin.shell: "kubectl apply -k /tmp/hepto/{{ item }}; sleep 30"
      loop:
        - calico
        - networking
        - resources
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
    - name: Apply additional kustomizations
      ansible.builtin.shell: "kubectl apply -k {{ item }}; sleep 30"
      loop: "{{ bootstrap.kustomizations }}"
      when: bootstrap.kustomizations is defined
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
---
# These tasks upload the build artifact from CI to an S3
# bucket, for two main reasons:
# - it is complex and unsupported to download the artifact of the current build directly from Gitlab
# - uploading from the CI to many cloud providers can become costly
- name: Try and get the url
  amazon.aws.s3_object:
    endpoint_url: "{{ s3_endpoint }}"
    bucket: "{{ s3_bucket }}"
    region: "{{ s3_region }}"
    access_key: "{{ s3_access_key }}"
    secret_key: "{{ s3_secret_key }}"
    object: "hepto.{{ lookup('env', 'CI_PIPELINE_ID') }}"
    mode: geturl
  register: get
  ignore_errors: true
- name: Upload the file when necessary
  when: get.failed
  amazon.aws.s3_object:
    endpoint_url: "{{ s3_endpoint }}"
    bucket: "{{ s3_bucket }}"
    region: "{{ s3_region }}"
    access_key: "{{ s3_access_key }}"
    secret_key: "{{ s3_secret_key }}"
    object: "hepto.{{ lookup('env', 'CI_PIPELINE_ID') }}"
    src: "{{ lookup('env', 'CI_PROJECT_DIR') }}/hepto"
    mode: put
    encrypt: false
  register: put
# This is hacky as hell, yet required for the fact to be properly altered on
# all hosts. The when clause makes it possible to call this outside the playbook
# during CI warmup
- name: Set the hepto download url for nodes
  delegate_to: "{{ item }}"
  delegate_facts: true
  when: "'nodes' in groups"
  with_items: "{{ groups['nodes'] }}"
  set_fact:
    hepto_url: "{{ put.url if put.changed else get.url }}"
---
- hosts: localhost
  tasks:
    - name: Cleanup cloud deployment
      include_role:
        name: cloud
        tasks_from: cleanup.yaml
---
# This play merely creates nodes and/or probes them, for inclusion
# when deploying or using the cluster
- hosts: localhost
  roles:
    - cloud
---
# This play will do nothing if no cloud deployment is specified
- import_playbook: cloud.yaml
# If this is a CI deployment, upload hepto to a cloud URL
- hosts: localhost
  tasks:
    - when: "lookup('env', 'CI_PIPELINE_ID') != ''"
      include_tasks: ./ciupload.yaml
# Deploy the nodes, either explicitly declared or deployed to cloud
- hosts: nodes
  roles:
    - hepto
# Bootstrap the cluster
- import_playbook: bootstrap.yaml
---
# Import the cloud playbook to populate inventory
- import_playbook: cloud.yaml
# Do the actual testing from master
- hosts: master
  roles:
    - role: hepto # import variables from the deployment role
      when: false
  tasks:
    - name: Deploy podinfo
      ansible.builtin.shell: |
        kubectl create namespace default
        kubectl apply -k github.com/stefanprodan/podinfo//kustomize
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
    - name: Expose the podinfo service
      ansible.builtin.shell: |
        kubectl patch service podinfo --type merge --patch '{"spec":{"externalIPs":{{ external_ips | to_json }} } }'
      environment:
        KUBECONFIG: "{{ kubeconfig }}"
    # This is run from master for now, running from localhost is too complex
    - name: Try and access the public URL
      ansible.builtin.get_url:
        url: "http://[{{ external_ips | first }}]:9898"
        dest: /tmp
      retries: 100
      delay: 30
---
collections:
  - name: community.general
    version: 9.2.0
cloud_provider: ""
image: debian12
# We set these in a separate variable to later combine the dictionaries
base:
  hcloud:
    type: cx22
    zone: nbg1
  scaleway:
    type: PLAY2-NANO
    zone: fr-par-1
images:
  scaleway:
    debian12: debian_bookworm
  hcloud:
    debian12: debian-12
terraform {
  required_providers {
    hcloud = {
      source  = "hetznercloud/hcloud"
      version = "~> 1.45"
    }
  }
}

provider "hcloud" {
  token = var.token
}

module "node" {
  for_each = toset(var.nodes)
  source   = "./node"
  name     = "${var.prefix}-${each.value}"
  type     = var.type
  image    = var.image
  location = var.zone
  ssh      = var.ssh
}