---
title: Design goals and rationale
---
## Goals
Hepto is yet another Kubernetes distribution. It is driven by the following goals:
- working out of the box over WAN without compromising security
- offering a highly opinionated Kubernetes experience
- hiding much of the complexity of setting up Kubernetes
- running with the minimum resource footprint (mostly memory)
- bundling vanilla upstream with as few forks as possible, ideally none
- compiling to a (true) single binary on both amd64 and arm64
## User experience
Instead of providing a general-purpose Kubernetes distribution, we strongly focus
on the user experience of a documented class of users: individuals and small
organizations that wish to run Kubernetes in a geo-distributed setting at the lowest
possible infrastructure cost. Here are some dos and don'ts:
- *do* provide the minimal knobs:
  - *do not* offer non-essential choices to the user
  - *do not* provide a setting if a sane default covers the documented use case
  - *do* name and document settings as simply as possible
  - *do not* expose Kubernetes concepts if they seem too complex for the
    documented use case
- *do* facilitate forks:
  - *do* advertise that any competing use case is welcome to fork the project
    and provide a different user experience
  - *do* provide the best support to forks with regard to our common codebase
  - *do* write documentation about general technical choices that would be useful
    to separate use cases
  - *do* consider how separate use cases could be implemented on top of our codebase
    and make explicit choices, including those that prevent different use cases
## Technical rationale
Instead of optimizing for disk size or embedding all kinds of batteries, hepto
focuses on the minimal feature set for a Kubernetes distribution. Here are some
dos and don'ts:
- *do* optimize for lower resident memory, which implies:
  - *do not* unpack multiple static binaries at runtime, which do not map well
    in the page cache
  - *do not* optimize for size by compressing part of the binary and inflating it
    at runtime
  - *do not* embed external binaries and assume Internet access to load further
    resources
- *do* work out dependency issues, which implies:
  - *do* update packages as much as possible
  - *do* depend on Go modules from vanilla upstreams
  - *do not* depend on our own forks, and if possible not on any fork at all
  - *do not* pin dependencies unless required for compatibility
  - *do* write down the rationale for every quirk
  - *do* upstream as many fixes as possible
- *do* offer an opinionated experience:
  - *do* make choices explicit and document their rationale
  - *do* hide complexity any time a user choice is not required
  - *do not* offer too many choices to the user
  - *do not* embed optional batteries
---
title: Staying close to vanilla
---
Building a single binary requires some fine-tuning of dependency versions for
compatibility. This was hardly possible before Kubernetes 1.26 without explicitly
forking some packages.
All pinned dependencies are declared as `replace` in `go.mod` with associated
explanation. Their number should be reduced to the bare minimum, and all other
dependencies should be updated based on `go get -u`.
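For illustration, a pin and its rationale would look like this in `go.mod` (the
module and version below are hypothetical, not an actual hepto pin):

```
// hypothetical example: pinned for compatibility with the bundled etcd client
replace google.golang.org/grpc => google.golang.org/grpc v1.51.0
```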
export HEPTO_CLUSTER=hepto
export HEPTO_KEY=deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
package k8s
import (
"context"
"net/url"
"time"
"go.acides.org/pekahi"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/util/webhook"
"k8s.io/client-go/discovery"
"k8s.io/client-go/discovery/cached/memory"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/dynamic/dynamicinformer"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/metadata"
"k8s.io/client-go/metadata/metadatainformer"
"k8s.io/client-go/rest"
"k8s.io/client-go/restmapper"
"k8s.io/client-go/tools/events"
"k8s.io/kube-aggregator/pkg/apiserver"
)
type Clients struct {
KubeConfig *rest.Config
Client *kubernetes.Clientset
DynClient *dynamic.DynamicClient
EventClient *kubernetes.Clientset
MetadataClient metadata.Interface
CachedClient discovery.CachedDiscoveryInterface
RESTMapper *restmapper.DeferredDiscoveryRESTMapper
Broadcaster events.EventBroadcasterAdapter // The legacy interface is still required by the scheduler
Informer informers.SharedInformerFactory
MetadataInformer metadatainformer.SharedInformerFactory
DynInformer dynamicinformer.DynamicSharedInformerFactory
Started chan struct{}
}
// Make clients authenticating with a certificate (typically for kubelets)
func NewCertsClient(host string, ca *pekahi.Certificate, cert *pekahi.Certificate) (*Clients, error) {
kc := newKC(host, ca)
kc.TLSClientConfig.CertFile = cert.CertPath()
kc.TLSClientConfig.KeyFile = cert.KeyPath()
return newClientsForKC(kc)
}
// Make clients from the master context itself
func NewTokenClients(host string, ca *pekahi.Certificate, token string) (*Clients, error) {
kc := newKC(host, ca)
kc.BearerToken = token
return newClientsForKC(kc)
}
// Make a k8s client config for connecting to the master
func newKC(host string, ca *pekahi.Certificate) *rest.Config {
return &rest.Config{
Host: host,
TLSClientConfig: rest.TLSClientConfig{
CAFile: ca.CertPath(),
},
}
}
func newClientsForKC(kc *rest.Config) (*Clients, error) {
baseClient, err := kubernetes.NewForConfig(kc)
if err != nil {
return nil, err
}
eventsClient, err := kubernetes.NewForConfig(kc)
if err != nil {
return nil, err
}
dynClient, err := dynamic.NewForConfig(kc)
if err != nil {
return nil, err
}
metadataClient, err := metadata.NewForConfig(kc)
if err != nil {
return nil, err
}
cachedClient := memory.NewMemCacheClient(baseClient)
return &Clients{
KubeConfig: kc,
Client: baseClient,
DynClient: dynClient,
EventClient: eventsClient,
MetadataClient: metadataClient,
RESTMapper: restmapper.NewDeferredDiscoveryRESTMapper(cachedClient),
Broadcaster: events.NewEventBroadcasterAdapterWithContext(context.Background(), eventsClient),
Informer: informers.NewSharedInformerFactory(baseClient, 10*time.Minute),
MetadataInformer: metadatainformer.NewSharedInformerFactory(metadataClient, 10*time.Minute),
DynInformer: dynamicinformer.NewDynamicSharedInformerFactory(dynClient, 10*time.Minute),
Started: make(chan struct{}),
}, nil
}
func (c *Clients) Start(ctx context.Context) {
c.Broadcaster.StartRecordingToSink(ctx.Done())
c.Informer.Start(ctx.Done())
c.MetadataInformer.Start(ctx.Done())
c.DynInformer.Start(ctx.Done())
c.Informer.WaitForCacheSync(ctx.Done())
c.MetadataInformer.WaitForCacheSync(ctx.Done())
c.DynInformer.WaitForCacheSync(ctx.Done())
c.RESTMapper.Reset()
close(c.Started)
wait.Until(c.RESTMapper.Reset, 30*time.Second, ctx.Done())
}
func (c *Clients) Stop() {
c.Broadcaster.Shutdown()
}
// Make a service resolver for a given client config
// The resolver returns the apiserver endpoint for kubernetes.default, and
// resolves other services through their endpoints
func (c *Clients) ServiceResolver() webhook.ServiceResolver {
host, _ := url.Parse(c.KubeConfig.Host)
return apiserver.NewLoopbackServiceResolver(apiserver.NewEndpointServiceResolver(
c.Informer.Core().V1().Services().Lister(),
c.Informer.Core().V1().Endpoints().Lister(),
), host)
}
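// Hypothetical usage sketch (not part of hepto itself): build clients with
// certificate authentication, start the shared informers and wait for the
// caches to sync. The apiserver address and the ca/cert arguments are
// assumptions, standing in for values provided elsewhere by the cluster.
func exampleStartClients(ctx context.Context, ca, cert *pekahi.Certificate) (*Clients, error) {
	clients, err := NewCertsClient("https://[::1]:6443", ca, cert)
	if err != nil {
		return nil, err
	}
	// Start blocks until the context is cancelled, so run it in the background;
	// Started is closed once all informer caches are in sync
	go clients.Start(ctx)
	<-clients.Started
	return clients, nil
}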
package k8s
import (
"os"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"k8s.io/mount-utils"
)
// This is a bare minimum mounter interface implementation
// It does not use the mount command, contrary to the vanilla implementation,
// and skips most of the vanilla optimizations and handling of CIFS or NFS edge
// cases
type Mounter struct{}
var _ mount.Interface = &Mounter{}
func (m *Mounter) Mount(source string, target string, fstype string, options []string) error {
return m.MountSensitiveWithoutSystemdWithMountFlags(source, target, fstype, options, []string{}, nil)
}
func (m *Mounter) MountSensitive(source string, target string, fstype string, options []string, sensitiveOptions []string) error {
return m.MountSensitiveWithoutSystemdWithMountFlags(source, target, fstype, options, sensitiveOptions, nil)
}
func (m *Mounter) MountSensitiveWithoutSystemd(source string, target string, fstype string, options []string, sensitiveOptions []string) error {
return m.MountSensitiveWithoutSystemdWithMountFlags(source, target, fstype, options, sensitiveOptions, nil)
}
func (m *Mounter) MountSensitiveWithoutSystemdWithMountFlags(source string, target string, fstype string, options []string, sensitiveOptions []string, mountFlags []string) error {
if source == "" {
source = fstype
}
// Kubernetes does not actually pass useful mount flags, except for -c, which we
// do not consider since we do not canonicalize anyway, so there should not be
// any flag to parse at this moment
var flags uintptr
// Prepare options and check whether this is a bind mount
actualOptions := []string{}
allOptions := append(options, sensitiveOptions...)
bindMount := false
for _, option := range allOptions {
switch option {
case "bind":
bindMount = true
continue
}
actualOptions = append(actualOptions, option)
}
// If this is a bind mount, start by... bind mounting (which supports no options),
// then set the flag for remounting with the given options (this is still quite useless
// since no flag is currently parsed, but is meant for future-proofing bind mounts)
if bindMount {
err := unix.Mount(source, target, "bind", unix.MS_BIND, "")
if err != nil {
return err
}
source = ""
flags |= unix.MS_REMOUNT
}
return unix.Mount(source, target, fstype, flags, strings.Join(actualOptions, ","))
}
func (m *Mounter) Unmount(target string) error {
return unix.Unmount(target, 0)
}
func (m *Mounter) List() ([]mount.MountPoint, error) {
return mount.ListProcMounts("/proc/mounts")
}
func (m *Mounter) IsLikelyNotMountPoint(file string) (bool, error) {
// We do not need the vanilla performance optimizations, so simply invert IsMountPoint
result, err := m.IsMountPoint(file)
return !result, err
}
func (m *Mounter) CanSafelySkipMountPointCheck() bool {
// Assume that we are running Linux, which is a hepto requisite
return true
}
func (m *Mounter) IsMountPoint(file string) (bool, error) {
// We do not need as much performance as vanilla, so keep this simple
file, err := filepath.EvalSymlinks(file)
if err != nil {
return false, err
}
mounts, err := m.List()
if err != nil {
return false, err
}
for _, mount := range mounts {
if mount.Path == file {
return true, nil
}
}
return false, nil
}
func (m *Mounter) GetMountRefs(pathname string) ([]string, error) {
_, err := os.Stat(pathname)
if err != nil {
return nil, err
}
file, err := filepath.EvalSymlinks(pathname)
if err != nil {
return nil, err
}
return mount.SearchMountPoints(file, "/proc/self/mountinfo")
}
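// Hypothetical usage sketch: mount a scratch tmpfs with this minimal mounter
// and unmount it again. The /mnt/scratch path is an assumption and must exist
// beforehand; options are joined verbatim into the mount(2) data string, and a
// "bind" option would instead trigger the MS_BIND then MS_REMOUNT path above.
func exampleScratchMount() error {
	m := &Mounter{}
	if err := m.Mount("tmpfs", "/mnt/scratch", "tmpfs", []string{"size=64m"}); err != nil {
		return err
	}
	return m.Unmount("/mnt/scratch")
}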
package k8s
import (
"fmt"
"time"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apiserver/pkg/registry/generic"
"k8s.io/apiserver/pkg/server/options"
"k8s.io/apiserver/pkg/server/storage"
"k8s.io/apiserver/pkg/storage/storagebackend"
"k8s.io/apiserver/pkg/util/flowcontrol/request"
)
// Reimplement the rest options factory to stop depending on option parsers; this instantiates the
// storage factory at the right location and prefix for a given resource
type RestOptionsFactory struct {
StorageFactory storage.StorageFactory
StorageObjectCountTracker request.StorageObjectCountTracker
}
func (f *RestOptionsFactory) GetRESTOptions(resource schema.GroupResource, example runtime.Object) (generic.RESTOptions, error) {
storageConfig, err := f.StorageFactory.NewConfig(resource, example)
if err != nil {
return generic.RESTOptions{}, fmt.Errorf("cannot find storage destination for %v, due to %v", resource, err.Error())
}
return generic.RESTOptions{
StorageConfig: storageConfig,
Decorator: generic.UndecoratedStorage,
DeleteCollectionWorkers: 1,
EnableGarbageCollection: true,
ResourcePrefix: f.StorageFactory.ResourcePrefix(resource),
CountMetricPollPeriod: time.Minute,
StorageObjectCountTracker: f.StorageObjectCountTracker,
}, nil
}
func PrepareStorage(codecs runtime.StorageSerializer, scheme *runtime.Scheme, resources storage.APIResourceConfigSource) *RestOptionsFactory {
// Etcd backend supports being created with a specific codec, however later created storage
// factories override that codec at runtime thanks to using DefaultStorageFactory instead
// of vanilla SimpleStorageFactory, so we create the backend with no pre-configured codec
etcdConfig := storagebackend.NewDefaultConfig("/registry", unstructured.UnstructuredJSONScheme)
etcdConfig.Transport.ServerList = []string{"http://[::1]:2379"}
// The storage factory getter abstracts all storage for the server, by specifying the scheme and codecs
// on top of the storage backend. Vanilla code sets a special multi-group versioner to avoid issues
// with large objects (see cmd/kube-apiserver/app/apiextensions.go), which we ignore here
var storageFactory storage.StorageFactory
if codecs == nil {
storageFactory = &options.SimpleStorageFactory{StorageConfig: *etcdConfig}
} else {
storageFactory = storage.NewDefaultStorageFactory(
*etcdConfig,
runtime.ContentTypeJSON,
codecs,
storage.NewDefaultResourceEncodingConfig(scheme),
resources,
map[schema.GroupResource]string{},
)
}
return &RestOptionsFactory{
StorageFactory: storageFactory,
}
}
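// Hypothetical usage sketch: obtain undecorated RESTOptions for the core "pods"
// resource against the local etcd. Passing nil codecs selects the simple,
// scheme-less factory branch above, so the scheme and resource config may be nil too.
func examplePodRESTOptions() (generic.RESTOptions, error) {
	factory := PrepareStorage(nil, nil, nil)
	return factory.GetRESTOptions(schema.GroupResource{Resource: "pods"}, nil)
}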
package cluster
import (
"forge.tedomum.net/acides/hepto/hepto/pkg/pki"
"github.com/sirupsen/logrus"
)
func (c *Cluster) initCerts() {
// Prepare the cluster PKI
if c.node.Role == Master {
ca, err := pki.NewClusterCA("/pki")
if err != nil {
logrus.Fatal("could not initialize pki: ", err)
}
masterCerts, err := pki.NewMasterCerts("/master", c.networking.NodeAddress.IP)
if err != nil {
logrus.Fatal("could not initialize master certs: ", err)
}
c.pki = ca
c.masterCerts = masterCerts
c.pki.SignMasterCerts(c.masterCerts)
} else {
ca, err := pki.EmptyClusterCA("/pki")
if err != nil {
logrus.Fatal("could not initialize pki: ", err)
}
c.pki = ca
}
c.ml.State.PKI = c.pki
// Initialize node certificates
certs, err := pki.NewNodeCerts("/certs", c.node.Name)
if err != nil {
logrus.Fatal("could not initialize node certificates: ", err)
}
c.certs = certs
c.ml.State.Certificates = make(map[string]*pki.NodeCerts)
c.ml.State.Certificates[c.node.Name] = certs
}
func (c *Cluster) handlePKI() {
if c.node.Role != Master {
return
}
for name, certs := range c.ml.State.Certificates {
c.pki.SignNodeCerts(name, certs)
}
}
// Hepto cluster implements the main clustering logic, based on
// sml (Simple Memberlist) for cluster bootstrapping and node discovery
package cluster
import (
"forge.tedomum.net/acides/hepto/hepto/pkg/pki"
"forge.tedomum.net/acides/hepto/hepto/pkg/sml"
"forge.tedomum.net/acides/hepto/hepto/pkg/wg"
"github.com/sirupsen/logrus"
)
type Cluster struct {
settings *ClusterSettings
ml *sml.Memberlist[HeptoMeta, HeptoState, *HeptoMeta, *HeptoState]
vpn *wg.Wireguard
networking *ClusterNetworking
node *NodeSettings
certs *pki.NodeCerts
masterCerts *pki.MasterCerts
pki *pki.ClusterCA
services *ClusterServices
}
func New(settings *ClusterSettings, node *NodeSettings) *Cluster {
return &Cluster{
settings: settings,
node: node,
networking: NewClusterNetworking(settings.Name, node.Name),
ml: sml.New[HeptoMeta, HeptoState](node.Name, node.IP, node.Port, node.Anchors, settings.Key),
services: NewClusterServices(),
}
}
func (c *Cluster) Run() {
// Initialize components, certs must be initialized before
// environment, which references master keys
c.initVPN()
c.initCerts()
c.initEnv()
c.ml.Meta.Role = string(c.node.Role)
// Start waiting for events
events := c.ml.Events()
err := c.ml.Start()
if err != nil {
logrus.Fatal("could not start memberlist: ", err)
}
instr := c.ml.Instr()
instrUpdates := instr.Updates()
go c.ml.Run()
for {
select {
case <-events:
c.handlePKI()
c.updateVPN()
c.services.Update(c)
case <-instrUpdates:
c.networking.MTU = instr.MinMTU()
c.updateVPN()
case <-c.services.Done():
logrus.Fatal("a service stopped unexpectedly")
}
}
}
package cluster
import (
"errors"
"net"
"forge.tedomum.net/acides/hepto/hepto/pkg/types"
)
type ClusterSettings struct {
// Cluster name, should be locally unique
Name string
// Cluster key, must be shared across nodes
Key []byte
}
type NodeSettings struct {
// Node name, must be unique inside a cluster
Name string
// Node role
Role NodeRole
// Port for initial memberlist negotiations
Port int
// Public IPv6 address for the node
IP net.IP
// Anchors for this node to join
Anchors []string
}
type NodeRole string
const (
Master NodeRole = "master"
Node NodeRole = "node"
)
func (r *NodeRole) String() string {
return string(*r)
}
func (r *NodeRole) Type() string {
return "NodeRole"
}
func (r *NodeRole) Set(v string) error {
cast := (NodeRole)(v)
if cast == Master || cast == Node {
*r = (NodeRole)(cast)
return nil
}
return errors.New("wrong node type")
}
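// String, Type and Set follow the value interface used by common flag libraries
// (Type being the extra method pflag expects), so a role can presumably be bound
// directly to a command-line flag. A minimal hypothetical sketch of standalone use:
func exampleParseRole(v string) (NodeRole, error) {
	var role NodeRole
	// Set rejects anything other than "master" or "node"
	if err := role.Set(v); err != nil {
		return "", err
	}
	return role, nil
}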
type ClusterNetworking struct {
NodeNet types.Address
NodeAddress types.Address
ServiceNet types.Address
PodNet types.Address
MTU int
}
// Create a new cluster networking based on sane settings
// All networks are ULA private networks derived deterministically
// from the cluster name
func NewClusterNetworking(clusterName string, nodeName string) *ClusterNetworking {
// Cluster nodes are hosted on a /64 at :1, e.g. fd00:0:1:0::/64
nodeNet := types.ULA(clusterName, 64, 1)
// Current node address is derived from node name inside the cluster node net
nodeAddress := nodeNet.DeriveAddress(nodeName)
// Pods are hosted on a /56 at :2, e.g. fd00:0:2:0::/56
podNet := types.ULA(clusterName, 56, 2)
// Services are hosted on a /112 at :3
serviceNet := types.ULA(clusterName, 112, 3)
return &ClusterNetworking{nodeNet, nodeAddress, serviceNet, podNet, 1500}
}
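// Hypothetical sketch of how a first master node could be assembled from these
// settings and started. The name, port and address below are illustrative only,
// and the empty anchor list assumes this is the first node of the cluster.
func exampleRunFirstMaster(key []byte, publicIP net.IP) {
	settings := &ClusterSettings{Name: "hepto", Key: key}
	node := &NodeSettings{
		Name:    "node1",
		Role:    Master,
		Port:    7123, // illustrative memberlist port
		IP:      publicIP,
		Anchors: []string{},
	}
	// Run blocks, reacting to memberlist events and service failures
	New(settings, node).Run()
}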
package cluster
import (
"fmt"
"os"
"path"
"github.com/sirupsen/logrus"
)
const homeDir = "/root"
const binDir = "/bin"
// Initialize the hepto environment:
// - clean up any previous binaries and configs
// - deploy subcommand symlinks
// - write the admin kubeconfig on master nodes
func (c *Cluster) initEnv() {
// Remove all binaries and configs
err := os.RemoveAll(binDir)
if err != nil {
logrus.Fatal(err)
}
err = os.RemoveAll(homeDir)
if err != nil {
logrus.Fatal(err)
}
// Create bin directory and all useful symlinks
err = os.Setenv("PATH", binDir)
if err != nil {
logrus.Fatal(err)
}
err = os.MkdirAll(binDir, 0o755)
if err != nil {
logrus.Fatal(err)
}
for _, name := range []string{"kubectl", "ctr", "mount", "containerd-shim", "containerd-shim-runc-v2"} {
err = os.Symlink("/proc/1/exe", path.Join(binDir, name))
if err != nil {
logrus.Fatal(err)
}
}
// Create the admin kubeconfig on master only
if c.node.Role == Master {
err = os.MkdirAll(path.Join(homeDir, ".kube"), 0o755)
if err != nil {
logrus.Fatal(err)
}
rootConfig := KubeConfig{
URL: fmt.Sprintf("https://[%s]:6443", c.networking.NodeAddress.IP.String()),
CACert: c.pki.TLS.CertPath(),
ClientKey: c.masterCerts.RootClient.KeyPath(),
ClientCert: c.masterCerts.RootClient.CertPath(),
}
err = rootConfig.Write(path.Join(homeDir, ".kube/config"))
if err != nil {
logrus.Fatal(err)
}
}
}
package cluster
import (
"os"
"text/template"
)
const kubeconfigTemplate = `
apiVersion: v1
clusters:
- cluster:
    server: {{.URL}}
    certificate-authority: {{.CACert}}
  name: local
contexts:
- context:
    cluster: local
    namespace: default
    user: user
  name: Default
current-context: Default
kind: Config
preferences: {}
users:
- name: user
  user:
    client-certificate: {{.ClientCert}}
    client-key: {{.ClientKey}}
`
type KubeConfig struct {
URL string
CACert string
ClientCert string
ClientKey string
}
func (k *KubeConfig) Write(path string) error {
t, _ := template.New("kubeconfig").Parse(kubeconfigTemplate)
file, err := os.Create(path)
if err != nil {
return err
}
defer file.Close()
return t.Execute(file, k)
}
const kubeletConfigTemplate = `
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
authentication:
  anonymous:
    enabled: false
  webhook:
    enabled: true
  x509:
    clientCAFile: "{{.CACert}}"
authorization:
  mode: Webhook
clusterDomain: "cluster.local"
imageMinimumGCAge: "120h"
resolvConf: "/etc/resolv.conf"
cgroupDriver: cgroupfs
runtimeRequestTimeout: "15m"
tlsCertFile: "{{.TLSCert}}"
tlsPrivateKeyFile: "{{.TLSKey}}"
syncFrequency: 1m
fileCheckFrequency: 1m
httpCheckFrequency: 1m
nodeStatusUpdateFrequency: 20s
nodeStatusReportFrequency: 5m
`
type KubeletConfig struct {
CACert string
TLSCert string
TLSKey string
}
func (k *KubeletConfig) Write(path string) error {
t, _ := template.New("kubeletconfig").Parse(kubeletConfigTemplate)
file, err := os.Create(path)
if err != nil {
return err
}
defer file.Close()
return t.Execute(file, k)
}
const containerdTemplate = `
plugin_dir = ""
root = "{{.RootDir}}"
state = "/run/containerd"
version = 2
[cgroup]
path = ""
[grpc]
address = "{{.Socket}}"
`
type ContainerdConfig struct {
RootDir string
Socket string
}
func (c *ContainerdConfig) Write(path string) error {
t, _ := template.New("containerd").Parse(containerdTemplate)
file, err := os.Create(path)
if err != nil {
return err
}
defer file.Close()
return t.Execute(file, c)
}
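// Hypothetical usage sketch: render the containerd configuration to disk.
// The paths below are assumptions standing in for the locations hepto actually uses.
func exampleWriteContainerdConfig() error {
	cfg := ContainerdConfig{
		RootDir: "/var/lib/containerd",
		Socket:  "/run/containerd/containerd.sock",
	}
	return cfg.Write("/etc/containerd/config.toml")
}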