From 0c4481826e63c67fdcbc5d68b9b150343a60348c Mon Sep 17 00:00:00 2001
From: kaiyou <dev@kaiyou.fr>
Date: Wed, 27 Sep 2023 16:17:45 +0200
Subject: [PATCH] Switch to using Dolly for containerization

---
 cmd/hepto/config.go  |  42 ++-----
 cmd/hepto/service.go | 256 +++++++++++++++----------------------------
 go.mod               |   6 +-
 go.sum               |   2 +
 4 files changed, 107 insertions(+), 199 deletions(-)

diff --git a/cmd/hepto/config.go b/cmd/hepto/config.go
index 603ce94..d7b2f54 100644
--- a/cmd/hepto/config.go
+++ b/cmd/hepto/config.go
@@ -3,14 +3,11 @@ package hepto
 import (
 	"io"
 	"net"
-	"os"
-	"path"
 
 	"github.com/go-logr/logr"
 	"github.com/go-logr/zapr"
 	"go.acides.org/hepto/services"
 	"go.acides.org/hepto/utils"
-	"go.acides.org/selfcontain"
 )
 
 type Config struct {
@@ -19,9 +16,17 @@ type Config struct {
 	Pprof         bool
 	Logger        logr.Logger
 	LogLevel      int
-	Cluster       services.ClusterSettings
-	Container     selfcontain.Config
-	Node          services.NodeSettings
+
+	// Networking
+	Iface   string
+	DNS     []net.IP
+	Address net.IPNet
+	Gateway net.IP
+	Mounts  map[string]string
+
+	// Component configs
+	Cluster services.ClusterSettings
+	Node    services.NodeSettings
 }
 
 var config Config
@@ -35,34 +40,9 @@ func (c *Config) SetupLogger(output io.ReadWriteCloser) error {
 	// Use a standard logr logger for most things
 	logger := zapr.NewLogger(zapLogger)
 	c.Logger = logger
-	c.Container.Logger = logger
 	c.Cluster.Logger = logger
 	// Pass low level loggers for hooking (containerd and k8s mostly)
 	c.Cluster.ZapLogger = zapLogger
 	c.Cluster.LogrusLogger = logrusLogger
 	return nil
 }
-
-func (c *Config) SetupContainer() error {
-	// Setup data root
-	c.Container.Data = path.Join(c.DataDir, c.Cluster.Name, c.Node.Name)
-	err := os.MkdirAll(c.Container.Data, 0o700)
-	if err != nil {
-		return err
-	}
-	// Setup data persistence mounts
-	c.Container.Mounts[c.Cluster.DataDir] = c.Container.Data
-	// Set the container IP mask if required, defaults to /64
-	if len(c.Container.IP.Mask) == 0 {
-		mask := c.Container.IP.IP.DefaultMask()
-		if mask == nil {
-			mask = net.CIDRMask(64, 128)
-		}
-		c.Container.IP.Mask = mask
-	}
-	// Additional container settings
-	c.Container.Name = c.Node.Name
-	c.Container.Capabilities = additionalCapabilities
-	c.Container.Devices = additionalDevices
-	return nil
-}
diff --git a/cmd/hepto/service.go b/cmd/hepto/service.go
index 6c3f228..49fe401 100644
--- a/cmd/hepto/service.go
+++ b/cmd/hepto/service.go
@@ -1,10 +1,7 @@
 package hepto
 
 import (
-	"encoding/json"
 	"fmt"
-	"io"
-	"io/ioutil"
 	"net"
 	"os"
 	"os/exec"
@@ -14,12 +11,10 @@ import (
 	"github.com/spf13/cobra"
 	"github.com/spf13/pflag"
 	"github.com/spf13/viper"
+	"go.acides.org/dolly"
 	"go.acides.org/hepto/services"
-	"go.acides.org/selfcontain"
-	"golang.org/x/sys/unix"
 	"k8s.io/component-base/version/verflag"
 	"k8s.io/component-helpers/node/util/sysctl"
-	kubectl "k8s.io/kubectl/pkg/cmd"
 )
 
 var Hepto = &cobra.Command{
@@ -79,181 +74,111 @@ var Start = &cobra.Command{
 				return err
 			}
 		}
-		return config.SetupContainer()
+		return nil
 	},
 	RunE: func(cmd *cobra.Command, args []string) error {
-		newArgs := append([]string{Run.Use}, os.Args[2:]...)
-		// This is a hack while waiting for some refactoring in selfcontain
-		errChan := make(chan error)
-		go func() {
-			errChan <- selfcontain.RunWithArgs(&config.Container, newArgs)
-		}()
-		if config.Node.Role != "node" {
-			go func() {
-				// Wait so that the container is started and its state file exists
-				time.Sleep(5 * time.Second)
-				errChan <- apiserverForward()
-			}()
+		dataPath := path.Join(config.DataDir, config.Cluster.Name, config.Node.Name)
+		err := os.MkdirAll(dataPath, 0o755)
+		if err != nil {
+			return err
+		}
+		mounts := []*dolly.MountVolume{dolly.NewVolume(dataPath, config.Cluster.DataDir)}
+		for src, dst := range config.Mounts {
+			mounts = append(mounts, dolly.NewVolume(src, dst))
+		}
+		iface := dolly.NetIface{
+			Name:   "eth0",
+			Master: config.Iface,
+		}
+		if len(config.Address.IP) > 0 {
+			iface.Addresses = []net.IPNet{config.Address}
+		}
+		if len(config.Gateway) > 0 {
+			iface.Routes[&net.IPNet{}] = config.Gateway
 		}
-		return <-errChan
+		container := dolly.NewForking(
+			dolly.NewEmptyMount(config.Node.Name, append(
+				dolly.DefaultVolumes(),
+				mounts...,
+			)...),
+			&dolly.NetConfig{
+				Ifaces: []dolly.NetIface{iface},
+			},
+			&dolly.Forward{
+				HostBind:      net.ParseIP("::1"),
+				HostPort:      config.Cluster.LoopbackPort,
+				ContainerIP:   net.ParseIP("::1"),
+				ContainerPort: 6443,
+			},
+		)
+		return container.Run(runHepto)
 	},
 }
 
-// This is the apiserver forward hack, which proxifies the apiserver
-// from the host to the container
-// Everything is wrong here: so many magic numbers and constants, poor error
-// management, this function has to go away at some point
-func apiserverForward() error {
-	// Parsing container config and getting the namespace
-	statePath := path.Join(config.Container.Data, config.Container.Name, "state.json")
-	stateBytes, err := ioutil.ReadFile(statePath)
+func runHepto() error {
+	// Install ourselves as a hooking binary
+	self, err := os.Executable()
 	if err != nil {
-		return fmt.Errorf("could not read container state: %w", err)
+		return fmt.Errorf("could not get executable path: %w", err)
 	}
-	var state map[string]interface{}
-	json.Unmarshal(stateBytes, &state)
-	netnsPath := state["namespace_paths"].(map[string]interface{})["NEWNET"].(string)
-	netns, err := unix.Open(netnsPath, unix.O_RDONLY|unix.O_CLOEXEC, 0)
+	err = os.Setenv("PATH", "/bin")
 	if err != nil {
-		return fmt.Errorf("could not get netns: %w", err)
+		return fmt.Errorf("could not set binaries path: %w", err)
 	}
-	// Getting the current netns
-	curns, err := unix.Open(fmt.Sprintf("/proc/%d/ns/net", os.Getpid()), unix.O_RDONLY|unix.O_CLOEXEC, 0)
+	err = os.MkdirAll("/bin", 0o755)
 	if err != nil {
-		return fmt.Errorf("could not get current netns: %w", err)
+		return fmt.Errorf("could not prepare bin dir: %w", err)
 	}
-	// Actually running the proxy
-	l, err := net.Listen("tcp", fmt.Sprintf("[::1]:%d", config.Cluster.LoopbackPort))
-	if err != nil {
-		return fmt.Errorf("could not listen on tcp: %w", err)
-	}
-	config.Cluster.Logger.Info("listening on port", "port", l.Addr().String())
-	for {
-		tcp, err := l.Accept()
+	for _, cmd := range []*cobra.Command{Shim, Runc, Mount, Umount} {
+		err = os.Symlink(self, path.Join("/bin", cmd.Use))
 		if err != nil {
-			config.Cluster.Logger.Error(err, "error on tcp socket")
+			return fmt.Errorf("could not install binary %s: %w", cmd.Use, err)
 		}
-		go func() {
-			defer tcp.Close()
-			// Open a socket in the apiserver namespace
-			unix.Setns(netns, unix.CLONE_NEWNET)
-			proxy, err := net.Dial("tcp", "[::1]:6443")
-			unix.Setns(curns, unix.CLONE_NEWNET)
-			if err != nil {
-				config.Cluster.Logger.Error(err, "cannot dial to container socket")
-				return
-			}
-			defer proxy.Close()
-			go io.Copy(tcp, proxy)
-			io.Copy(proxy, tcp)
-		}()
 	}
-}
-
-var Run = &cobra.Command{
-	Use:   "run",
-	Short: "Actually run hepto inside the container",
-	// Determine current IP, which should run only once inside the container
-	// TODO: handle errors
-	PreRunE: func(cmd *cobra.Command, args []string) error {
-		// Start by evacuating cgroups
-		selfcontain.Evacuate()
-		// Install ourselves as a hooking binary
-		self, err := os.Executable()
-		if err != nil {
-			return fmt.Errorf("could not get executable path: %w", err)
-		}
-		err = os.Setenv("PATH", "/bin")
+	// Temporary directories are not created by selfcontain, but / is a tmpfs, so just mkdir
+	for _, dir := range []string{"/tmp", "/run"} {
+		err = os.Mkdir(dir, 0o777)
 		if err != nil {
-			return fmt.Errorf("could not set binaries path: %w", err)
-		}
-		err = os.MkdirAll("/bin", 0o755)
-		if err != nil {
-			return fmt.Errorf("could not prepare bin dir: %w", err)
-		}
-		for _, cmd := range []*cobra.Command{Kubectl, Ctr, Shim, Runc, Mount, Umount} {
-			err = os.Symlink(self, path.Join("/bin", cmd.Use))
-			if err != nil {
-				return fmt.Errorf("could not install binary %s: %w", cmd.Use, err)
-			}
-		}
-		// Temporary directories are not created by selfcontain, but / is a tmpfs, so just mkdir
-		for _, dir := range []string{"/tmp", "/run"} {
-			err = os.Mkdir(dir, 0o777)
-			if err != nil {
-				return err
-			}
-		}
-		// This is very useful for debugging, especially in network isolated
-		// environments
-		if config.BypassIPCheck {
-			config.Logger.Info("bypassing IP check")
-			if len(config.Container.IP.IP) > 0 {
-				config.Node.IP = config.Container.IP.IP
-			} else {
-				config.Node.IP = net.ParseIP("::1")
-			}
-			return nil
+			return err
 		}
-		// Otherwisem rely on opening a socket and probing the socket local
-		// address for more reliability
-		config.Logger.Info("determining current IP address")
-		for {
-			time.Sleep(time.Second)
-			target := fmt.Sprintf("[%s]:53", config.Container.DNS[0].String())
-			config.Logger.Info("connecting outbound to guess IP", "target", target)
-			conn, err := net.Dial("udp", target)
-			if err != nil {
-				config.Logger.Info("could not connect")
-				continue
-			}
-			localAddr := conn.LocalAddr().(*net.UDPAddr).IP
-			conn.Close()
-			if localAddr.IsLoopback() || localAddr.IsLinkLocalUnicast() {
-				continue
-			}
-			config.Logger.Info("found current IP", "ip", config.Node.IP.String())
-			config.Node.IP = localAddr
-			break
+	}
+	// This is very useful for debugging, especially in network isolated
+	// environments
+	if config.BypassIPCheck {
+		config.Logger.Info("bypassing IP check")
+		if len(config.Address.IP) > 0 {
+			config.Node.IP = config.Address.IP
+		} else {
+			config.Node.IP = net.ParseIP("::1")
 		}
 		return nil
-	},
-	RunE: func(cmd *cobra.Command, args []string) error {
-		c, err := services.NewManager(&config.Cluster, &config.Node, config.Logger)
+	}
+	// Otherwisem rely on opening a socket and probing the socket local
+	// address for more reliability
+	config.Logger.Info("determining current IP address")
+	for {
+		time.Sleep(time.Second)
+		target := fmt.Sprintf("[%s]:53", config.DNS[0].String())
+		config.Logger.Info("connecting outbound to guess IP", "target", target)
+		conn, err := net.Dial("udp", target)
 		if err != nil {
-			return err
+			config.Logger.Info("could not connect")
+			continue
 		}
-		return c.Run()
-	},
-}
-
-var Info = &cobra.Command{
-	Use:   "info",
-	Short: "Display info about a cluster (run on the master node)",
-	RunE: func(cmd *cobra.Command, args []string) error {
-		c, err := services.NewManager(&config.Cluster, &config.Node, config.Logger)
-		if err != nil {
-			return err
+		localAddr := conn.LocalAddr().(*net.UDPAddr).IP
+		conn.Close()
+		if localAddr.IsLoopback() || localAddr.IsLinkLocalUnicast() {
+			continue
 		}
-		fmt.Printf("Node network: %s\n", c.State.Networking().NodeNet.String())
-		fmt.Printf("Pod network: %s\n", c.State.Networking().PodNet.String())
-		fmt.Printf("APIserver service: %s\n", c.State.Networking().APIAddress.String())
-		fmt.Printf("APIserver address: %s\n", c.State.Networking().NodeAddress.String())
-		return nil
-	},
-}
-
-var RunKubectl = &cobra.Command{
-	Use:   "kubectl",
-	Short: "Run kubectl in the cluster",
-	RunE: func(cmd *cobra.Command, args []string) error {
-		config.SetupContainer()
-		kubeconfig := path.Join(config.Container.Data, "kubeconfig")
-		command := kubectl.NewDefaultKubectlCommand()
-		args = append([]string{"--kubeconfig", kubeconfig}, args...)
-		command.SetArgs(args)
-		return command.Execute()
-	},
+		config.Logger.Info("found current IP", "ip", config.Node.IP.String())
+		config.Node.IP = localAddr
+		break
+	}
+	c, err := services.NewManager(&config.Cluster, &config.Node, config.Logger)
+	if err != nil {
+		return err
+	}
+	return c.Run()
 }
 
 func init() {
@@ -261,9 +186,6 @@ func init() {
 	pflag.CommandLine.MarkHidden("azure-container-registry-config")
 	cobra.OnInitialize(viper.AutomaticEnv)
 	Hepto.AddCommand(Start)
-	Hepto.AddCommand(Run)
-	Hepto.AddCommand(Info)
-	Hepto.AddCommand(RunKubectl)
 
 	// General config
 	Hepto.PersistentFlags().CountVarP(&config.LogLevel, "verbose", "v", "Make logs more verbose")
@@ -277,11 +199,11 @@ func init() {
 	Hepto.PersistentFlags().IntVar(&config.Cluster.LoopbackPort, "loopback", 6443, "Loopback apiserver port")
 
 	// Container settings
-	Hepto.PersistentFlags().StringVar(&config.Container.Master, "iface", "eth0", "Master network interface")
-	Hepto.PersistentFlags().IPVar(&config.Container.IP.IP, "ip", net.IP{}, "IP address for the public interface")
-	Hepto.PersistentFlags().IPVar(&config.Container.GW, "gw", net.IP{}, "IP address of the network gateway")
-	Hepto.PersistentFlags().IPSliceVar(&config.Container.DNS, "dns", defaultDNS, "DNS server IP addresses")
-	Hepto.PersistentFlags().StringToStringVar(&config.Container.Mounts, "bind", map[string]string{}, "Additional bind mounts")
+	Hepto.PersistentFlags().StringVar(&config.Iface, "iface", "eth0", "Master network interface")
+	Hepto.PersistentFlags().IPNetVar(&config.Address, "ip", net.IPNet{}, "IP address for the public interface")
+	Hepto.PersistentFlags().IPVar(&config.Gateway, "gw", net.IP{}, "IP address of the network gateway")
+	Hepto.PersistentFlags().IPSliceVar(&config.DNS, "dns", defaultDNS, "DNS server IP addresses")
+	Hepto.PersistentFlags().StringToStringVar(&config.Mounts, "bind", map[string]string{}, "Additional bind mounts")
 
 	// Node settings
 	Hepto.PersistentFlags().IntVar(&config.Node.Port, "discovery-port", 7123, "TCP port used for discovering the cluster")
diff --git a/go.mod b/go.mod
index daa64b1..7b54d03 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module go.acides.org/hepto
 
-go 1.19
+go 1.21.0
 
 replace (
 	// rancher fork exposes main as a public method, which makes it available for an external single binary
@@ -15,6 +15,9 @@ replace (
 	go.opentelemetry.io/otel => go.opentelemetry.io/otel v1.13.0
 	go.opentelemetry.io/otel/metric => go.opentelemetry.io/otel/metric v0.32.1
 
+	// Temporary
+	go.acides.org/dolly => ../dolly
+
 	// This is not a hack, but the recommended way of pinning kubernetes as an external dependency for
 	// building vanilla (since those reference themselves as v0.0.0 internally)
 	k8s.io/api => k8s.io/api v0.27.4
@@ -243,6 +246,7 @@ require (
 	github.com/vishvananda/netns v0.0.4 // indirect
 	github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
 	github.com/xlab/treeprint v1.1.0 // indirect
+	go.acides.org/dolly v0.1.0 // indirect
 	go.etcd.io/bbolt v1.3.7 // indirect
 	go.etcd.io/etcd/api/v3 v3.5.9 // indirect
 	go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect
diff --git a/go.sum b/go.sum
index ed64db8..0bd5bff 100644
--- a/go.sum
+++ b/go.sum
@@ -1154,6 +1154,8 @@ github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPS
 github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg=
 go.acides.org/daeman v0.3.4 h1:gjKq95TpP2+IMUkW/u1do4J43Y6KhTOPwOg6BBCBT74=
 go.acides.org/daeman v0.3.4/go.mod h1:fOZ4YudsVjbUan8WYTs0ED6JfpZ7Z6mlN4Lnn1qW5nk=
+go.acides.org/dolly v0.1.0 h1:3noSsaWNe/gAN/jig04r+20MFJeNfrdQl8bnrXn8Pg8=
+go.acides.org/dolly v0.1.0/go.mod h1:eO56y+YmSx2h0/Zy7+p2zxNeJVl4iIWMM/vsRFMaIDM=
 go.acides.org/pekahi v0.2.1 h1:ysmC14q+hnYXcuB1Ew2XoWDZsgR1jbut89ACRLwi43I=
 go.acides.org/pekahi v0.2.1/go.mod h1:AxgN7Ss6dCRHoNOVWMymkmDafWYdDV7ce6jPl5bqyRc=
 go.acides.org/selfcontain v0.2.2 h1:QGrC7K/E9YPibBdo0IyWwkpNDHwhK2nJ/onpqsygqQk=
-- 
GitLab