cni-plugin.png

kubelet call cni中分析了cri runtime调用cni plugin的流程。

cni plugin打通底层网络的方式各异,runtime如何使用cni plugin呢?

像csi一样规范使用cni plugin

cri、csi规范,通过grpc协议规定好了接口方法,
cni是通过binary方式调用,如何定义好规范?

思路

  1. 接口定义方法,方便runtime调用
  2. 插件公共的集群参数
  3. 插件各自的配置

runtime shim侧的pod参数

pkg/kubelet/dockershim/network/cni/cni.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// buildCNIRuntimeConf (abridged excerpt) starts building the libcni.RuntimeConf
// for a pod sandbox. The visible part fills Args with key/value pairs carrying
// the pod namespace, the pod name and the sandbox container ID.
// NOTE(review): the remaining RuntimeConf fields and the return statement are
// omitted from this excerpt.
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (*libcni.RuntimeConf, error) {
rt := &libcni.RuntimeConf{

// Pod-level arguments handed to the plugin as key/value pairs.
Args: [][2]string{
{"IgnoreUnknown", "1"},
{"K8S_POD_NAMESPACE", podNs},
{"K8S_POD_NAME", podName},
{"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
},
}

}
// addNetwork (abridged excerpt) invokes a single plugin with the "ADD" command.
// NOTE(review): the code that computes pluginPath and newConf is omitted here.
func (c *CNIConfig) addNetwork(ctx context.Context, name, cniVersion string, net *NetworkConfig, prevResult types.Result, rt *RuntimeConf) (types.Result, error) {
// Execute the plugin and return its result.
// The action ("ADD") is also passed to the plugin through environment variables.
return invoke.ExecPluginWithResult(ctx, pluginPath, newConf.Bytes, c.args("ADD", rt), c.exec)
}

// ExecPluginWithResult (abridged excerpt) runs the plugin binary at pluginPath.
// NOTE(review): the handling of stdoutBytes/err and the return are omitted here.
func ExecPluginWithResult(ctx context.Context, pluginPath string, netconf []byte, args CNIArgs, exec Exec) (types.Result, error) {
// netconf is passed as the plugin's stdin data.
// args are converted into environment variables.
stdoutBytes, err := exec.ExecPlugin(ctx, pluginPath, netconf, args.AsEnv())
}

// AsEnv returns the environment for a plugin invocation: the current process
// environment followed by the CNI_* variables derived from args, passed
// through dedupEnv.
func (args *Args) AsEnv() []string {
	// Start from the environment of the current process.
	base := os.Environ()

	// Prefer the pre-rendered argument string; otherwise render PluginArgs.
	cniArgs := args.PluginArgsStr
	if len(cniArgs) == 0 {
		cniArgs = stringify(args.PluginArgs)
	}

	// Duplicated keys that come first get overridden, so the CNI_* values are
	// appended last to keep them from being overridden by the process
	// environment. Each entry is a "k=v" string.
	cniVars := []string{
		"CNI_COMMAND=" + args.Command,
		"CNI_CONTAINERID=" + args.ContainerID,
		"CNI_NETNS=" + args.NetNS,
		"CNI_ARGS=" + cniArgs,
		"CNI_IFNAME=" + args.IfName,
		"CNI_PATH=" + args.Path,
	}

	// Remove duplicate keys.
	return dedupEnv(append(base, cniVars...))
}

插件各自的配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# cat 10-calico.conflist
{
"name": "k8s-pod-network", // 全局参数, 会注入到每一个`cni plugin`中
"cniVersion": "0.3.1",
"plugins": [
{
"type": "calico",
"datastore_type": "kubernetes",
"mtu": 0,
"nodename_file_optional": false,
"log_level": "Info",
"log_file_path": "/var/log/calico/cni/cni.log",
"ipam": { "type": "calico-ipam", "assign_ipv4" : "true", "assign_ipv6" : "false"},
"container_settings": {
"allow_ip_forwarding": false
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"k8s_api_root":"https://10.20.0.1:443",
"kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
}
},
{
"type": "bandwidth",
"capabilities": {"bandwidth": true}
},
{
"type": "portmap",
"snat": true,
"capabilities": {"portMappings": true}
}
]
}

configList文件的对象
位置libcni/api.go

1
2
3
4
5
6
7
8
9
10
11
12
// NetworkConfigList is the in-memory object for a .conflist configuration file.
type NetworkConfigList struct {
// Name is the top-level "name" of the configuration file.
Name string
// CNIVersion is the top-level "cniVersion" of the configuration file.
CNIVersion string
// NOTE(review): presumably disables the CHECK operation — confirm against libcni.
DisableCheck bool
// Plugins holds one entry per plugin listed in the file.
Plugins []*NetworkConfig
// NOTE(review): presumably the raw bytes of the whole file — confirm against libcni.
Bytes []byte
}
// NetworkConfig is the configuration of a single CNI plugin.
type NetworkConfig struct {
// Network is the typed view of the plugin configuration.
Network *types.NetConf
// NOTE(review): presumably the raw JSON form of the same configuration — confirm against libcni.
Bytes []byte
}

某个cni plugin的配置
位置pkg/types/types.go

1
2
3
4
5
6
7
8
9
10
11
12
13
// NetConf describes the configuration of one CNI plugin as decoded from JSON.
type NetConf struct {
CNIVersion string `json:"cniVersion,omitempty"`

Name string `json:"name,omitempty"`
Type string `json:"type,omitempty"`
Capabilities map[string]bool `json:"capabilities,omitempty"` // capabilities of the runtime
IPAM IPAM `json:"ipam,omitempty"` // IPAM is a capability provided by a specific plugin
DNS DNS `json:"dns"`

// RawPrevResult is decoded from the "prevResult" JSON key; PrevResult is
// excluded from JSON encoding/decoding (tag "-").
RawPrevResult map[string]interface{} `json:"prevResult,omitempty"`
PrevResult Result `json:"-"`
}

以calico cni plugin为例,需要多个cni plugin协同工作,所以配置文件中 plugins 是个列表,type 字段表明使用哪个cni plugin。

该配置文件参数的解析如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// getDefaultCNINetwork (abridged excerpt) scans confDir for CNI configuration
// files and returns a cniNetwork built from the first usable one; it returns an
// error when none is found. Elided parts are marked with "...".
func getDefaultCNINetwork(confDir string, binDirs []string) (*cniNetwork, error) {
// Only files with these three extensions are considered.
files, err := libcni.ConfFiles(confDir, []string{".conf", ".conflist", ".json"})
// Sort the file names in ascending lexical order.
sort.Strings(files)
for _, confFile := range files {
var confList *libcni.NetworkConfigList
// A .conflist file declares multiple CNI plugins.
if strings.HasSuffix(confFile, ".conflist") {
confList, err = libcni.ConfListFromFile(confFile)
...
} else {
// Otherwise the file configures a single CNI plugin.
...
conf, err := libcni.ConfFromFile(confFile)
...
}
// This return sits inside the for loop: only the highest-priority config file
// is used, and that single file may configure several CNI plugins.
return &cniNetwork{
name: confList.Name,
NetworkConfig: confList, // contents of the config file
CNIConfig: cniConfig, // libcni object
Capabilities: caps,
}, nil
}
// Nothing usable was found: return an error.
return nil, fmt.Errorf("no valid networks found in %s", confDir)
}

接口定义方法

github.com/containernetworking/cni/libcni/api.go
在同一个进程中,用interface接口规定方法, 类似于cri和csi中不同进程通过grpc接口通信。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// CNI is the in-process interface through which a runtime drives CNI plugins.
type CNI interface {
// Operations on a whole plugin list (a NetworkConfigList).
AddNetworkList(ctx context.Context, net *NetworkConfigList, rt *RuntimeConf) (types.Result, error)
CheckNetworkList(ctx context.Context, net *NetworkConfigList, rt *RuntimeConf) error
DelNetworkList(ctx context.Context, net *NetworkConfigList, rt *RuntimeConf) error
GetNetworkListCachedResult(net *NetworkConfigList, rt *RuntimeConf) (types.Result, error)
GetNetworkListCachedConfig(net *NetworkConfigList, rt *RuntimeConf) ([]byte, *RuntimeConf, error)

// The same operations for a single plugin configuration (a NetworkConfig).
AddNetwork(ctx context.Context, net *NetworkConfig, rt *RuntimeConf) (types.Result, error)
CheckNetwork(ctx context.Context, net *NetworkConfig, rt *RuntimeConf) error
DelNetwork(ctx context.Context, net *NetworkConfig, rt *RuntimeConf) error
GetNetworkCachedResult(net *NetworkConfig, rt *RuntimeConf) (types.Result, error)
GetNetworkCachedConfig(net *NetworkConfig, rt *RuntimeConf) ([]byte, *RuntimeConf, error)

// Validation of a plugin list or a single plugin configuration.
// NOTE(review): the meaning of the returned []string is not visible here — confirm against libcni.
ValidateNetworkList(ctx context.Context, net *NetworkConfigList) ([]string, error)
ValidateNetwork(ctx context.Context, net *NetworkConfig) ([]string, error)

// NOTE(review): presumably lists cached attachments for the given container — confirm against libcni.
GetCachedAttachments(containerID string) ([]*NetworkAttachment, error)
}

AddNetworkList方法分析

1
2
3
4
5
6
7
8
9
10
// addToNetwork (abridged excerpt) attaches a pod sandbox to the given network by
// calling AddNetworkList on the network's CNI object.
// NOTE(review): error handling and the return statement are omitted from this excerpt.
func (plugin *cniNetworkPlugin) addToNetwork(ctx context.Context, network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (cnitypes.Result, error) {
// Build the runtime parameters.
rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations, options)

// The first value is the plugin configuration; the second is the object that
// implements the common methods — here &libcni.CNIConfig{Path: binDirs}, i.e.
// the implementation provided by the libcni library.
netConf, cniNet := network.NetworkConfig, network.CNIConfig

res, err := cniNet.AddNetworkList(ctx, netConf, rt)

}

github.com/containernetworking/cni/libcni/api.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
func (c *CNIConfig) AddNetworkList(ctx context.Context, list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) 	
// 遍历运行配置的每个插件
for _, net := range list.Plugins {
...
// list.Name, list.CNIVersion是配置文件最上层的name和cniVersion
result, err = c.addNetwork(ctx, list.Name, list.CNIVersion, net, result, rt)
if err != nil {
return nil, err
}
}

// addNetwork (abridged excerpt) runs the ADD operation for a single plugin:
// it resolves the plugin binary, builds that plugin's configuration and executes it.
// NOTE(review): the error checks after FindInPath and buildOneConfig are omitted here.
func (c *CNIConfig) addNetwork(ctx context.Context, name, cniVersion string, net *NetworkConfig, prevResult types.Result, rt *RuntimeConf) (types.Result, error) {
// Make sure an executor is assigned.
c.ensureExec()
// Locate the binary through the "type" field of the configuration; the type
// must match the binary's file name.
pluginPath, err := c.exec.FindInPath(net.Network.Type, c.Path)
// Build a new configuration for this plugin from the list-level name and
// cniVersion, the plugin's own config, the previous result and the runtime conf.
newConf, err := buildOneConfig(name, cniVersion, net, prevResult, rt)
// Execute the plugin and return its result.
return invoke.ExecPluginWithResult(ctx, pluginPath, newConf.Bytes, c.args("ADD", rt), c.exec)
}

// ExecPluginWithResult (abridged excerpt) runs the plugin binary at pluginPath.
// NOTE(review): the handling of stdoutBytes/err and the return are omitted here.
func ExecPluginWithResult(ctx context.Context, pluginPath string, netconf []byte, args CNIArgs, exec Exec) (types.Result, error) {
// netconf is passed as the plugin's stdin data.
// args are converted into environment variables.
stdoutBytes, err := exec.ExecPlugin(ctx, pluginPath, netconf, args.AsEnv())
}

// ExecPlugin runs the binary at pluginPath as a child process. environ becomes
// the child's environment and stdinData is fed to its stdin. On success the
// captured stdout is returned; on failure the run error and the captured stdout
// are handed to pluginErr.
func (e *RawExec) ExecPlugin(ctx context.Context, pluginPath string, stdinData []byte, environ []string) ([]byte, error) {
	var captured bytes.Buffer

	// The command is just the binary path, with no command-line arguments.
	cmd := exec.CommandContext(ctx, pluginPath)
	cmd.Env = environ                      // environment variables for the plugin
	cmd.Stdin = bytes.NewBuffer(stdinData) // configuration parameters arrive on stdin
	cmd.Stdout = &captured
	cmd.Stderr = e.Stderr

	// Run the process with the standard library.
	runErr := cmd.Run()
	if runErr != nil {
		return nil, pluginErr(runErr, captured.Bytes())
	}

	// stdout is read back as raw bytes: the binary also reports its own errors
	// on stdout, which is how the two processes exchange input and output.
	return captured.Bytes(), nil
}

cni plugin 如何执行

cni plugin binary 如何获取stdindata和env var来执行,并返回结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import github.com/containernetworking/cni/pkg/skel

func main() {
skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.All, bv.BuildString("none"))
}

// PluginMainWithError builds a dispatcher wired to this process's environment
// and standard streams, runs pluginMain on it and returns the resulting
// *types.Error.
func PluginMainWithError(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) *types.Error {
return (&dispatcher{
Getenv: os.Getenv, // reads environment variables of the plugin process (Go standard library)
Stdin: os.Stdin, // the plugin process's stdin, from which the plugin configuration is read
Stdout: os.Stdout,
Stderr: os.Stderr,
}).pluginMain(cmdAdd, cmdCheck, cmdDel, versionInfo, about)
}

// pluginMain (abridged excerpt) obtains the command and its arguments via
// getCmdArgsFromEnv and dispatches on the command. An unrecognised command
// yields an ErrInvalidEnvironmentVariables error. Elided branches are marked
// with "...".
func (t *dispatcher) pluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) *types.Error {
cmd, cmdArgs, err := t.getCmdArgsFromEnv()
switch cmd {
case "ADD":
// Run the plugin's ADD handler through checkVersionAndCall.
err = t.checkVersionAndCall(cmdArgs, versionInfo, cmdAdd)
case "DEL":
...
default:
return types.NewError(types.ErrInvalidEnvironmentVariables, fmt.Sprintf("unknown CNI_COMMAND: %v", cmd), "")
}
return err
}

如果cni plugin add流程有错误,会输出到os.Stdout

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// PluginMain runs PluginMainWithError. If that returns an error, the error is
// printed via its Print method (a failure of Print itself is logged) and the
// process exits with status 1. On success it simply returns.
func PluginMain(cmdAdd, cmdCheck, cmdDel func(_ *CmdArgs) error, versionInfo version.PluginInfo, about string) {
	pluginErr := PluginMainWithError(cmdAdd, cmdCheck, cmdDel, versionInfo, about)
	if pluginErr == nil {
		return
	}

	if printErr := pluginErr.Print(); printErr != nil {
		log.Print("Error writing error JSON to stdout: ", printErr)
	}
	os.Exit(1)
}

// prettyPrint encodes obj as indented JSON and writes it to os.Stdout.
// It returns the marshalling error, if any, otherwise the error from the write.
func prettyPrint(obj interface{}) error {
	encoded, marshalErr := json.MarshalIndent(obj, "", " ")
	if marshalErr != nil {
		return marshalErr
	}

	if _, writeErr := os.Stdout.Write(encoded); writeErr != nil {
		return writeErr
	}
	return nil
}

通过github.com/containernetworking/cni/libcni这个package,规范了cni的使用姿势

如何调试cni呢?

module还提供了cnitool工具,可以以binary形式调用cni plugin,其原理是和runtime使用cni plugin的姿势完全一致。

debug cni with cnitool

install cnitool

1
2
go get github.com/containernetworking/cni
go install github.com/containernetworking/cni/cnitool

安装可执行程序,即该包下需有main(), 安装到<GOPATH>/bin/

build plugins

1
2
3
4
5
git clone https://github.com/containernetworking/plugins.git
cd plugins
./build_linux.sh
# or
./build_windows.sh

plugin编译到本项目bin目录下

创建/etc/cni/net.d/10-myptp.conf

1
2
3
4
$ echo '{"cniVersion":"0.4.0","name":"myptp","type":"ptp","ipMasq":true,"ipam":{"type":"host-local","subnet":"172.16.29.0/24","routes":[{"dst":"0.0.0.0/0"}]}}' | sudo tee /etc/cni/net.d/10-myptp.conf

$ ip netns add testing
此时网络空间只有`lo`网络

将容器加入网络

不是网络加入容器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
$ CNI_PATH=./bin cnitool add myptp /var/run/netns/testing
{
"cniVersion": "0.4.0",
"interfaces": [
{
"name": "veth901f3f45",
"mac": "c6:28:60:c6:cc:a8"
},
{
"name": "eth0",
"mac": "d6:30:5f:05:21:dd",
"sandbox": "/var/run/netns/testing"
}
],
"ips": [
{
"version": "4",
"interface": 1,
"address": "172.16.29.2/24",
"gateway": "172.16.29.1"
}
],
"routes": [
{
"dst": "0.0.0.0/0"
}
],
"dns": {}
}

host侧

1
2
3
4
5
6
7
8
9
10
# ip a
26: veth901f3f45@if2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default
link/ether c6:28:60:c6:cc:a8 brd ff:ff:ff:ff:ff:ff link-netns testing
inet 172.16.29.1/32 scope global veth901f3f45
valid_lft forever preferred_lft forever
inet6 fe80::c428:60ff:fec6:cca8/64 scope link
valid_lft forever preferred_lft forever

# ip r | grep f45
172.16.29.2 dev veth901f3f45 scope host

host侧, veth pair设备及路由(route)均配置好

ns侧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# ip netns exec testing ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0@if26: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default
link/ether d6:30:5f:05:21:dd brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 172.16.29.2/24 brd 172.16.29.255 scope global eth0
valid_lft forever preferred_lft forever
inet6 fe80::d430:5fff:fe05:21dd/64 scope link
valid_lft forever preferred_lft forever

# ip netns exec testing ip r
default via 172.16.29.1 dev eth0
172.16.29.0/24 via 172.16.29.1 dev eth0 src 172.16.29.2 //下一跳指向host侧的veth pair口
172.16.29.1 dev eth0 scope link src 172.16.29.2

ns中增加了一个网络,在ns侧生成新的eth0 interface;在host侧对应的veth pair为veth901f3f45。
ns中默认路由的下一跳也是host侧veth pair的地址(172.16.29.1)。

检查联通性

1
2
3
4
5
6
7
8
9
10
从host上ping ns
# ping 172.16.29.2
PING 172.16.29.2 (172.16.29.2) 56(84) bytes of data.
64 bytes from 172.16.29.2: icmp_seq=1 ttl=64 time=0.879 ms

从ns ping host上网卡
# ip netns exec testing ping 10.211.55.5
PING 10.211.55.5 (10.211.55.5) 56(84) bytes of data.
64 bytes from 10.211.55.5: icmp_seq=1 ttl=64 time=1.78 ms

清理

1
2
CNI_PATH=./bin cnitool del myptp /var/run/netns/testing
sudo ip netns del testing

cni plugin 的实现

官方实现:https://github.com/containernetworking/plugins

host-local

保证分配ip不冲突
host-local IPAM plugin allocates ip addresses out of a set of address ranges. It stores the state locally on the host filesystem, therefore ensuring uniqueness of IP addresses on a single host
https://www.cni.dev/plugins/current/ipam/host-local/

calico

自定义实现

调用github.com/vishvananda/netlink

pod cidr的更新

how to update pod cidr

总结

  1. cni仓库的libcni包使runtime更方便地使用cni plugin,即将配置文件参数和环境变量参数作为具体cni plugin binary的执行参数.
  2. plugins仓库有官方实现的cni plugin,因为有些网络功能是基础或者公用的。
  3. cnitool使开发者以runtime角度去使用cni plugin且方便debug

Ref

  1. https://www.cni.dev/docs/cnitool/