当本 node 分配的 pod cidr 发生变化时,node上的 pod 如何更新其 ip 呢?

更新node对象

pkg/kubelet/kubelet_node_status.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
func (kl *Kubelet) fastStatusUpdateOnce() {
for {
time.Sleep(100 * time.Millisecond)

node, err := kl.GetNode()
podCIDRs := strings.Join(node.Spec.PodCIDRs, ",")
if _, err := kl.updatePodCIDR(podCIDRs); err != nil {
klog.ErrorS(err, "Pod CIDR update failed", "CIDR", podCIDRs)
continue
}
kl.updateRuntimeUp()
kl.syncNodeStatus()
return
}
}
}

启动单独的goroutine,在里面更新podcidr, runtime, nodeStatus.

更新pod cidr

当 node 对象的网络信息更新后,还需要同步更新 node 上的 pod。即 node 的 pod cidr 需要依次传给 runtime shim => cni-plugin => pod netns eth0

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// updatePodCIDR hands a new pod CIDR to the container runtime and records it
// in the kubelet's runtime state.
//
// The boolean result is false only when cidr equals the value already
// recorded, in which case nothing is sent to the runtime. It is true both on
// success and when the runtime rejects the update; in the latter case the
// recorded value is left unchanged and a non-nil error is returned.
func (kl *Kubelet) updatePodCIDR(cidr string) (bool, error) {
	// An unchanged CIDR is not pushed to the runtime shim again.
	if current := kl.runtimeState.podCIDR(); current == cidr {
		return false, nil
	}

	// kubelet -> generic runtime -> runtime shim -> network plugin
	// docker/non-cri implementations have a passthrough UpdatePodCIDR
	err := kl.getRuntime().UpdatePodCIDR(cidr)
	if err != nil {
		return true, fmt.Errorf("failed to update pod CIDR: %v", err)
	}

	// Remember the value only after the runtime has accepted it.
	kl.runtimeState.setPodCIDR(cidr)
	return true, nil
}

将pod的cidr传递给runtime shim, 同时更新到runtimeState

runtime更新配置

pkg/kubelet/kuberuntime/kuberuntime_manager.go

1
2
3
4
5
6
7
8
9
10
11
12
// UpdatePodCIDR forwards podCIDR to the underlying runtime service as the
// network section of a runtime config update and returns that call's error.
func (m *kubeGenericRuntimeManager) UpdatePodCIDR(podCIDR string) error {
	// TODO(#35531): do we really want to write a method on this manager for each
	// field of the config?
	klog.InfoS("Updating runtime config through cri with podcidr", "CIDR", podCIDR)

	// Only the pod CIDR is populated; every other config field keeps its zero value.
	netCfg := &runtimeapi.NetworkConfig{PodCidr: podCIDR}
	cfg := &runtimeapi.RuntimeConfig{NetworkConfig: netCfg}

	// Ask the underlying runtime to apply the new configuration.
	return m.runtimeService.UpdateRuntimeConfig(cfg)
}

docker network 配置更新

pkg/kubelet/dockershim/docker_service.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// UpdateRuntimeConfig handles a runtime config update request. The only field
// it acts on is the pod CIDR: when a network plugin manager is present and the
// request carries a non-empty pod CIDR, a pod-CIDR-change event is sent to it.
//
// Requests without a runtime config, without a network section, or with an
// empty pod CIDR are accepted and ignored. The returned error is always nil.
func (ds *dockerService) UpdateRuntimeConfig(_ context.Context, r *runtimeapi.UpdateRuntimeConfigRequest) (*runtimeapi.UpdateRuntimeConfigResponse, error) {
	runtimeConfig := r.GetRuntimeConfig()
	if runtimeConfig == nil {
		return &runtimeapi.UpdateRuntimeConfigResponse{}, nil
	}

	klog.InfoS("Docker cri received runtime config", "runtimeConfig", runtimeConfig)

	// NetworkConfig is a pointer and may be absent from the request; guard it
	// so that a config without a network section does not cause a nil
	// dereference.
	netCfg := runtimeConfig.NetworkConfig
	if ds.network != nil && netCfg != nil && netCfg.PodCidr != "" {
		event := make(map[string]interface{})
		event[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR] = netCfg.PodCidr
		// Notify the network plugin manager that the pod CIDR changed.
		ds.network.Event(network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE, event)
	}

	return &runtimeapi.UpdateRuntimeConfigResponse{}, nil
}

更新到cni plugin管理器

更新到pod cidr对象。

1
2
3
// Event records the pod CIDR carried by a pod-CIDR-change event on the plugin.
//
// Events with any other name are ignored, as are events whose details do not
// hold a string under the CIDR detail key.
// NOTE(review): this excerpt shows no synchronization around plugin.podCidr;
// confirm against the full source whether a lock is required here.
func (plugin *cniNetworkPlugin) Event(name string, details map[string]interface{}) {
	if name != network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE {
		return
	}

	// The checked type assertion also covers a nil details map and a missing key.
	podCIDR, ok := details[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR].(string)
	if !ok {
		return
	}

	plugin.podCidr = podCIDR
}

构建运行时的CapabilityArgs参数, 包括

  • portMappings
  • ipRanges
  • bandwidth
  • dns

具体: https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md

runtime参数传递给cni plugin

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// buildCNIRuntimeConf assembles the libcni runtime configuration for one pod
// sandbox: container ID, network namespace path, interface name, cache
// directory, the Kubernetes pod identity args, and the capability args
// (port mappings, bandwidth, IP ranges).
//
// It returns the populated configuration and a nil error.
// NOTE(review): portMappingsParam and bandwidthParam are not defined within
// this excerpt — presumably they are computed by code elided here from podSandboxID,
// annotations and options; confirm against the full source.
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (*libcni.RuntimeConf, error) {
	// Configuration handed down to the CNI library.
	rt := &libcni.RuntimeConf{
		ContainerID: podSandboxID.ID,
		NetNS:       podNetnsPath,
		IfName:      network.DefaultInterfaceName,
		CacheDir:    plugin.cacheDir,
		Args: [][2]string{
			{"IgnoreUnknown", "1"},
			{"K8S_POD_NAMESPACE", podNs},
			{"K8S_POD_NAME", podName},
			{"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
		},
	}

	rt.CapabilityArgs = map[string]interface{}{
		portMappingsCapability: portMappingsParam,
	}

	rt.CapabilityArgs[bandwidthCapability] = bandwidthParam

	// Set the PodCIDR
	rt.CapabilityArgs[ipRangesCapability] = [][]cniIPRange{{{Subnet: plugin.podCidr}}}

	// The function declares results, so it must end by returning them.
	return rt, nil
}

参数传递给cni plugin stdin

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// buildOneConfig prepares the configuration for a single plugin of a plugin
// list. It injects the list-level name and cniVersion, adds the previous
// plugin's result when there is one, and finally injects the runtime config
// taken from rt.
//
// It returns the resulting configuration, or a nil configuration and the
// error when the name/version/prevResult injection fails.
func buildOneConfig(name, cniVersion string, orig *NetworkConfig, prevResult types.Result, rt *RuntimeConf) (*NetworkConfig, error) {
	// Keys injected into every plugin so that all of them share the same
	// name and version.
	overrides := make(map[string]interface{}, 3)
	overrides["name"] = name
	overrides["cniVersion"] = cniVersion

	// With several plugins in a chain, the previous plugin's result is
	// passed on to the current one.
	if prevResult != nil {
		overrides["prevResult"] = prevResult
	}

	withOverrides, err := InjectConf(orig, overrides)
	if err != nil {
		return nil, err
	}

	// Inject the parameters supplied by the runtime shim.
	return injectRuntimeConfig(withOverrides, rt)
}

orig 表示某一个 cni plugin 从 stdin 接收的输入参数。这里把 runtime shim 侧的参数也注入到该 plugin 的 stdin 中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// injectRuntimeConfig adds a "runtimeConfig" section to orig containing the
// capability args from rt for every capability that orig declares as
// supported.
//
// When no capability arg qualifies, orig is returned unchanged. When the
// injection fails, a nil configuration and the error are returned.
func injectRuntimeConfig(orig *NetworkConfig, rt *RuntimeConf) (*NetworkConfig, error) {
	selected := map[string]interface{}{}
	for capName, enabled := range orig.Network.Capabilities {
		// Only capabilities the plugin has switched on receive their args.
		if enabled {
			if arg, found := rt.CapabilityArgs[capName]; found {
				selected[capName] = arg
			}
		}
	}

	if len(selected) == 0 {
		return orig, nil
	}

	updated, err := InjectConf(orig, map[string]interface{}{"runtimeConfig": selected})
	if err != nil {
		return nil, err
	}
	return updated, nil
}

cni plugin Capability

portmapping

将 hostPort 与 containerPort 做关联,内部实现原理待补充(todo)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// port mappings are a cni capability-based args, rather than parameters
// to a specific plugin
// Look up the port mappings for this sandbox by its ID; a lookup failure is
// returned to the caller.
portMappings, err := plugin.host.GetPodPortMappings(podSandboxID.ID)
if err != nil {
return nil, fmt.Errorf("could not retrieve port mappings: %v", err)
}
// Capacity is sized for the case in which every mapping is kept.
portMappingsParam := make([]cniPortMapping, 0, len(portMappings))
for _, p := range portMappings {
// Mappings without a positive host port are left out.
if p.HostPort <= 0 {
continue
}
// The protocol name is lower-cased before being handed to the plugin.
portMappingsParam = append(portMappingsParam, cniPortMapping{
HostPort: p.HostPort,
ContainerPort: p.ContainerPort,
Protocol: strings.ToLower(string(p.Protocol)),
HostIP: p.HostIP,
})
}
// CapabilityArgs is replaced by a new map whose only entry is the port mappings.
rt.CapabilityArgs = map[string]interface{}{
portMappingsCapability: portMappingsParam,
}

other

todo

总结

node-ipam-controller(controller-manager) => node(pod cidr,可 allocate 多个) => runtime shim => cni管理器 => specific cni plugin(配置文件设置是否开启) => pod netns eth0