# kubectl get pod -o wide
NAME                          READY   STATUS    RESTARTS   AGE   IP            NODE      NOMINATED NODE   READINESS GATES
my-busybox-68dc44bf88-j7scp   1/1     Running   0          24m   10.10.50.4    master4   <none>           <none>
my-busybox-68dc44bf88-qhpss   1/1     Running   0          24m   10.10.50.16   master1   <none>           <none>
# kubectl exec -it my-busybox-68dc44bf88-qhpss sh
/ # ip a
3: eth0@if17: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
    link/ether 12:ca:8c:21:5a:c4 brd ff:ff:ff:ff:ff:ff
    inet 10.10.50.16/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::10ca:8cff:fe21:5ac4/64 scope link
       valid_lft forever preferred_lft forever

View the routing table inside the pod:
/ # ip r
default via 169.254.1.1 dev eth0
169.254.1.1 dev eth0 scope link
View the ARP table inside the pod:
/ # ip neigh
169.254.1.1 dev eth0 lladdr ee:ee:ee:ee:ee:ee used 0/0/0 probes 1 STALE
178.104.227.4 dev eth0 lladdr ee:ee:ee:ee:ee:ee used 0/0/0 probes 0 STALE
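Everything in the pod points at 169.254.1.1, an address no interface actually owns; the node answers ARP for it because proxy_arp is enabled on the host side of the veth pair, and that host-side interface carries the fixed MAC ee:ee:ee:ee:ee:ee. A quick way to confirm this on the node (the interface name below is hypothetical; substitute the cali interface that pairs with this pod's eth0):

CALI_IF=cali0123456789a                          // hypothetical name, look it up with "ip a" on the node
cat /proc/sys/net/ipv4/conf/$CALI_IF/proxy_arp   // 1 = the node answers ARP on behalf of other addresses
tcpdump -nei $CALI_IF arp                        // ARP replies for 169.254.1.1 should carry ee:ee:ee:ee:ee:ee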
# Host-side setup
ip netns add ns3
ip link add tap3 type veth peer name veth1 netns ns3
ip link set address ee:ee:ee:ee:ee:ee dev tap3     // pin the host-side veth MAC that the pod's ARP entry will resolve to
echo 1 > /proc/sys/net/ipv4/conf/tap3/proxy_arp    // let tap3 answer ARP queries (e.g. for 169.254.1.1) on behalf of the host
ip link set tap3 up
ip r a 10.42.1.13 dev tap3                         // /32 host route pointing the pod IP at tap3
# Pod-side (netns) setup
ip netns exec ns3 ip addr add 10.42.1.13/32 dev veth1
ip netns exec ns3 ip route add 169.254.1.1 dev veth1                // link route to the gateway address
ip netns exec ns3 ip route add default via 169.254.1.1 dev veth1    // default route
ip netns exec ns3 ip neigh add 169.254.1.1 dev veth1 lladdr ee:ee:ee:ee:ee:ee
ip netns exec ns3 ip link set veth1 up
# Ping the pod interface from the host
ping 10.42.1.13
PING 10.42.1.13 (10.42.1.13) 56(84) bytes of data.
64 bytes from 10.42.1.13: icmp_seq=1 ttl=64 time=48.7 ms
# Ping the host from the pod interface
ip netns exec ns3 ping 178.104.163.26
PING 178.104.163.26 (178.104.163.26) 56(84) bytes of data.
64 bytes from 178.104.163.26: icmp_seq=1 ttl=64 time=0.196 ms
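To watch this work end to end, a capture on tap3 should show the ARP request for 169.254.1.1 being answered by tap3 itself (proxy_arp) followed by the ICMP exchange. This is only an observation sketch; no capture output is reproduced here:

tcpdump -nei tap3 'arp or icmp'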
# ip a s data
4: data: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc fq_codel state UP group default qlen 1000
    link/ether fa:16:b2:76:e8:07 brd ff:ff:ff:ff:ff:ff
    inet 178.118.232.7/24 brd 178.118.232.255 scope global noprefixroute data
       valid_lft forever preferred_lft forever
# ip r
default via 178.118.230.1 dev eth0 proto dhcp metric 100
// The blocks below belong to pods on other nodes. The next hop is a 178.118.232.x address (a virtual
// IP that only steers the route); the packet leaves via tunl0, which performs the IPIP encapsulation,
// and the via address becomes the outer destination IP of the encapsulated packet.
10.244.36.192/26 via 178.118.232.191 dev tunl0 proto bird onlink
10.244.57.64/26 via 178.118.232.35 dev tunl0 proto bird onlink
10.244.136.0/26 via 178.118.232.7 dev tunl0 proto bird onlink
10.244.137.64/26 via 178.118.232.221 dev tunl0 proto bird onlink
10.244.166.128/26 via 178.118.232.198 dev tunl0 proto bird onlink
10.244.175.64/26 via 178.118.232.63 dev tunl0 proto bird onlink
// After IPIP encapsulation the packet matches this route and leaves through the local data NIC.
178.118.232.0/24 dev data proto kernel scope link src 178.118.232.148 metric 102
blackhole 10.244.180.0/26 proto bird
// The /32 routes below are all IPs of pods on this node; they are reached directly via their cali interfaces.
10.244.180.4 dev cali97a0a28d8e4 scope link
10.244.180.10 dev cali119b29c78fc scope link
10.244.180.15 dev cali15c7619fccc scope link
10.244.180.16 dev cali8ece1094b64 scope link
10.244.180.17 dev cali17c27a1ff33 scope link
10.244.180.20 dev caliaeb7f8a6c25 scope link
10.244.180.27 dev cali90e4a410491 scope link
10.244.180.32 dev cali1abad8afd57 scope link
10.244.180.35 dev cali1bff4958e4e scope link
10.244.180.48 dev cali6c3bcfc264e scope link
10.244.180.55 dev cali1ad0b3407f2 scope link
10.244.180.58 dev calibbb6448fd94 scope link
172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown
178.118.230.0/24 dev eth0 proto kernel scope link src 178.118.230.223 metric 100
178.118.231.0/24 dev eth1 proto kernel scope link src 178.118.231.238 metric 101
178.118.232.0/24 dev data proto kernel scope link src 178.118.232.148 metric 102
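To confirm the encapsulation described above, IPIP traffic (IP protocol 4) can be captured on the data NIC: the outer addresses should be the node IPs from the via entries and the inner addresses the pod IPs. A sketch only, no capture output shown:

tcpdump -ni data 'ip proto 4'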
# ip a s tunl0
10: tunl0@NONE: <NOARP,UP,LOWER_UP> mtu 1430 qdisc noqueue state UNKNOWN group default qlen 1000
    link/ipip 0.0.0.0 brd 0.0.0.0
    inet 10.244.180.11/32 scope global tunl0
       valid_lft forever preferred_lft forever
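tunl0 is the kernel's fallback IPIP device with no fixed endpoints, which is why the routes above can choose the outer destination per prefix through the onlink gateway. This can be checked with (exact output varies by kernel):

ip -d link show tunl0     // typically shows: ipip remote any local any ttl inherit
ip tunnel show tunl0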
cat > /etc/bird-cfg/bird.cfg << EOF
protocol static {
  # IP blocks for this host.
  // bird generates two routes for this block; this is the blackhole one.
  route 10.42.1.0/24 blackhole;
}
# Aggregation of routes on this host; export the block, nothing beneath it.
function calico_aggr ()
{
  # Block 10.42.1.0/24 is confirmed
  if ( net = 10.42.1.0/24 ) then { accept; }
  if ( net ~ 10.42.1.0/24 ) then { reject; }
}
filter calico_export_to_bgp_peers {
  calico_aggr();
  if ( net ~ 10.42.0.0/16 ) then {
    accept;
  }
  reject;
}
filter calico_kernel_programming {
  if ( net ~ 10.42.0.0/16 ) then {
    krt_tunnel = "tunl0";
    accept;
  }
  accept;
}
router id 10.211.55.5;
# Configure synchronization between routing tables and kernel.
protocol kernel {
  learn;             # Learn all alien routes from the kernel
  persist;           # Don't remove routes on bird shutdown
  scan time 2;       # Scan kernel routing table every 2 seconds
  import all;
  export filter calico_kernel_programming;  # Default is export none
  graceful restart;  # Turn on graceful restart to reduce potential flaps in
                     # routes when reloading BIRD configuration.  With a full
                     # automatic mesh, there is no way to prevent BGP from
                     # flapping since multiple nodes update their BGP
                     # configuration at the same time, GR is not guaranteed to
                     # work correctly in this scenario.
}
# Watch interface up/down events.
protocol device {
  debug all;
  scan time 2;    # Scan interfaces every 2 seconds
}
protocol direct {
  debug all;
  interface -"tap*", "*";   # Exclude tap* but include everything else.
}
# Template for all BGP clients
template bgp bgp_template {
  debug all;
  description "Connection to BGP peer";
  local as 64512;
  multihop;
  gateway recursive;  # This should be the default, but just in case.
  import all;         # Import all routes, since we don't know what the upstream
                      # topology is and therefore have to trust the ToR/RR.
  export filter calico_export_to_bgp_peers;  # Only want to export routes for workloads.
  source address 10.211.55.5;  # The local address we use for the TCP connection
  add paths on;
  graceful restart;   # See comment in kernel section about graceful restart.
  connect delay time 2;
  connect retry time 5;
  error wait time 5,30;
}
protocol bgp Mesh_10_211_55_6 from bgp_template {
  neighbor 10.211.55.6 as 64512;
}
EOF
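To bring this up, BIRD can be started against the file and inspected with birdc. Note that the krt_tunnel attribute comes from Calico's patched BIRD, so the config is assumed to run under that binary; the control-socket path below is also just an assumption for this sketch:

bird -c /etc/bird-cfg/bird.cfg -s /run/bird-node1.ctl
birdc -s /run/bird-node1.ctl show protocols   // Mesh_10_211_55_6 should reach Established once node2 runs the mirrored config
birdc -s /run/bird-node1.ctl show route       // the static blackhole plus the 10.42.2.0/24 block learned from the peer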
# node1 (pod block 10.42.1.0/24); ns1/ns2, tap1/tap2 and proxy_arp are prepared the same way as ns3/tap3 above
ip r a 10.42.1.11 dev tap1
ip r a 10.42.1.12 dev tap2

ip netns exec ns1 ip addr add 10.42.1.11/32 dev veth1
ip netns exec ns2 ip addr add 10.42.1.12/32 dev veth1

ip netns exec ns1 ip link set veth1 up
ip netns exec ns2 ip link set veth1 up

ip netns exec ns1 ip link set lo up
ip netns exec ns2 ip link set lo up

ip netns exec ns1 ip route add 169.254.1.1 dev veth1
ip netns exec ns2 ip route add 169.254.1.1 dev veth1

ip netns exec ns1 ip route add default via 169.254.1.1 dev veth1
ip netns exec ns2 ip route add default via 169.254.1.1 dev veth1

ip netns exec ns1 ip neigh add 169.254.1.1 dev veth1 lladdr ee:ee:ee:ee:ee:ee
ip netns exec ns2 ip neigh add 169.254.1.1 dev veth1 lladdr ee:ee:ee:ee:ee:ee
# node2 (pod block 10.42.2.0/24), same pattern
ip r a 10.42.2.11 dev tap1
ip r a 10.42.2.12 dev tap2

ip netns exec ns1 ip addr add 10.42.2.11/32 dev veth1
ip netns exec ns2 ip addr add 10.42.2.12/32 dev veth1

ip netns exec ns1 ip link set veth1 up
ip netns exec ns2 ip link set veth1 up

ip netns exec ns1 ip link set lo up
ip netns exec ns2 ip link set lo up

ip netns exec ns1 ip route add 169.254.1.1 dev veth1
ip netns exec ns2 ip route add 169.254.1.1 dev veth1

ip netns exec ns1 ip route add default via 169.254.1.1 dev veth1
ip netns exec ns2 ip route add default via 169.254.1.1 dev veth1

ip netns exec ns1 ip neigh add 169.254.1.1 dev veth1 lladdr ee:ee:ee:ee:ee:ee
ip netns exec ns2 ip neigh add 169.254.1.1 dev veth1 lladdr ee:ee:ee:ee:ee:ee
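Before involving the tunnel, same-node connectivity can be sanity-checked: traffic between ns1 and ns2 hairpins through the host via the /32 routes, so this assumes IP forwarding is enabled on the node. A sketch, no output reproduced:

sysctl net.ipv4.ip_forward               // must be 1 for the host to forward between the tap interfaces
ip netns exec ns1 ping -c 1 10.42.1.12   // on node1: ns1 -> host -> ns2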
modprobe ipip
ip a a 10.42.2.0/32 brd 10.42.2.0 dev tunl0
ip link set tunl0 up
iptables -F
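With IPIP enabled and BIRD exchanging routes, each node should now carry the peer's block via tunl0. Expected shape only; the actual route is programmed by BIRD and may differ:

ip r | grep bird    // e.g. on node1: 10.42.2.0/24 via 10.211.55.6 dev tunl0 proto bird onlink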
From node1's ns1, ping a pod IP on node2
# ip netns exec ns1 ping 10.42.2.11
PING 10.42.2.11 (10.42.2.11) 56(84) bytes of data.
64 bytes from 10.42.2.11: icmp_seq=1 ttl=62 time=1.62 ms
64 bytes from 10.42.2.11: icmp_seq=2 ttl=62 time=1.11 ms
From node2's ns1, ping a pod IP on node1
# ip netns exec ns1 ping 10.42.1.11
PING 10.42.1.11 (10.42.1.11) 56(84) bytes of data.
64 bytes from 10.42.1.11: icmp_seq=1 ttl=62 time=0.793 ms
64 bytes from 10.42.1.11: icmp_seq=2 ttl=62 time=1.19 ms
From node1's ns1, ping node2's eth IP: no connectivity
# ip netns exec ns1 ping 10.211.55.6
PING 10.211.55.6 (10.211.55.6) 56(84) bytes of data.
^C
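One plausible (unverified) explanation: the echo request reaches node2's physical NIC with source 10.42.1.11, but node2's route back to that address points at tunl0, so strict reverse-path filtering drops it. Ways to investigate, plus the SNAT that Calico's natOutgoing option applies to pod-to-outside traffic; the interface name and rule below are assumptions for this sketch:

tcpdump -ni eth0 icmp and host 10.42.1.11   // on node2; "eth0" is an assumed NIC name: does the request arrive at all?
sysctl net.ipv4.conf.all.rp_filter          // 1 (strict) drops packets whose return route uses a different interface
// roughly what Calico's natOutgoing does on node1 for traffic leaving the pod CIDR:
iptables -t nat -A POSTROUTING -s 10.42.1.0/24 ! -d 10.42.0.0/16 -j MASQUERADE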