TCP/IP Notes


tcp-1

A simple Linux command example: connect to a website and read the returned HTTP data

# Open an HTTP connection without a browser and read the response
exec 8<> /dev/tcp/www.baidu.com/80
echo -e 'GET / HTTP/1.0\n' 1>&8
cat <&8

# The HTTP response returned by Baidu is printed to the terminal


# File descriptors opened by a Linux process — sockets, pipes, inodes (under the hood it is all the file system)
[root@master1 fd]# ps -ef| grep kubelet
root 1538 1513 2 Jun22 ? 1-20:53:43 kube-apiserver --advertise-address=10.33.41.132 --allow-privileged=true --authentication-token-webhook-config-file=/etc/kubernetes/configs/authwebhookconfig.yaml --authorization-mode=Node,RBAC --client-ca-file=/etc/kubernetes/pki/ca.crt --enable-admission-plugins=NodeRestriction --enable-bootstrap-token-auth=true --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key --etcd-servers=https://127.0.0.1:2379 --event-ttl=12h --feature-gates=TaintBasedEvictions=false,TTLAfterFinished=true --insecure-port=0 --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key --requestheader-allowed-names=front-proxy-client --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt --requestheader-extra-headers-prefix=X-Remote-Extra- --requestheader-group-headers=X-Remote-Group --requestheader-username-headers=X-Remote-User --secure-port=6443 --service-account-key-file=/etc/kubernetes/pki/sa.pub --service-cluster-ip-range=10.96.0.0/12 --service-node-port-range=30000-32767 --tls-cert-file=/etc/kubernetes/pki/apiserver.crt --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 --tls-min-version=VersionTLS12 --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
root 6914 1 3 Jun22 ? 2-05:27:31 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --cgroup-driver=systemd --dynamic-config-dir=/var/lib/kubelet/dynamic --network-plugin=cni --node-ip=10.33.41.132 --pod-infra-container-image=docker.hikcloud:30001/k8ss/pause:3.1 --root-dir=/var/lib/kubelet
root 31042 16717 0 10:35 pts/1 00:00:00 grep --color=auto kubelet
[root@master1 fd]# cd /proc/6914/fd
[root@master1 fd]# ls -l
total 0
lr-x------ 1 root root 64 Aug 31 10:08 0 -> /dev/null
lrwx------ 1 root root 64 Aug 31 10:08 1 -> socket:[123625]
l-wx------ 1 root root 64 Aug 31 10:08 10 -> pipe:[125488]
lrwx------ 1 root root 64 Aug 31 10:08 11 -> socket:[124397]
lrwx------ 1 root root 64 Aug 31 10:08 12 -> socket:[124404]
lrwx------ 1 root root 64 Aug 31 10:08 13 -> socket:[169557354]
lrwx------ 1 root root 64 Aug 31 10:08 14 -> socket:[125570]
lr-x------ 1 root root 64 Aug 31 10:08 15 -> anon_inode:inotify
lrwx------ 1 root root 64 Aug 31 10:08 16 -> socket:[230272123]
lrwx------ 1 root root 64 Aug 31 10:08 17 -> socket:[125528]
lrwx------ 1 root root 64 Aug 31 10:08 18 -> socket:[125529]
lrwx------ 1 root root 64 Aug 31 10:08 19 -> socket:[122620]
lrwx------ 1 root root 64 Aug 31 10:08 2 -> socket:[123625]
lrwx------ 1 root root 64 Aug 31 10:08 20 -> socket:[122621]
lr-x------ 1 root root 64 Aug 31 10:08 21 -> anon_inode:inotify
lrwx------ 1 root root 64 Aug 31 10:08 22 -> anon_inode:[eventpoll]


# Inspect the same process's file descriptors with lsof
[root@master1 fd]# lsof -p 6914
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
kubelet 6914 root cwd DIR 254,0 224 64 /
kubelet 6914 root rtd DIR 254,0 224 64 /
kubelet 6914 root txt REG 254,0 111568408 403332145 /usr/bin/kubelet
kubelet 6914 root mem REG 254,0 61624 36683 /usr/lib64/libnss_files-2.17.so
kubelet 6914 root mem REG 254,0 2151672 36665 /usr/lib64/libc-2.17.so
kubelet 6914 root mem REG 254,0 19288 36671 /usr/lib64/libdl-2.17.so
kubelet 6914 root mem REG 254,0 141968 36691 /usr/lib64/libpthread-2.17.so
kubelet 6914 root mem REG 254,0 163400 36658 /usr/lib64/ld-2.17.so
kubelet 6914 root 0r CHR 1,3 0t0 6352 /dev/null
kubelet 6914 root 1u unix 0xffff88c912dbd400 0t0 123625 socket
kubelet 6914 root 2u unix 0xffff88c912dbd400 0t0 123625 socket
kubelet 6914 root 3u unix 0xffff88ca94f53c00 0t0 124265 @00071
kubelet 6914 root 4u a_inode 0,10 0 6348 [eventpoll]
kubelet 6914 root 5u unix 0xffff88c8f5b23800 0t0 122558 socket
kubelet 6914 root 6r a_inode 0,10 0 6348 inotify
kubelet 6914 root 7u unix 0xffff88cac1311000 0t0 123654 socket
kubelet 6914 root 8u a_inode 0,10 0 6348 [eventpoll]
kubelet 6914 root 9r FIFO 0,9 0t0 125488 pipe
kubelet 6914 root 10w FIFO 0,9 0t0 125488 pipe
kubelet 6914 root 11u unix 0xffff88ca94f51800 0t0 124397 /var/run/387454838
kubelet 6914 root 12u IPv4 124404 0t0 TCP localhost:42420 (LISTEN)
kubelet 6914 root 13u IPv6 169557354 0t0 TCP master1:10250->master3:64790 (ESTABLISHED)
kubelet 6914 root 14u unix 0xffff88cac1310800 0t0 125570 socket
kubelet 6914 root 15r a_inode 0,10 0 6348 inotify
kubelet 6914 root 16u unix 0xffff88c91a133000 0t0 230272123 socket
kubelet 6914 root 17u unix 0xffff88c93dcb8c00 0t0 125528 socket
kubelet 6914 root 18u unix 0xffff88ca93e43800 0t0 125529 socket
kubelet 6914 root 19u unix 0xffff88c93dcbb800 0t0 122620 /var/run/387454838
kubelet 6914 root 20u unix 0xffff88ca93e40c00 0t0 122621 /var/run/387454838
kubelet 6914 root 21r a_inode 0,10 0 6348 inotify
kubelet 6914 root 22u a_inode 0,10 0 6348 [eventpoll]
kubelet 6914 root 23r FIFO 0,9 0t0 125542 pipe
kubelet 6914 root 24w FIFO 0,9 0t0 125542 pipe
kubelet 6914 root 25r CHR 1,11 0t0 6358 /dev/kmsg
kubelet 6914 root 26u IPv6 124463 0t0 TCP *:10250 (LISTEN)
kubelet 6914 root 27u unix 0xffff88caebebd800 0t0 125589 /var/lib/kubelet/device-plugins/kubelet.sock
kubelet 6914 root 28u unix 0xffff88cac1313c00 0t0 125549 /var/lib/kubelet/pod-resources/283926109
kubelet 6914 root 29u IPv4 125551 0t0 TCP localhost:10248 (LISTEN)
kubelet 6914 root 30u unix 0xffff88ca83a21400 0t0 229931580 socket
kubelet 6914 root 31r a_inode 0,10 0 6348 inotify
kubelet 6914 root 33r CHR 1,11 0t0 6358 /dev/kmsg
kubelet 6914 root 34r a_inode 0,10 0 6348 inotify
kubelet 6914 root 35u a_inode 0,10 0 6348 [eventpoll]
kubelet 6914 root 36r FIFO 0,9 0t0 125593 pipe
kubelet 6914 root 37w FIFO 0,9 0t0 125593 pipe
kubelet 6914 root 38u unix 0xffff88cac1386000 0t0 124529 socket
kubelet 6914 root 39u unix 0xffff88caf6d00c00 0t0 123724 socket
kubelet 6914 root 43u IPv4 168915864 0t0 TCP kubernetes.cluster.local.lb:38676->kubernetes.cluster.local.lb:16443 (ESTABLISHED)
  • fd → file descriptor (a per-process handle to a file or socket)

Transport layer

tcp-2

TCP/UDP (the transport layer)

Above the transport layer sits the application layer; applications transfer data over connections established by the transport layer.

! What is a connection, and how is it established?

tcp-3

A connection means both sides have allocated resources that serve their respective applications and carry data between them.

A TCP/UDP connection is a virtual concept; underneath, it is implemented by the kernel file system (sockets/file descriptors), data I/O, and the lower-layer network protocols.


TCP: a connection-oriented, reliable transport protocol

  • Establishing a connection:

Three-way handshake: the client sends SYN → the server returns SYN + ACK → the client sends ACK →

  • Data transfer:

Client and server each allocate some resources; over the established connection they exchange data →

  • Tearing down the connection:

Four-way teardown: both sides release their socket resources.

The client sends FIN → the server returns ACK → the server then sends its own FIN → the client sends the final ACK. (Connection closed.)

! The control segments exchanged during the three-way handshake and four-way teardown (for example the ACKs) carry no payload — their data length is 0.
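A quick way to see these zero-length control segments is to capture one short HTTP exchange (a sketch, assuming tcpdump and curl are installed and the capture is run with sufficient privileges):

# Terminal 1: capture traffic to/from Baidu's web port
tcpdump -i any -nn 'tcp port 80 and host www.baidu.com'
# Terminal 2: trigger one connection
curl -s -o /dev/null http://www.baidu.com
# In the capture, the SYN, SYN+ACK, ACK and FIN segments all show length 0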

tcp-4

socket → the 4-tuple [client IP:client port → server IP:server port]

A client can open up to roughly 65,535 connections to one server IP:port (the 16-bit client port is the only varying element of the 4-tuple); servers with different IPs, even on the same port, each allow another ~65,535 because the 4-tuple differs.
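Each live connection shows up as exactly this 4-tuple; a quick way to list them (a sketch, assuming the iproute2 ss tool is installed):

# Show established TCP connections with their local and peer address:port pairs
ss -tan state established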

tcp-5


Inspecting the OS network state with netstat

netstat -natp

# Example
[root@master1 ~]# netstat -natp
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 127.0.0.1:10259 0.0.0.0:* LISTEN 26858/kube-schedule
tcp 0 0 127.0.0.1:42420 0.0.0.0:* LISTEN 6914/kubelet
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 3365/sshd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 3862/master
tcp 0 0 0.0.0.0:16443 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 127.0.0.1:9182 0.0.0.0:* LISTEN 11322/./bin/k8s-key
tcp 0 0 0.0.0.0:9119 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 0.0.0.0:8067 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 127.0.0.1:10248 0.0.0.0:* LISTEN 6914/kubelet
tcp 0 0 0.0.0.0:8073 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 127.0.0.1:10249 0.0.0.0:* LISTEN 2848/kube-proxy
tcp 0 0 0.0.0.0:8074 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 127.0.0.1:2381 0.0.0.0:* LISTEN 1576/etcd
tcp 0 0 127.0.0.1:10257 0.0.0.0:* LISTEN 26883/kube-controll
tcp 0 0 0.0.0.0:30001 0.0.0.0:* LISTEN 28115/haproxy
tcp 0 0 127.0.0.1:46448 127.0.0.1:9180 TIME_WAIT -
tcp 0 0 127.0.0.1:46816 127.0.0.1:9180 TIME_WAIT -
tcp 0 0 127.0.0.1:45914 127.0.0.1:9180 TIME_WAIT -
tcp 0 0 127.0.0.1:2381 127.0.0.1:41694 TIME_WAIT -
tcp 0 0 127.0.0.1:46886 127.0.0.1:9180 TIME_WAIT -
tcp 0 0 127.0.0.1:57580 127.0.0.1:2379 ESTABLISHED 1538/kube-apiserver
tcp 0 0 127.0.0.1:57478 127.0.0.1:2379 ESTABLISHED 1538/kube-apiserver
tcp 0 0 127.0.0.1:57594 127.0.0.1:2379 ESTABLISHED 1538/kube-apiserver

During a TCP transfer, every data segment one side sends must be acknowledged by an ACK from the other side (UDP has no such acknowledgement).

tcp-6

tcpdump -i <interface> -nn port <port> -X
# -nn: show numeric addresses and ports instead of resolving them to names
# -X : optional, dump each packet's payload in hex/ASCII

tcp-7


The nc (netcat) program can open a connection to a given IP and port, and from it you can issue GET/POST/PUT requests by hand. During teardown, the FIN and ACK can be combined in one segment, so the close may take only 3 segments instead of 4.
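A minimal hand-driven request with nc, assuming the netcat package is installed (the target host is just the Baidu example used above):

# Open a TCP connection to port 80 and send an HTTP request typed by hand
printf 'GET / HTTP/1.0\r\nHost: www.baidu.com\r\n\r\n' | nc www.baidu.com 80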

Even while a TCP/UDP connection is being established, actually sending and receiving its packets still depends on the layers below — the network layer determines how each packet is delivered.


Network layer

  • IP

  • route

The Internet is many small networks connected together; a host is reached by its IP address (network number + host part).

/etc/sysconfig/network-scripts/ifcfg-eth0

IPADDR   — the IP address

NETMASK  — the subnet mask

GATEWAY  — the default gateway

DNS      — the DNS (name resolution) server

ANDing IP with NETMASK bit by bit yields the network number.
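A small bash illustration of that AND operation (the addresses are only examples):

# Compute the network number from IPADDR and NETMASK with a per-octet bitwise AND
IP=192.168.150.2; MASK=255.255.255.0
IFS=. read -r a b c d    <<< "$IP"
IFS=. read -r m1 m2 m3 m4 <<< "$MASK"
echo "network: $((a & m1)).$((b & m2)).$((c & m3)).$((d & m4))"   # -> network: 192.168.150.0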

Routing table

Viewing the routing table

A gateway of 0.0.0.0 means no router/gateway is needed (no next hop): the network is directly connected, and hosts in the same subnet communicate with each other directly.

[root@master1 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 10.33.41.254 0.0.0.0 UG 0 0 0 eth0
10.33.41.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0
10.244.0.0 0.0.0.0 255.255.255.0 U 0 0 0 cni0
10.244.1.0 10.244.1.0 255.255.255.0 UG 0 0 0 flannel.1
10.244.2.0 10.244.2.0 255.255.255.0 UG 0 0 0 flannel.1
10.244.5.0 10.244.5.0 255.255.255.0 UG 0 0 0 flannel.1
10.244.6.0 10.244.6.0 255.255.255.0 UG 0 0 0 flannel.1
169.254.0.0 0.0.0.0 255.255.0.0 U 1002 0 0 eth0
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0

Routing table

Purpose:

It records how the local networks (the local piece of the Internet) fit together and gives routing nodes — gateways, routers, and the sending host itself — the basis for choosing the next hop.

Principle:

Every next-hop node in the path has an interface where traffic enters and another where it leaves.

Example:

Sending a packet to Baidu (local IP 192.168.150.2, Baidu IP 61.135.169.121) — i.e. traffic bound for an external network:

Each routing entry is matched by ANDing the destination address with its Genmask:

1) For the 255.255.255.0 entry: a destination that ANDs to 192.168.150.0 is on the local segment and directly connected; 61.135.169.121 does not AND to that value, so this entry does not match.

2) For the default entry (Genmask 0.0.0.0): any destination ANDs to 0.0.0.0, so it matches, and the packet is handed to the gateway on the 192.168.150.0/24 segment; the gateway/router node then decides the next hop.

The Iface column is the device the traffic leaves through on its way to that gateway/router node, which decides the next-hop address.

The destination address is still 61.135.169.121. To make sure the packet actually reaches the gateway node, extra addressing is added alongside the destination IP and port: the MAC address of the gateway/router — that is, the link layer wraps the network-layer packet in a frame of its own.

When the packet reaches the server side, the destination IP gets it to the right node, and the destination port identifies the process on that server that should handle the data — which is how process-to-process network communication is achieved.
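To check which routing entry and gateway the kernel would actually pick for the Baidu example above (assuming the iproute2 tools are installed):

# Ask the kernel which route it selects for a given destination
ip route get 61.135.169.121
# The output names the chosen gateway (next hop) and the outgoing interface (Iface)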

tcp-8


Link layer

The link layer maintains an address database (the ARP cache)

View it with arp -a

[root@master1 ~]# arp -a
? (172.17.0.2) at 02:42:ac:11:00:02 [ether] on docker0
node1 (10.33.41.135) at fa:16:3e:99:4b:46 [ether] on eth0
kubernetes.cluster.local.lb (10.33.41.130) at <incomplete> on eth0
? (10.33.41.250) at 50:98:b8:19:af:1f [ether] on eth0
node2 (10.33.41.136) at fa:16:3e:8f:eb:a6 [ether] on eth0
? (10.33.41.131) at e8:61:1f:21:87:c9 [ether] on eth0
? (10.33.41.251) at 74:ea:cb:2d:cd:68 [ether] on eth0
master1 (10.33.41.126) at fa:16:3e:8f:eb:a6 [ether] on eth0
? (10.33.41.137) at fa:16:3e:1a:2e:f3 [ether] on eth0

This is how the MAC address of the target device's NIC is found.

tcp-9

For example, visiting www.baidu.com:

The client sends the request packet carrying the Baidu server's IP address but the MAC address of the home/neighborhood router.

Each intermediate router forwards the packet onward, keeping the destination IP and port unchanged while rewriting the destination MAC address.

On the final hop to Baidu, the packet carries Baidu's server IP address and port together with the Baidu server's MAC address.

It works like a linked list: the original payload never changes, only the hop (MAC) address changes at each step.


The IP routing table can have entries without any traffic ever flowing (at boot it is enough to read the NIC configuration).

Routes configured this way are also called static routes.

The gateway is the next hop!

The ARP cache, by contrast, starts with no MAC entries; records are added and refreshed only after actual network requests complete.

That is, the first time a packet/request is sent, ARP has to ask for the MAC address of the target IP node.

Question: when ARP needs a MAC address it does not yet know, how can it deliver its request to the right MAC in order to learn that address in the first place?

— ARP broadcasts the request to every node on the LAN (destination MAC ff:ff:ff:ff:ff:ff); only the host that owns the target IP responds, and that response reveals its MAC address.

So why is the network stack layered?

Because each layer depends on the one below it.

!!!

Therefore, before the very first connection — i.e. before the three-way handshake — an ARP request must go out to obtain the target MAC address.

Only after the MAC address is known do the three-way handshake and the rest of the TCP/IP connection setup begin.

Example command:

arp -d 192.168.150.2 && curl www.baidu.com
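To watch the ARP exchange that the command above triggers, a capture like this can be started in another terminal first (a sketch; eth0 is an example interface and tcpdump is assumed to be installed):

# Shows the broadcast ARP request (who-has) and the unicast reply (is-at)
tcpdump -i eth0 -nn arp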
tcp-10

tcp-11

For networking tools, -n / -nn mean "numeric": addresses and ports are shown as numbers instead of being resolved to names (they do not control verbosity).


tcp-12

The loopback device (lo)

When routing tables are planned incorrectly, a network engineer has to maintain them by hand — which is the original motivation behind SDN.

tcp-13

tcp-14

With this networking background you can dive into concurrency — socket handling — and from there into parallelism.

Networking → concurrency → parallelism


How do switches, routers, and gateways relate?

Router: a layer-3 device; it holds a routing table and forwards based on IP addresses.

Switch: a layer-2 device (physical/link layer); it forwards based on MAC addresses and cannot by itself connect two different network segments.

A home router is a switch and a router combined: the four LAN sockets are switch ports, while the routing part has two links — one to the built-in switch and one to the ISP.

NAT: Network Address Translation.


A well-paid engineer: M + N (inner skills plus outer experience)

M: personal mastery

Design patterns

Multithreading + high concurrency (JUC)

From networking to distributed systems: redis, zookeeper, elasticsearch, MQ

Tuning: the OS kernel

N: project experience, i.e. real scenarios

Integration ↔ technology ↔ problems

Technology only matters once it lands in a real application scenario.


For any problem, a technology choice comes down to its features, principles, source code, and — above all — the scenario.


Combine several technologies, then solve the problem — find the point where they fit together.


In real work, verify a technology's characteristics and capabilities critically, through practice.

For example, an SSM stack that needs high concurrency brings in load balancing (LVS).

Flagger explained

flagger

An automated application release (progressive delivery) component

Project URL

https://github.com/weaveworks/flagger

Official description

Flagger can be configured to automate the release process for Kubernetes workloads with a custom resource named canary.

Supported providers (the provider values referenced below)

- istio

- Linkerd

- App Mesh

- Nginx

- contour

- CNI

- Kubernetes

Architecture diagram

flagger-arch
Walking through the diagram

- primary service: the version already live in production (the old version)

- canary service: the new version about to be released

- Ingress: the provider that controls the release granularity (traffic routing) during the rollout

- Flagger: driven by the Canary spec, it uses the provider's own primitives to adjust the traffic/replica strategy between primary and canary. While adjusting, it uses the metrics collected by Prometheus to decide whether to roll back the release or keep shifting the traffic/replica ratio. !!! Throughout this process the user can plug in manual intervention, review, and notification steps.

Example YAML

apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: podinfo
  namespace: test
spec:
  # service mesh provider (optional)
  # can be: kubernetes, istio, linkerd, appmesh, nginx, contour, gloo, supergloo
  provider: istio
  # deployment reference
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  # the maximum time in seconds for the canary deployment
  # to make progress before it is rollback (default 600s)
  progressDeadlineSeconds: 60
  # HPA reference (optional)
  autoscalerRef:
    apiVersion: autoscaling/v2beta1
    kind: HorizontalPodAutoscaler
    name: podinfo
  service:
    # service name (defaults to targetRef.name)
    name: podinfo
    # ClusterIP port number
    port: 9898
    # container port name or number (optional)
    targetPort: 9898
    # port name can be http or grpc (default http)
    portName: http
    # add all the other container ports
    # to the ClusterIP services (default false)
    portDiscovery: true
    # HTTP match conditions (optional)
    match:
      - uri:
          prefix: /
    # HTTP rewrite (optional)
    rewrite:
      uri: /
    # request timeout (optional)
    timeout: 5s
  # promote the canary without analysing it (default false)
  skipAnalysis: false
  # define the canary analysis timing and KPIs
  analysis:
    # schedule interval (default 60s)
    interval: 1m
    # max number of failed metric checks before rollback
    threshold: 10
    # max traffic percentage routed to canary
    # percentage (0-100)
    maxWeight: 50
    # canary increment step
    # percentage (0-100)
    stepWeight: 5
    # validation (optional)
    metrics:
      - name: request-success-rate
        # builtin Prometheus check
        # minimum req success rate (non 5xx responses)
        # percentage (0-100)
        thresholdRange:
          min: 99
        interval: 1m
      - name: request-duration
        # builtin Prometheus check
        # maximum req duration P99
        # milliseconds
        thresholdRange:
          max: 500
        interval: 30s
      - name: "database connections"
        # custom Prometheus check
        templateRef:
          name: db-connections
        thresholdRange:
          min: 2
          max: 100
        interval: 1m
    # testing (optional): user-defined intervention hooks
    webhooks:
      - name: "conformance test"
        type: pre-rollout
        url: http://flagger-helmtester.test/
        timeout: 5m
        metadata:
          type: "helmv3"
          cmd: "test run podinfo -n test"
      - name: "load test"
        type: rollout
        url: http://flagger-loadtester.test/
        metadata:
          cmd: "hey -z 1m -q 10 -c 2 http://podinfo.test:9898/"
    # alerting (optional)
    alerts:
      - name: "dev team Slack"
        severity: error
        providerRef:
          name: dev-slack
          namespace: flagger
      - name: "qa team Discord"
        severity: warn
        providerRef:
          name: qa-discord
      - name: "on-call MS Teams"
        severity: info
        providerRef:
          name: on-call-msteams

Basic usage

- targetRef: the workload being released (either a Deployment or a DaemonSet).

- progressDeadlineSeconds: deployment timeout for the canary/primary; if the workload is not ready within this time, no traffic/replica adjustment is made.

- autoscalerRef: the native Kubernetes HPA (horizontal autoscaling).

- service: the Kubernetes Service; when the provider is Istio it corresponds to a VirtualService (which can shift traffic ratios, apply routing policy, etc.).

- skipAnalysis: whether to skip metrics analysis; if true, the primary is replaced by the canary service in one step. (A short kubectl sketch follows this list.)
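A minimal way to apply and watch a Canary like the example above (a sketch: it assumes kubectl access to a cluster where Flagger and its Canary CRD are installed, and that the spec has been saved as podinfo-canary.yaml):

# Apply the Canary resource and watch Flagger drive the rollout
kubectl apply -f podinfo-canary.yaml
kubectl -n test get canary podinfo
kubectl -n test describe canary podinfo   # shows the current weight, checks, and events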

Analysis

analysis:

- holds the configuration for how traffic is shifted between primary and canary

- metrics: the metric sources, e.g. average RT, success rate, or custom metrics (a Prometheus query (PromQL) can be configured directly)

- webhooks: can be used for manual approval gates, load testing, and so on

- alerts: progress details, alert notifications, etc.

Flagger workflow

flagger workflow (diagram)

! The core is the webhooks: manual intervention to pause, roll back, continue, or adjust the strategy.

Deployment strategies

A/B testing

analysis:
  # schedule interval (default 60s)
  interval: 1m
  # total number of iterations
  iterations: 10
  # max number of failed iterations before rollback
  threshold: 2
  # canary match condition
  match:
    - headers:
        x-canary:
          regex: ".*insider.*"
    - headers:
        cookie:
          regex: "^(.*?;)?(canary=always)(;.*)?$"

Taking the example above:
  • While creating the (Istio) VirtualService, multiple HTTPRoutes are set up.
  • Default traffic goes to the primary service.
  • Traffic whose HTTP header or cookie matches the regex is routed to the canary service.
  • The whole process runs 10 iterations, 1 minute apart, allowing at most 2 failed metric checks; beyond that it rolls back.
  • After a normal finish, the "confirm-promotion" webhook runs to confirm whether the primary should be replaced by the canary:

  • if yes, the primary is updated with the canary's spec (Deployment spec, ConfigMaps);
  • if no, it keeps waiting.

Blue/Green

analysis:
  # schedule interval (default 60s)
  interval: 1m
  # total number of iterations
  iterations: 10
  # max number of failed iterations before rollback
  threshold: 2
  webhooks:
    - name: "load test"
      type: rollout
      url: http://flagger-loadtester.test/
      metadata:
        cmd: "hey -z 1m -q 10 -c 2 http://podinfo.test:9898/"

Taking the example above:
  • The whole process runs 10 iterations, 1 minute apart, allowing at most 2 failed metric checks; beyond that it rolls back.
  • During this period the canary service is load-tested.
  • After a normal finish, the "confirm-promotion" webhook runs to confirm whether the primary should be replaced by the canary:

  • if yes, the primary is updated with the canary's spec (Deployment spec, ConfigMaps);
  • if no, it keeps waiting.

If mirror=true is configured (only supported when provider=istio), Istio's traffic-mirroring feature copies each request to both primary and canary and returns the primary's response to the caller. In that case, pay close attention to whether the business logic is idempotent.

Canary

analysis:
  # schedule interval (default 60s)
  interval: 1m
  # max number of failed metric checks before rollback
  threshold: 2
  # max traffic percentage routed to canary
  # percentage (0-100)
  maxWeight: 50
  # canary increment step
  # percentage (0-100)
  stepWeight: 2
  # deploy straight to production without
  # the metrics and webhook checks
  skipAnalysis: false

Taking the example above:
  • The whole process runs up to 25 (maxWeight / stepWeight) iterations, 1 minute apart, allowing at most 2 failed metric checks; beyond that it rolls back.
  • Each step moves stepWeight% of traffic from primary to canary, until the canary reaches maxWeight.
  • The "confirm-promotion" webhook then runs to confirm whether the primary should be replaced by the canary:

  • if yes, the primary is updated with the canary's spec (Deployment spec, ConfigMaps);
  • if no, it keeps waiting.

Other details

Webhooks

webhooks define extension points throughout the release process:
  • confirm-rollout: runs before the canary receives any traffic; useful for manual release approval, gating on automated tests, and similar scenarios.
    If this webhook does not return success (e.g. an HTTP 200 response), the release keeps waiting.
  • pre-rollout: runs before traffic is first shifted to the canary; if it fails more times than the threshold, the release is rolled back.
  • rollout: runs in every release cycle (e.g. at each stepWeight) before the metrics analysis; if it fails more times than the threshold, the release is rolled back.
  • confirm-promotion: runs before the primary is updated with the canary's configuration.
    If it does not succeed, Flagger keeps waiting; while waiting it continues running the metric checks until it finally rolls back.
  • post-rollout: runs after a rollback or after the release finishes; a failure here is only recorded as an event.
  • rollback: provides a way to trigger a rollback manually while the Canary is in the Progressing or Waiting state.
  • event: related Kubernetes events are produced at every lifecycle stage; if an event webhook is configured, the same event information is also sent to it. (A sketch of the webhook contract follows this list.)
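As an illustration of the webhook contract: Flagger POSTs a small JSON payload (canary name/namespace, phase, and the hook's metadata) to the configured URL and treats a 2xx reply as success. A sketch of exercising the load-test hook from the example YAML by hand (it assumes the curl is run somewhere that can resolve the in-cluster name flagger-loadtester.test):

# Send the kind of payload Flagger would send and print only the HTTP status code
curl -s -o /dev/null -w '%{http_code}\n' \
  -X POST -H 'Content-Type: application/json' \
  -d '{"name":"podinfo","namespace":"test","phase":"Progressing","metadata":{"cmd":"hey -z 1m -q 10 -c 2 http://podinfo.test:9898/"}}' \
  http://flagger-loadtester.test/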

Metrics

Metrics decide whether the traffic checks (for A/B, Blue/Green, or Canary) have failed; once the configured threshold is exceeded, the release is rolled back.
  • Built-in metrics

analysis:
  metrics:
    - name: request-success-rate
      interval: 1m
      # minimum req success rate (non 5xx responses)
      # percentage (0-100)
      thresholdRange:
        min: 99
    - name: request-duration
      interval: 1m
      # maximum req duration P99
      # milliseconds
      thresholdRange:
        max: 500
  • request-success-rate: in the example above, the request success rate (non-5xx responses) must not drop below 99%.
  • request-duration: the request duration (P99) must not exceed 500 ms.
    request-success-rate and request-duration are Flagger's built-in metrics.

Different providers implement them differently; the application can also expose its own Prometheus metrics.

• Custom metrics

  1. Create a MetricTemplate — for example a business-specific metric such as the order-payment failure rate.
apiVersion: flagger.app/v1beta1
kind: MetricTemplate
metadata:
  name: not-found-percentage
  namespace: istio-system
spec:
  provider:
    type: prometheus
    address: http://prometheus.istio-system:9090
  query: |
    100 - sum(
        rate(
            istio_requests_total{
                reporter="destination",
                destination_workload_namespace="{{ namespace }}",
                destination_workload="{{ target }}",
                response_code!="404"
            }[{{ interval }}]
        )
    )
    /
    sum(
        rate(
            istio_requests_total{
                reporter="destination",
                destination_workload_namespace="{{ namespace }}",
                destination_workload="{{ target }}"
            }[{{ interval }}]
        )
    ) * 100
  2. Reference the MetricTemplate:

    analysis:
      metrics:
        - name: "404s percentage"
          templateRef:
            name: not-found-percentage
            namespace: istio-system
          thresholdRange:
            max: 5
          interval: 1m

    The example above says the canary's rate of 404 errors must not exceed 5%.

Alerts

Alerts are used for notifications during the release process.
1. Define an AlertProvider (Slack, DingTalk, etc.)

apiVersion: flagger.app/v1beta1
kind: AlertProvider
metadata:
  name: on-call
  namespace: flagger
spec:
  type: slack
  channel: on-call-alerts
  username: flagger
  # webhook address (ignored if secretRef is specified)
  address: https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK
  # secret containing the webhook address (optional)
  secretRef:
    name: on-call-url
---
apiVersion: v1
kind: Secret
metadata:
  name: on-call-url
  namespace: flagger
data:
  address: <encoded-url>

2. Use the alert in the analysis:

analysis:
  alerts:
    - name: "on-call Slack"
      severity: error
      providerRef:
        name: on-call
        namespace: flagger

• severity: the level of the notification, similar to a log level — one of info, warn, error.
Throughout the deployment, alerts are sent at the various stages, e.g. on a successful release or a failed webhook.

! What is needed to get zero-downtime releases with Flagger

  • the Deployment's update strategy must be RollingUpdate, and

  • liveness/readiness health probes must be configured

  • graceful shutdown must be implemented

  • resource requests and limits must be set

Limitations:

Plain Kubernetes CNI only supports Blue/Green releases.

Additional release support:

NGINX supports Canary, A/B, and Blue/Green releases.

Istio supports every release type.


The core of release strategy and action support:

webhook

The supported hooks are:

  • confirm-rollout — confirm that the release may start
  • pre-rollout — an action run as the release starts
  • rollout — run at every step of the release
  • confirm-promotion — confirm that the release may be promoted (take effect)
  • post-rollout — an action run after promotion or rollback
  • rollback — a rollback can be triggered while the release is in the Progressing / Waiting state
  • event — every release-related event can be listened to, so a corresponding policy can be applied

Reference link

https://docs.flagger.app/usage/webhooks

Ceph beginner's notes

Concepts

Distributed storage system

Data is spread over many independent devices connected by a network.

Characteristics

Scalability
Scales to clusters of hundreds or even thousands of machines, and overall system performance grows roughly linearly with cluster size.

Horizontal scaling has these properties:
1) After new nodes are added, old data is automatically migrated to them, balancing load and avoiding hot spots on single nodes;
2) Scaling out only requires connecting the new nodes to the same network as the existing cluster, with no impact on the business;
3) As nodes join the cluster, total capacity and performance scale linearly with them; the new nodes' resources are then taken over by the management platform and allocated or reclaimed as needed.


Low cost
Automatic fault tolerance and automatic load balancing let the system be built on ordinary PC servers. Machines are easy to add or remove, and operations are automated.

High performance
Both individual servers and the cluster as a whole are expected to deliver high performance.

Ease of use
Provides easy-to-use external interfaces, complete monitoring and operations tooling, and integration with existing systems.

Ease of management
The whole system can be configured, managed, and operated through a simple web UI.

The challenge:
Persisting data and state information while guaranteeing data consistency during automatic migration, automatic failover, and concurrent reads and writes.
The techniques involved come mainly from two fields: distributed systems && databases.

Storage classification

Local file systems store files on the local machine and cannot be shared over the network:
ext3, ext4, xfs, ntfs

Network storage — network file systems, where what is shared is a file system:
nfs        network file system
hdfs       distributed network file system
glusterfs  distributed network file system

What is shared can also be a raw block device:
block storage: cinder, ceph (ceph provides block storage, object storage, and a distributed network file system)
SAN (storage area network) — storage and servers are connected through fibre-channel switches, forming a dedicated storage network

Distributed
cluster
                 client
                   |
           namenode (metadata server)
                   |
  ------------------------------------
  |                |                 |
datanode        datanode        datanode


In any file system, the contents split into data and metadata:
data: the actual contents of the file
metadata: describes the file's attributes — file type, block layout (location on disk), etc. — and is used to locate and access the data

ceph

Design goals

A distributed storage system that avoids single-node failures and scales to the PB level, offering good performance, scalability, and reliability.

Advantages

High scalability

Uses commodity x86 servers, supports clusters of roughly 10 to 1000 machines, and scales from TB to EB.

High reliability

No single point of failure, multiple data replicas, automatic management, automatic repair.

High performance

Data is distributed evenly.

It can serve as object storage, block-device storage, and file system storage.

Ceph architecture

ceph architecture (diagram)

Base storage system: RADOS

Reliable, Autonomic, Distributed Object Store. In practice, all user data in a Ceph system ends up being stored by this layer, and the properties listed above are provided by it.

Base library: librados

It abstracts and wraps RADOS and exposes an API upward so that applications can be developed directly against RADOS. ! RADOS itself is an object storage system.

High-level interfaces

radosgw: the object gateway interface (object storage)

rbd: block storage

cephfs: file system storage

Purpose: built on librados, these provide higher-level interfaces that are easier for applications and clients to use.

Ceph components

ceph components (diagram)

The three main daemons

OSD

Stores all of the cluster's data and objects; handles replication, recovery, backfill, and rebalancing of cluster data; sends heartbeats to other OSD daemons and reports monitoring information to the MONs.

When the Ceph storage cluster is configured to keep two replicas of the data (two copies in total), at least two OSD daemons — i.e. two OSD nodes — are needed for the cluster to reach the active+clean state.

MDS (optional)

Provides metadata computation, caching, and synchronization for the Ceph file system (Ceph RBD and object storage do not use MDS). The metadata itself is still stored on the OSD nodes; the MDS acts more like a caching proxy for metadata. The MDS daemon is not mandatory — it only needs to be deployed when CephFS is used.

Monitor

Monitors the state of the whole cluster and maintains the binary cluster map tables, guaranteeing the consistency of cluster data. The cluster map describes the physical locations of object/block storage, plus a list of buckets that aggregate devices into physical locations.

Manager(ceph-mgr)

Collects Ceph cluster state and runtime metrics, such as storage utilization, current performance figures, and system load, and exposes the Ceph dashboard (UI) and a RESTful API. When the Manager component is run highly available, at least two instances are needed.

Ceph structure — two parts: client and node

ceph client: accesses the underlying Ceph services and components and exposes the various interfaces to the outside — the object storage interface, the block storage interface, and the file system storage interface.

ceph node: the side that provides the underlying services, i.e. the Ceph storage cluster.
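Once a cluster is running, a few standard ceph CLI commands (assuming the client configuration and keyring are in place) give a quick view of the components described above:

# Overall health, daemon status, and capacity
ceph -s          # cluster status: MON quorum, MGR, OSDs, PGs
ceph osd tree    # OSD daemons and how they map onto hosts
ceph df          # raw and per-pool storage utilization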

What is OSD?

Object-based Storage is a newer network storage architecture, and a device built on object-storage technology is an Object-based Storage Device, or OSD. Broadly speaking, object storage combines the strengths of NAS and SAN: it offers SAN-like fast direct access together with NAS-like distributed data sharing, yielding a storage architecture with high performance, high reliability, cross-platform support, and secure data sharing.

Troubleshooting a docker pull failure

1. Start with the docker daemon logs

Jul 16 14:41:46 worker1 dockerd[2396]: time="2020-07-16T14:41:46.453596079+08:00" level=error msg="Download failed, retrying: unexpected EOF"
Jul 16 14:42:06 worker1 dockerd[2396]: time="2020-07-16T14:42:06.483391207+08:00" level=error msg="Download failed: unexpected EOF"
Jul 16 14:42:29 worker1 dockerd[2396]: time="2020-07-16T14:42:29.220439089+08:00" level=warning msg="Error getting v2 registry: Get https://af.hikvision.com.cn/v2/: dial tcp: lookup af.hikvision.com.cn on
Jul 16 14:42:29 worker1 dockerd[2396]: time="2020-07-16T14:42:29.245091739+08:00" level=warning msg="Error getting v2 registry: Get http://af.hikvision.com.cn/v2/: dial tcp: lookup af.hikvision.com.cn on
Jul 16 14:42:29 worker1 dockerd[2396]: time="2020-07-16T14:42:29.245196494+08:00" level=error msg="Handler for POST /images/create returned error: Get http://af.hikvision.com.cn/v2/: dial tcp: lookup af.h
Jul 16 14:46:33 worker1 dockerd[2396]: time="2020-07-16T14:46:33.972502089+08:00" level=error msg="Download failed, retrying: unexpected EOF"
Jul 16 14:46:38 worker1 dockerd[2396]: time="2020-07-16T14:46:38.999069663+08:00" level=error msg="Download failed, retrying: unexpected EOF"
Jul 16 14:46:49 worker1 dockerd[2396]: time="2020-07-16T14:46:49.014718214+08:00" level=error msg="Download failed, retrying: unexpected EOF"
Jul 16 14:47:04 worker1 dockerd[2396]: time="2020-07-16T14:47:04.041825197+08:00" level=error msg="Download failed, retrying: unexpected EOF"
Jul 16 14:47:24 worker1 dockerd[2396]: time="2020-07-16T14:47:24.070685084+08:00" level=error msg="Download failed: unexpected EOF"

2. Fix the name-resolution problem (/etc/resolv.conf)

nameserver 114.114.114.114
nameserver 10.96.0.10
nameserver 10.1.7.77
nameserver 10.1.7.97
nameserver 10.1.7.98
nameserver 10.1.7.88

was changed to

nameserver 10.1.7.98
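After changing /etc/resolv.conf it is worth confirming that the registry hostname from the logs above now resolves (assuming nslookup from bind-utils is installed):

# Resolve the registry hostname explicitly against the remaining nameserver
nslookup af.hikvision.com.cn 10.1.7.98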

3. Check the Harbor registry logs

# Command:
k logs harbor-harbor-registry-68c9db8d9c-m76x8 -c registry --tail=100


# Output:
10.19.123.165 - - [16/Jul/2020:07:30:39 +0000] "GET /v2 HTTP/1.1" 301 39 "" "Go-http-client/1.1"
time="2020-07-16T07:30:39.192445209Z" level=debug msg="authorizing request" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=ce22d938-3e7e-4164-b1c8-0d312306a6e0 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="10.19.123.165:7611" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
time="2020-07-16T07:30:39.192513529Z" level=warning msg="error authorizing context: authorization token required" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=ce22d938-3e7e-4164-b1c8-0d312306a6e0 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="10.19.123.165:7611" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
10.19.123.165 - - [16/Jul/2020:07:30:39 +0000] "GET /v2/ HTTP/1.1" 401 87 "http://harbor-harbor-registry:9188/v2" "Go-http-client/1.1"
172.30.5.3 - - [16/Jul/2020:07:30:47 +0000] "GET / HTTP/1.1" 200 0 "" "kube-probe/1.17"
time="2020-07-16T07:30:47.219604256Z" level=info msg="response completed" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=736a16dc-64c3-4aff-98d3-c091919b9665 http.request.method=GET http.request.remoteaddr="172.30.5.2:30577" http.request.uri="/v2" http.request.useragent="Go-http-client/1.1" http.response.contenttype="text/html; charset=utf-8" http.response.duration="85.835µs" http.response.status=301 http.response.written=39
172.30.5.2 - - [16/Jul/2020:07:30:47 +0000] "GET /v2 HTTP/1.1" 301 39 "" "Go-http-client/1.1"
time="2020-07-16T07:30:47.221183139Z" level=debug msg="authorizing request" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=ccd84eed-8eee-4585-b861-e6fef8e95cc4 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="172.30.5.2:30577" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
time="2020-07-16T07:30:47.221269133Z" level=warning msg="error authorizing context: authorization token required" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=ccd84eed-8eee-4585-b861-e6fef8e95cc4 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="172.30.5.2:30577" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
172.30.5.2 - - [16/Jul/2020:07:30:47 +0000] "GET /v2/ HTTP/1.1" 401 87 "http://harbor-harbor-registry:9188/v2" "Go-http-client/1.1"
172.30.5.3 - - [16/Jul/2020:07:30:47 +0000] "GET / HTTP/1.1" 200 0 "" "kube-probe/1.17"
time="2020-07-16T07:30:49.190353024Z" level=info msg="response completed" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=6e4083f2-529c-4c35-b07e-99afeb491aa7 http.request.method=GET http.request.remoteaddr="10.19.123.165:63600" http.request.uri="/v2" http.request.useragent="Go-http-client/1.1" http.response.contenttype="text/html; charset=utf-8" http.response.duration="108.978µs" http.response.status=301 http.response.written=39
10.19.123.165 - - [16/Jul/2020:07:30:49 +0000] "GET /v2 HTTP/1.1" 301 39 "" "Go-http-client/1.1"
time="2020-07-16T07:30:49.191510503Z" level=debug msg="authorizing request" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=c33522c3-2afb-4a8f-b422-e60b5aeae7af http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="10.19.123.165:63600" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
time="2020-07-16T07:30:49.191572656Z" level=warning msg="error authorizing context: authorization token required" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=c33522c3-2afb-4a8f-b422-e60b5aeae7af http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="10.19.123.165:63600" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
10.19.123.165 - - [16/Jul/2020:07:30:49 +0000] "GET /v2/ HTTP/1.1" 401 87 "http://harbor-harbor-registry:9188/v2" "Go-http-client/1.1"
172.30.5.3 - - [16/Jul/2020:07:30:57 +0000] "GET / HTTP/1.1" 200 0 "" "kube-probe/1.17"
time="2020-07-16T07:30:57.219890218Z" level=info msg="response completed" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=b463ef5e-7723-4a94-90e1-ca1163550553 http.request.method=GET http.request.remoteaddr="172.30.5.2:8799" http.request.uri="/v2" http.request.useragent="Go-http-client/1.1" http.response.contenttype="text/html; charset=utf-8" http.response.duration="178.483µs" http.response.status=301 http.response.written=39
172.30.5.2 - - [16/Jul/2020:07:30:57 +0000] "GET /v2 HTTP/1.1" 301 39 "" "Go-http-client/1.1"
time="2020-07-16T07:30:57.221293323Z" level=debug msg="authorizing request" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=a3964e76-656f-4b74-b1a7-18d72ece4d39 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="172.30.5.2:8799" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
time="2020-07-16T07:30:57.221359025Z" level=warning msg="error authorizing context: authorization token required" go.version=go1.11.8 http.request.host="harbor-harbor-registry:9188" http.request.id=a3964e76-656f-4b74-b1a7-18d72ece4d39 http.request.method=GET http.request.referer="http://harbor-harbor-registry:9188/v2" http.request.remoteaddr="172.30.5.2:8799" http.request.uri="/v2/" http.request.useragent="Go-http-client/1.1"
172.30.5.2 - - [16/Jul/2020:07:30:57 +0000] "GET /v2/ HTTP/1.1" 401 87 "http://harbor-harbor-registry:9188/v2" "Go-http-client/1.1"

Harbor itself is running normally.

4. Check the layer files in Harbor's registry storage

Files

cd /opt/hikcloud/harbor/registry/docker/registry/v2/repositories/docker-pbg/ydic/_layers/sha256

# ll shows that the failing layer does exist on disk

5. Check the nginx ingress controller

Look at the nginx logs

2020/07/16 07:22:36 [error] 2043#2043: *197739729 upstream prematurely closed connection while reading upstream, client: 172.30.5.3, server: docker.hikcloud, request: "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1", upstream: "http://172.30.5.3:9185/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db", host: "docker.hikcloud:30001"
172.30.5.3 - - [16/Jul/2020:07:22:36 +0000] "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1" 200 0 "-" "docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" 1465 0.016 [kube-system-harbor-harbor-core-80] [] 172.30.5.3:9185 0 0.017 200 ce6fa0910582345c45ded8d5b25ae0d3
2020/07/16 07:22:45 [error] 2046#2046: *197739952 upstream prematurely closed connection while reading upstream, client: 172.30.5.3, server: docker.hikcloud, request: "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1", upstream: "http://172.30.5.2:9185/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db", host: "docker.hikcloud:30001"
172.30.5.3 - - [16/Jul/2020:07:22:45 +0000] "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1" 200 0 "-" "docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" 1465 0.015 [kube-system-harbor-harbor-core-80] [] 172.30.5.2:9185 0 0.014 200 54a5962ae803cfaf593c720b4d8f4171
2020/07/16 07:22:51 [error] 2049#2049: *197740104 upstream prematurely closed connection while reading upstream, client: 172.30.5.3, server: docker.hikcloud, request: "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1", upstream: "http://172.30.5.2:9185/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db", host: "docker.hikcloud:30001"
172.30.5.3 - - [16/Jul/2020:07:22:51 +0000] "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1" 200 0 "-" "docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" 1465 0.025 [kube-system-harbor-harbor-core-80] [] 172.30.5.2:9185 0 0.025 200 12d8f488258785ebf59bbae48881c3de
[root@worker1 ~]# kubectl logs --tail=100 -n kube-system harbor-harbor-registry-68c9db8d9c-m76x8 -c registry | grep 674
time="2020-07-16T07:23:11.349040402Z" level=debug msg="authorizing request" go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" vars.digest="sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" vars.name="docker-pbg/ydic"
time="2020-07-16T07:23:11.349875921Z" level=info msg="authorized request" go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" vars.digest="sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" vars.name="docker-pbg/ydic"
time="2020-07-16T07:23:11.349994691Z" level=debug msg=GetBlob auth.user.name= go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" vars.digest="sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" vars.name="docker-pbg/ydic"
time="2020-07-16T07:23:11.353393785Z" level=debug msg="filesystem.URLFor("/docker/registry/v2/blobs/sha256/67/674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db/data")" auth.user.name= go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" trace.duration=36.014µs trace.file="/go/src/github.com/docker/distribution/registry/storage/driver/base/base.go" trace.func="github.com/docker/distribution/registry/storage/driver/base.(*Base).URLFor" trace.id=587d96eb-1aae-4d8d-8abc-cc0cc0c3d972 trace.line=217 vars.digest="sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" vars.name="docker-pbg/ydic"
time="2020-07-16T07:23:11.36010056Z" level=debug msg="filesystem.Reader("/docker/registry/v2/blobs/sha256/67/674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db/data", 0)" auth.user.name= go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" trace.duration=6.584606ms trace.file="/go/src/github.com/docker/distribution/registry/storage/driver/base/base.go" trace.func="github.com/docker/distribution/registry/storage/driver/base.(*Base).Reader" trace.id=bc92eae0-665a-42e5-8a2b-42a69ce3b03c trace.line=125 vars.digest="sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" vars.name="docker-pbg/ydic"
time="2020-07-16T07:23:11.361415935Z" level=info msg="response completed" go.version=go1.11.8 http.request.host="docker.hikcloud:30001" http.request.id=39de630c-307d-4ae0-9d98-d86ba76303e0 http.request.method=GET http.request.remoteaddr=172.30.5.2 http.request.uri="/v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db" http.request.useragent="docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)" http.response.contenttype="application/octet-stream" http.response.duration=14.387271ms http.response.status=200 http.response.written=0
172.30.5.2 - - [16/Jul/2020:07:23:11 +0000] "GET /v2/docker-pbg/ydic/blobs/sha256:674a59b4e199228c78a5882ede113516d45d2eadac407f1828d03dda775875db HTTP/1.1" 200 0 "" "docker/19.03.4 go/go1.12.10 git-commit/9013bf583a kernel/3.10.0-1062.18.1.el7.x86_64 os/linux arch/amd64 UpstreamClient(Go-http-client/1.1)"

upstream prematurely closed connection while reading upstream

Root cause: the backend (upstream) service is misbehaving — it closes the connection prematurely while the response is being read.