首页 > 编程知识 正文

vxlan配置实例详解,华为vxlan配置案例

时间:2023-05-06 04:03:55 阅读:256788 作者:4506

4.1.12内核已经支持vxlan报文的gro功能,意味着vxlan报文交给协议栈之前,已经被聚合过了,而在早期的内核中聚合逻辑是在encap_rcv函数之后实现的。

之前分析的UDP报文处理中,可以知道如果udp_sock定义了encap_rcv函数,将会把报文交给该函数处理,而不是传统的保存到sock队列,唤醒进程收包。

udp_sock定义的encap_rcv函数是在vxlan_socket_create函数中设置的,实际是vxlan_udp_encap_recv函数。

vxlan_udp_encap_recv函数

/* Callback from net/ipv4/udp.c to receive packets */static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb){struct vxlan_sock *vs;struct vxlanhdr *vxh;u32 flags, vni;struct vxlan_metadata md = {0};/* Need Vxlan and inner Ethernet header to be present */if (!pskb_may_pull(skb, VXLAN_HLEN))//报文长度检测goto error;vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);//得到vxlan头指针,和UDP头长度相同,所以可以这么操作flags = ntohl(vxh->vx_flags);vni = ntohl(vxh->vx_vni);if (flags & VXLAN_HF_VNI) {//发送的vxlan报文,该flag必须置1flags &= ~VXLAN_HF_VNI;} else {/* VNI flag always required to be set */goto bad_flags;}if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))//报文移动到内层报文goto drop;vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);vs = rcu_dereference_sk_user_data(sk);if (!vs)goto drop;if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { //VXLAN_HF_RCO意味着发送端的vxlan设置了VXLAN_F_REMCSUM_TX标记vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,//并且报文的ip_summed == CHECKSUM_PARTIAL !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));//remcsum检测,检测失败丢弃该报文if (!vxh)goto drop;flags &= ~VXLAN_HF_RCO;//flags去掉VXLAN_HF_RCO标记vni &= VXLAN_VNI_MASK;//vni去掉低8位内容,仅剩下vni ID}/* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. */if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {struct vxlanhdr_gbp *gbp;gbp = (struct vxlanhdr_gbp *)vxh;md.gbp = ntohs(gbp->policy_id);if (gbp->dont_learn)md.gbp |= VXLAN_GBP_DONT_LEARN;if (gbp->policy_applied)md.gbp |= VXLAN_GBP_POLICY_APPLIED;flags &= ~VXLAN_GBP_USED_BITS;}if (flags || vni & ~VXLAN_VNI_MASK) {//flags没有其他标记,vni低8为0/* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved * in reserved fields are to be ignored. The approach here * maintains compatibility with previous stack code, and also * is more robust and provides a little more security in * adding extensions to VXLAN. */goto bad_flags;}md.vni = vxh->vx_vni;vs->rcv(vs, skb, &md);//内核定义了vxlan_rcv,如果是内核自带OVS创建vxlan端口,则使用OVS定义的vxlan_rcv函数。return 0;drop:/* Consume bad packet */kfree_skb(skb);return 0;bad_flags:netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#xn", ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));error:/* Return non vxlan pkt */return 1;}vxlan_rcv函数(内核自带OVS创建vxlan端口时指定)

static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md){struct ovs_tunnel_info tun_info;struct vxlan_port *vxlan_port;struct vport *vport = vs->data;struct iphdr *iph;struct ovs_vxlan_opts opts = {.gbp = md->gbp,};__be64 key;__be16 flags;flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0);vxlan_port = vxlan_vport(vport);if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)flags |= TUNNEL_VXLAN_OPT;/* Save outer tunnel values */iph = ip_hdr(skb);key = cpu_to_be64(ntohl(md->vni) >> 8);ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, key, flags, &opts, sizeof(opts));ovs_vport_receive(vport, skb, &tun_info);//调用OVS收包函数}vxlan_rcv(内核自带)

static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md){struct iphdr *oip = NULL;struct ipv6hdr *oip6 = NULL;struct vxlan_dev *vxlan;struct pcpu_sw_netstats *stats;union vxlan_addr saddr;__u32 vni;int err = 0;union vxlan_addr *remote_ip;vni = ntohl(md->vni) >> 8;/* Is this VNI defined? */vxlan = vxlan_vs_find_vni(vs, vni);if (!vxlan)goto drop;remote_ip = &vxlan->default_dst.remote_ip;skb_reset_mac_header(skb);skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));skb->protocol = eth_type_trans(skb, vxlan->dev);//解析报文protocol,同时会设置skb的dev为vxlan设备skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);//报文移动到IP头,netif_receive_skb要求/* Ignore packet loops (and multicast echo) */if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))//报文源mac等于vxlan设备的mac,丢弃报文goto drop;/* Re-examine inner Ethernet packet */if (remote_ip->sa.sa_family == AF_INET) {oip = ip_hdr(skb);saddr.sin.sin_addr.s_addr = oip->saddr;saddr.sa.sa_family = AF_INET;#if IS_ENABLED(CONFIG_IPV6)} else {oip6 = ipv6_hdr(skb);saddr.sin6.sin6_addr = oip6->saddr;saddr.sa.sa_family = AF_INET6;#endif}if ((vxlan->flags & VXLAN_F_LEARN) && vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))//vxlan fdb表学习,记录mac和ip的对应关系goto drop;skb_reset_network_header(skb);skb->mark = md->gbp;if (oip6)err = IP6_ECN_decapsulate(oip6, skb);if (oip)err = IP_ECN_decapsulate(oip, skb);//内外层tos检测if (unlikely(err)) {if (log_ecn_error) {if (oip6)net_info_ratelimited("non-ECT from %pI6n", &oip6->saddr);if (oip)net_info_ratelimited("non-ECT from %pI4 with TOS=%#xn", &oip->saddr, oip->tos);}if (err > 1) {++vxlan->dev->stats.rx_frame_errors;++vxlan->dev->stats.rx_errors;goto drop;}}stats = this_cpu_ptr(vxlan->dev->tstats);u64_stats_update_begin(&stats->syncp);stats->rx_packets++;stats->rx_bytes += skb->len;u64_stats_update_end(&stats->syncp);netif_rx(skb);//交给协议栈处理,skb的dev为vxlan_devreturn;drop:/* Consume bad packet */kfree_skb(skb);}


版权声明:该文观点仅代表作者本人。处理文章:请发送邮件至 三1五14八八95#扣扣.com 举报,一经查实,本站将立刻删除。