
Linux Kernel Analysis - Networking: a Detailed Look at netif_receive_skb, the Packet Entry Point

The receive path starts in the NIC driver and works its way up through features such as NAPI, GRO and RPS, but it almost always ends up in __netif_receive_skb():
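As a rough map of how we get there (a sketch for a NAPI driver on kernels of this era, roughly 2.6.3x/3.x; exact names vary by version):

/* Approximate receive call chain for a NAPI driver:
 *
 *   hardware IRQ -> driver ISR -> napi_schedule()
 *     -> NET_RX_SOFTIRQ -> net_rx_action()
 *       -> driver's napi->poll() -> napi_gro_receive() / netif_receive_skb()
 *         -> (RPS may bounce the skb to another CPU here)
 *           -> __netif_receive_skb()
 */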

The function performs four main steps:

1. VLAN handling: the VLAN header is stripped in a loop, so in a QinQ scenario both tags end up removed;

2. Hand the skb to the device's rx_handler, e.g. Open vSwitch or the Linux bridge;

3. ptype_all handling, e.g. packet-capture programs and raw sockets;

4. ptype_base handling: hand the packet to the protocol stack proper, e.g. IP, ARP, RARP.

static int __netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	if (!netdev_tstamp_prequeue)
		net_timestamp_check(skb);

	trace_netif_receive_skb(skb);

	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;

	if (!skb->skb_iif)
		skb->skb_iif = skb->dev->ifindex;
	orig_dev = skb->dev;

	skb_reset_network_header(skb);	/* point the L3 and L4 headers at data;
					 * the L2 header has already been consumed
					 * by the time we get here */
	skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

	rcu_read_lock();

another_round:

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Before the packet goes to any protocol-specific handler, run the
	 * handlers registered on ptype_all. The most common user is tcpdump,
	 * which obtains every received packet right here. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;	/* optimization: the callback for this
						 * match only fires once the next
						 * match is found */
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;
ncls:
#endif

	rx_handler = rcu_dereference(skb->dev->rx_handler);	/* registered on the device,
								 * e.g. by the bridge or OVS */
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (vlan_tx_tag_present(skb)) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb)) {
			ret = __netif_receive_skb(skb);
			goto out;
		} else if (unlikely(!skb))
			goto out;
	}

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);	/* atomic_inc(&skb->users) */
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* the final delivery skips the refcount bump and calls the
		 * handler directly */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
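The deliver_skb() helper used throughout takes an extra reference before invoking the handler, which is why the final delivery at the bottom of the function can call pt_prev->func() directly without one. In kernels from this period it is simply (net/core/dev.c):

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);	/* one reference per pending delivery */
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}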

The function relies on two global variables:

static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;

Here are a few common packet_type instances; each is added to the appropriate list by dev_add_pack() when its protocol initializes:

static struct packet_type ip_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.func = ip_rcv,
	.gso_send_check = inet_gso_send_check,
	.gso_segment = inet_gso_segment,
	.gro_receive = inet_gro_receive,
	.gro_complete = inet_gro_complete,
};

static struct packet_type arp_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_ARP),
	.func = arp_rcv,
};
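To see how registration works in practice, here is a minimal, hypothetical module that taps every received packet the way tcpdump does, by putting an ETH_P_ALL packet_type onto ptype_all. The module and handler names are invented; dev_add_pack()/dev_remove_pack() and the handler signature are the real kernel API:

/* tap.c - minimal sketch of a ptype_all tap */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	printk(KERN_INFO "tap: dev=%s len=%u proto=0x%04x\n",
	       dev->name, skb->len, ntohs(skb->protocol));
	kfree_skb(skb);		/* consume the reference deliver_skb took for us */
	return 0;
}

static struct packet_type tap_ptype __read_mostly = {
	.type = cpu_to_be16(ETH_P_ALL),	/* ETH_P_ALL lands on ptype_all */
	.func = tap_rcv,
};

static int __init tap_init(void)
{
	dev_add_pack(&tap_ptype);
	return 0;
}

static void __exit tap_exit(void)
{
	dev_remove_pack(&tap_ptype);
}

module_init(tap_init);
module_exit(tap_exit);
MODULE_LICENSE("GPL");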

ip_rcv() performs a number of sanity checks on the L3 header:

int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
	   struct net_device *orig_dev)
{
	const struct iphdr *iph;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST)	/* set by the driver from the destination MAC;
						 * packets not addressed to this host are
						 * dropped here */
		goto drop;

	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 */

	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))	/* verify the IP header checksum */
		goto inhdr_error;

	len = ntohs(iph->tot_len);	/* tot_len is the true datagram size; skb->len was set
					 * by the driver, and small frames are padded on the
					 * wire, so skb->len can be larger */
	if (skb->len < len) {
		/* e.g. with r8169, a UDP packet carrying 1 byte of payload has
		 * tot_len = 20 + 8 + 1 = 29, but skb->len = 46 = 64 (minimum
		 * frame) - 14 (Ethernet header) - 4 (FCS) */
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now we know it
	 * is IP we can trim to the true length of the frame.
	 * Note this now means skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {	/* strip the padding */
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

	/* Must drop socket now because of tproxy. */
	skb_orphan(skb);

	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
		       ip_rcv_finish);

inhdr_error:
	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
	kfree_skb(skb);
out:
	return NET_RX_DROP;
}
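The NF_HOOK() at the end deserves a note. Conceptually (a simplified view that ignores verdict queueing and the netfilter-disabled build), it behaves like:

/* verdict = run the NF_INET_PRE_ROUTING chain for NFPROTO_IPV4;
 * if (verdict == NF_ACCEPT)
 *         return okfn(skb);        // here okfn == ip_rcv_finish
 * NF_DROP and friends free the skb inside the chain.
 */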

So, barring a netfilter drop, ip_rcv_finish runs next:

static int ip_rcv_finish(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	if (skb_dst(skb) == NULL) {
		/* route lookup: the destination address decides whether the
		 * packet is delivered locally or forwarded (if forwarding is
		 * enabled) */
		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					       iph->tos, skb->dev);
		if (unlikely(err)) {
			if (err == -EHOSTUNREACH)
				IP_INC_STATS_BH(dev_net(skb->dev),
						IPSTATS_MIB_INADDRERRORS);
			else if (err == -ENETUNREACH)
				IP_INC_STATS_BH(dev_net(skb->dev),
						IPSTATS_MIB_INNOROUTES);
			else if (err == -EXDEV)
				NET_INC_STATS_BH(dev_net(skb->dev),
						 LINUX_MIB_IPRPFILTER);
			goto drop;
		}
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb))
		goto drop;

	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
				   skb->len);
	} else if (rt->rt_type == RTN_BROADCAST)
		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
				   skb->len);

	return dst_input(skb);	/* skb_dst(skb)->input(skb), assigned during the
				 * route lookup; for local delivery it is
				 * ip_local_deliver */

drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

int ip_local_deliver(struct sk_buff *skb)
{
	/*
	 *	Reassemble IP fragments.
	 */
	if (ip_is_fragment(ip_hdr(skb))) {
		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}

	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}

Skipping the IP defragmentation path, ip_local_deliver_finish() is called next; it dispatches to the handler for the L4 protocol named in the L3 header:

static int ip_local_deliver_finish(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);

	__skb_pull(skb, ip_hdrlen(skb));	/* advance data past the L3 header;
						 * data now points at the L4 header */

	/* Point into the IP datagram, just past the header. */
	skb_reset_transport_header(skb);

	rcu_read_lock();
	{
		int protocol = ip_hdr(skb)->protocol;	/* the L4 protocol, e.g. TCP or UDP */
		int hash, raw;
		const struct net_protocol *ipprot;

	resubmit:
		raw = raw_local_deliver(skb, protocol);	/* deliver to matching raw sockets, if any */

		hash = protocol & (MAX_INET_PROTOS - 1);
		ipprot = rcu_dereference(inet_protos[hash]);	/* e.g. udp_protocol */
		if (ipprot != NULL) {
			int ret;

			if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
				if (net_ratelimit())
					printk("%s: proto %d isn't netns-ready\n",
					       __func__, protocol);
				kfree_skb(skb);
				goto out;
			}

			if (!ipprot->no_policy) {
				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
					kfree_skb(skb);
					goto out;
				}
				nf_reset(skb);
			}
			ret = ipprot->handler(skb);	/* for UDP: udp_rcv */
			if (ret < 0) {
				protocol = -ret;
				goto resubmit;
			}
			IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
		} else {
			if (!raw) {
				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
					IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
					icmp_send(skb, ICMP_DEST_UNREACH,
						  ICMP_PROT_UNREACH, 0);
				}
			} else
				IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
			kfree_skb(skb);
		}
	}
 out:
	rcu_read_unlock();
	return 0;
}
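The inet_protos[] array consulted above is populated by inet_add_protocol(). For UDP, the entry is defined in net/ipv4/af_inet.c and registered during inet_init(); the fields below are as I recall them for kernels of this era:

static const struct net_protocol udp_protocol = {
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.gso_send_check = udp4_ufo_send_check,
	.gso_segment = udp4_ufo_fragment,
	.no_policy =	1,
	.netns_ok =	1,
};

/* during inet_init(): */
if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
	printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");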

UDP's handler is udp_rcv(), which ends up in __udp4_lib_rcv():

int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
		   int proto)
{
	struct sock *sk;
	struct udphdr *uh;
	unsigned short ulen;
	struct rtable *rt = skb_rtable(skb);
	__be32 saddr, daddr;
	struct net *net = dev_net(skb->dev);

	/*
	 *  Validate the packet.
	 */
	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto drop;		/* No space for header. */

	uh = udp_hdr(skb);
	ulen = ntohs(uh->len);
	saddr = ip_hdr(skb)->saddr;
	daddr = ip_hdr(skb)->daddr;

	if (ulen > skb->len)
		goto short_packet;

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */
		if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
			goto short_packet;
		uh = udp_hdr(skb);
	}

	if (udp4_csum_init(skb, uh, proto))
		goto csum_error;

	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
		return __udp4_lib_mcast_deliver(net, skb, uh,
						saddr, daddr, udptable);

	/* look up the sock by IP addresses and port numbers;
	 * the receiving process sleeps on that sock's wait queue */
	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);

	if (sk != NULL) {	/* non-NULL: a matching socket is waiting for this data */
		int ret = udp_queue_rcv_skb(sk, skb);
		sock_put(sk);

		/* a return value > 0 means to resubmit the input, but
		 * it wants the return to be -protocol, or 0
		 */
		if (ret > 0)
			return -ret;
		return 0;
	}

	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto drop;
	nf_reset(skb);

	/* No socket. Drop packet silently, if checksum is wrong */
	if (udp_lib_checksum_complete(skb))
		goto csum_error;

	UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

	/*
	 * Hmm.  We got an UDP packet to a port to which we
	 * don't wanna listen.  Ignore it.
	 */
	kfree_skb(skb);
	return 0;
}

First, the sock hash lookup, __udp4_lib_lookup_skb(). The hash-table walk itself is routine; the interesting part is the matching function:

static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
				unsigned short hnum,
				__be16 sport, __be32 daddr, __be16 dport, int dif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
			!ipv6_only_sock(sk)) {
		struct inet_sock *inet = inet_sk(sk);

		score = (sk->sk_family == PF_INET ? 1 : 0);	/* normally PF_INET */
		if (inet->inet_rcv_saddr) {	/* set if bind() named an address;
						 * INADDR_ANY otherwise */
			if (inet->inet_rcv_saddr != daddr)
				return -1;
			score += 2;
		}
		if (inet->inet_daddr) {		/* normally 0; see inet_bind() */
			if (inet->inet_daddr != saddr)
				return -1;
			score += 2;
		}
		if (inet->inet_dport) {		/* normally 0 */
			if (inet->inet_dport != sport)
				return -1;
			score += 2;
		}
		if (sk->sk_bound_dev_if) {	/* normally 0 */
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 2;
		}
	}
	return score;
}

The port number selects the hash bucket; each candidate sock is then scored on how many of its fields match the packet. A score above -1 means the sock matches, and the highest-scoring sock wins.
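The effect of the scoring is easy to demonstrate from user space: with SO_REUSEADDR, one socket bound to INADDR_ANY and one bound to 127.0.0.1 can share a port, and a datagram sent to 127.0.0.1 lands on the more specific socket (score 3 vs. 1). A small test program (the port 5555 is arbitrary and error handling is trimmed):

#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static int bound_udp(in_addr_t addr, int port)
{
	int one = 1, fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(port),
				  .sin_addr.s_addr = addr };
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
	return fd;
}

int main(void)
{
	int any  = bound_udp(htonl(INADDR_ANY), 5555);       /* score 1 */
	int loop = bound_udp(htonl(INADDR_LOOPBACK), 5555);  /* score 3 */
	struct sockaddr_in dst = { .sin_family = AF_INET,
				   .sin_port = htons(5555) };
	char buf[16];

	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
	sendto(any, "hi", 2, 0, (struct sockaddr *)&dst, sizeof(dst));

	/* compute_score prefers the socket whose inet_rcv_saddr matches daddr */
	printf("loopback socket got %zd bytes\n",
	       recv(loop, buf, sizeof(buf), MSG_DONTWAIT));
	printf("wildcard socket got %zd bytes (expect -1/EAGAIN)\n",
	       recv(any, buf, sizeof(buf), MSG_DONTWAIT));

	close(any);
	close(loop);
	return 0;
}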

Once the sock is found, and with some validity checks elided, the core logic of udp_queue_rcv_skb() is:

	if (sk_rcvqueues_full(sk, skb))	/* sk->sk_rmem_alloc over the limit */
		goto drop;

	rc = 0;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		rc = __udp_queue_rcv_skb(sk, skb);
	else if (sk_add_backlog(sk, skb)) {
		bh_unlock_sock(sk);
		goto drop;
	}
	bh_unlock_sock(sk);

This splits into two cases:

1) Nobody holds the sock: the skb is appended to sk_receive_queue and any waiting process is woken.

2) The sock is owned by a user context: the skb is appended to the backlog list, which is processed when the sock is released.

First, case 1:

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;	/* the receive queue head */

	/* Cast sk->rcvbuf to unsigned... It is pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);	/* set to sock_def_readable by
						 * sock_init_data() */
	return 0;
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

Now case 2, appending to the backlog list:

static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	/* dont let skb dst not refcounted, we are going to leave rcu lock */
	skb_dst_force(skb);

	if (!sk->sk_backlog.tail)
		sk->sk_backlog.head = skb;
	else
		sk->sk_backlog.tail->next = skb;

	sk->sk_backlog.tail = skb;
	skb->next = NULL;
}

When the sock is released, this list is checked:

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
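The ownership protocol behind this (a sketch, not a literal kernel excerpt) is what creates the backlog in the first place:

/* process context: */
lock_sock(sk);		/* sets sk->sk_lock.owned; from now on softirq
			 * input goes to sk->sk_backlog instead of being
			 * processed immediately */
/* ... e.g. udp_recvmsg() pulls skbs off sk_receive_queue ... */
release_sock(sk);	/* __release_sock() drains the backlog through
			 * sk_backlog_rcv(), then wakes sk_lock.wq */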

__release_sock walks every skb on the backlog list and calls sk_backlog_rcv() on each:

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);	/* sk->sk_backlog_rcv(sk, skb),
							 * i.e. sk->sk_prot->backlog_rcv */

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

For UDP the backlog handler is __udp_queue_rcv_skb:

static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc;

	if (inet_sk(sk)->inet_daddr)
		sock_rps_save_rxhash(sk, skb->rxhash);

	rc = ip_queue_rcv_skb(sk, skb);	/* calls sock_queue_rcv_skb(), i.e.
					 * back to case 1 above */
	if (rc < 0) {
		int is_udplite = IS_UDPLITE(sk);

		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM)
			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
					 is_udplite);
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
		kfree_skb(skb);
		trace_udp_fail_queue_rcv_skb(rc, sk);
		return -1;
	}
	return 0;
}

In one sentence: for UDP, __netif_receive_skb ends up placing the skb on the sock's sk_receive_queue and waking whichever process is waiting for the data.
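From the application's point of view, the whole chain terminates in a blocking recvfrom(): the task sleeps on the socket's wait queue until sock_def_readable() wakes it. A minimal receiver (hypothetical port, error handling trimmed):

#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(5555),
				  .sin_addr.s_addr = htonl(INADDR_ANY) };
	char buf[2048];
	ssize_t n;

	bind(fd, (struct sockaddr *)&sa, sizeof(sa));

	/* blocks in the kernel: the task sleeps on the sock's wait queue
	 * until sock_def_readable() is called for this sock */
	n = recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL);
	printf("got %zd bytes\n", n);

	close(fd);
	return 0;
}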

ARP packet handling:

As netif_receive_skb() shows, what gets processed there are protocols above the link layer, such as ARP and IP. So where was the link-layer header removed? In the NIC driver, before netif_receive_skb() was called:

skb->protocol = eth_type_trans(skb, bp->dev);

After this call, skb->data points just past the Ethernet header, and mac_header marks where the Ethernet header begins.
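Condensed from net/ethernet/eth.c of this era (802.3 length handling and several other details elided), eth_type_trans() does roughly:

__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
	struct ethhdr *eth;

	skb->dev = dev;
	skb_reset_mac_header(skb);	/* mac_header = start of the Ethernet header */
	skb_pull_inline(skb, ETH_HLEN);	/* skb->data now points at the L3 header */
	eth = eth_hdr(skb);

	if (unlikely(is_multicast_ether_addr(eth->h_dest)))
		skb->pkt_type = PACKET_BROADCAST;	/* or PACKET_MULTICAST */
	else if (compare_ether_addr_64bits(eth->h_dest, dev->dev_addr))
		skb->pkt_type = PACKET_OTHERHOST;	/* not our MAC; ip_rcv drops these */

	/* ... Ethernet II vs. 802.3 handling elided ... */
	return eth->h_proto;		/* e.g. htons(ETH_P_IP), htons(ETH_P_ARP) */
}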
Back inside netif_receive_skb(), the loop

list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list)

lets each protocol module registered for the packet's type process it in turn. All protocol handlers are registered in ptype_base, which is really an array of hash-bucket list heads (net/core/dev.c):

static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

Protocol modules are added to it via dev_add_pack():

void dev_add_pack(struct packet_type *pt)
{
int hash;

spin_lock_bh(&ptype_lock);
if (pt->type == htons(ETH_P_ALL))
list_add_rcu(&pt->list, &ptype_all);
else {
hash = ntohs(pt->type) & PTYPE_HASH_MASK;
list_add_rcu(&pt->list, &ptype_base[hash]);
}
spin_unlock_bh(&ptype_lock);
}

Take ARP as an example. Its packet_type is defined as follows and registered into ptype_base by arp_init():

static struct packet_type arp_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_ARP),
.func = arp_rcv,
};

When a packet's type is then matched in ptype_base, arp_rcv() is invoked to receive it, and arp_rcv() calls arp_process():

arp = arp_hdr(skb);
……
arp_ptr = (unsigned char *)(arp + 1);
sha = arp_ptr;
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4);
arp_ptr += 4;
arp_ptr += dev->addr_len;
memcpy(&tip, arp_ptr, 4);

After this walk, sha points at the sender hardware address, sip holds the sender IP and tip holds the target IP.
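For reference, the payload being parsed sits directly after struct arphdr; for Ethernet + IPv4 (ar_hln = 6, ar_pln = 4) the on-the-wire layout is:

/* after struct arphdr:
 *   unsigned char ar_sha[ETH_ALEN];  sender hardware address -> sha
 *   unsigned char ar_sip[4];         sender IP address        -> sip
 *   unsigned char ar_tha[ETH_ALEN];  target hardware address  (skipped)
 *   unsigned char ar_tip[4];         target IP address        -> tip
 *
 * These fields are not members of struct arphdr itself because their
 * sizes depend on ar_hln/ar_pln, hence the manual arp_ptr arithmetic.
 */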

Then, if this is an ARP request, the routing table is consulted first with ip_route_input():

if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input(skb, tip, sip, 0, dev) == 0)

ip_route_input() first checks the route cache for a matching entry:

hash = rt_hash(daddr, saddr, iif, rt_genid(net));

Cached routes are organized in the kernel as a hash table, so a lookup computes the hash value and then walks the chain at rt_hash_table[hash].chain. Note that the key covers the source IP address, the destination IP address and the incoming interface index.

If no match is found in the cache, or cache lookup is bypassed, the routing tables proper are queried by ip_route_input_slow(), which ultimately calls fib_lookup() to obtain a fib_result:

if ((err = fib_lookup(net, &fl, &res)) != 0)

If the fib_result is valid, the route cache is updated with the result of this lookup:

hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);

After the route lookup, back in arp_process(), if the route says the target address is local, this host should answer the request:

if (addr_type == RTN_LOCAL) {
	……
	if (!dont_send) {
		n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
		if (n) {
			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
				 sha, dev->dev_addr, sha);
			neigh_release(n);
		}
	}
	goto out;
}

First the neighbour table is updated via neigh_event_ns(), then the ARP reply is sent with arp_send(). That completes the broad ARP flow; the routing table and the neighbour table that ARP touches are each large topics in their own right.