您的位置:首页 > 理论基础 > 计算机网络

linux网络协议栈:邻居子系统

2015-07-09 11:06 721 查看
一:邻居子系统概述
         邻居从物理上来说,是指在同一个局域网内的终端。从网络拓扑的结构来说,是指它们之间相隔的距离仅为一跳,它们属于同一个冲突域
         邻居子系统的作用:
         它为第三层协议与第二层协议提供地址映射关系。
         提供邻居头部缓存,加速发包的速度
二:邻居子系统在整个协议栈的地位
         发送数据的时候,要在本机进行路由查找,如果有到目的地址的路径,查看arp缓存中是否存在相应的映射关系,如果没有,则新建邻居项。判断邻居项是否为可用状态。如果不可用,把skb存至邻居发送队列中,然后发送arp请求。
         如果接收到arp应答,则将对应邻居项置为可用。如果在指定时间内未收到响应包,则将对应邻居项置为无效状态。
         如果邻居更改为可用状态,则把邻居项对应的skb队列中的数据包发送出去

三:流程概述;
发包流程。
下面以发送udp数据为例,看看是怎么与邻居子系统相关联的
Sendmsg() → ip_route_output()(到路由缓存中查找目的出口) → ip_route_output_slow()(如果缓存中不存在目的项,则到路由表中查找) → ip_build_xmit() → output_maybe_reroute → skb->dst->output()
如果此时找到了路由,则根据路由信息分配一个dst_entry,并调用arp_bind_neighbour为之绑定邻居,output指针赋值为ip_output
 转到执行ip_output
ip_output → __ip_finish_output() → ip_finish_output2() → dst->neighbour->output()
现在就转至邻居项的出口函数了。关于上述详细流程,将在以后的学习中继续给出
Neighbour->output怎么处理呢?我们从初始化时看起
四:邻居子系统初始化
跟以前一样,linux源代码版本为2.6.21
/*
 * arp_init - boot-time initialisation of the ARP layer (net/ipv4/arp.c).
 *
 * Runs the following steps in order: neighbour-table setup for arp_tbl,
 * registration of the ARP packet type, proc setup, optional sysctl
 * registration, and registration of the net-device notifier.
 */
void __init arp_init(void)
{
         /* Initialise the ARP neighbour table. */
         neigh_table_init(&arp_tbl);
         /* Register the ARP protocol handler (see arp_packet_type). */
         dev_add_pack(&arp_packet_type);
         /* Create the proc objects. */
         arp_proc_init();
#ifdef CONFIG_SYSCTL
         neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
                                  NET_IPV4_NEIGH, "ipv4", NULL);
#endif
         /* Hook into the net-device event notification chain. */
         register_netdevice_notifier(&arp_netdev_notifier);
}
  在neigh_table_init(&arp_tbl);中,对邻居表进行了相应的初始化,特别的,初始化了一个垃圾回收定时器。后面再给出讨论
arp_packet_type的内容为:
static struct packet_type arp_packet_type = {
         .type =       __constant_htons(ETH_P_ARP),           (链路层对应的协议号)
         .func =       arp_rcv,                                                    《数据包的处理函数》
}
从上面可以看出,当接收到arp数据包时,将用arp_rcv()处理
五:邻居系统数据结构分析
neigh_table结构:
struct neigh_table
{
         //下一个邻居表
         struct neigh_table         *next;
         //协议簇
         int                        family;
         //入口长度,也就是一个邻居结构的大小,初始化为sizeof(neighbour)+4(4为一个IP地址的长度)
         int                        entry_size;
         //哈希关键值长度 即IP地址的长度,为4
         int                        key_len;
         //哈希值的计数函数(哈希值是经对应设备net_device 与 目的Ip计算出来的)
         __u32                           (*hash)(const void *pkey, const struct net_device *);
         //邻居初始化函数
         int                        (*constructor)(struct neighbour *);
         int                        (*pconstructor)(struct pneigh_entry *);
         void                     (*pdestructor)(struct pneigh_entry *);
         void                     (*proxy_redo)(struct sk_buff *skb);
         //邻居表的名称
         char                     *id;
         struct neigh_parms       parms;
         /* HACK. gc_* shoul follow parms without a gap! */
         //常规垃圾回收的时候
         int                        gc_interval;
         int                        gc_thresh1;
         //第二个阀值,如果邻居超过此值,当创建新的邻居时
         //若超过五秒没有刷新,则必须立即刷新,强制垃圾回收
         int                        gc_thresh2;
         //允许邻居的上限
         int                        gc_thresh3;
         //最近刷新时间
         unsigned long                last_flush;
         //常规的垃圾回收定时器
         struct timer_list           gc_timer;
         struct timer_list           proxy_timer;
         struct sk_buff_head     proxy_queue;
         //整个表中邻居的数量
         int                        entries;
         rwlock_t              lock;
         unsigned long                last_rand;
         struct neigh_parms       *parms_list;
         kmem_cache_t              *kmem_cachep;
         struct neigh_statistics  *stats;
         //哈希数组,存入其中的邻居
         struct neighbour  **hash_buckets;
         //哈希数组大小的掩码
         unsigned int                  hash_mask;
         __u32                           hash_rnd;
         unsigned int                  hash_chain_gc;
         //与代理arp相关
         struct pneigh_entry      **phash_buckets;
#ifdef CONFIG_PROC_FS
         struct proc_dir_entry   *pde;
#endif
}
Neighbour结构:
// One neighbour entry: maps a protocol key (primary_key) on a given device
// to a hardware address (ha) and carries the output function used to
// transmit packets to that neighbour.
struct neighbour
{
         // Next neighbour in the same hash chain
         struct neighbour  *next;
         // Neighbour table this entry belongs to
         struct neigh_table         *tbl;
         // ARP transmission parameters
         struct neigh_parms       *parms;
         // Network device this neighbour is reached through
         struct net_device          *dev;
         // Time of last use
         unsigned long                used;
         unsigned long                confirmed;
         // Time of last update
         unsigned long                updated;
         __u8                    flags;
         // State of the neighbour entry (NUD_* values)
         __u8                    nud_state;
         __u8                    type;
         // Liveness flag; per the article, the garbage collector removes the entry when dead is 1
         __u8                    dead;
         // Number of ARP request retries sent
         atomic_t              probes;
         rwlock_t              lock;
         // Hardware (link-layer) address of the neighbour; the array length
         // rounds MAX_ADDR_LEN up to a multiple of sizeof(unsigned long)
         unsigned char                ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];
         // Presumably the cached link-layer header for this neighbour (TODO confirm against struct hh_cache)
         struct hh_cache            *hh;
         // Reference count
         atomic_t              refcnt;
         // Transmit function used for packets sent to this neighbour
         int                        (*output)(struct sk_buff *skb);
         // Queue of skbs waiting to be sent to this neighbour
         struct sk_buff_head     arp_queue;
         // Timer
         struct timer_list  timer;
         struct neigh_ops *ops;
         // Hash key, stored inline after the struct (neigh_create() copies key_len bytes here)
         u8                        primary_key[0];
};

在前面已经分析过,查找到路由后,会调用arp_bind_neighbour绑定一个邻居项

int arp_bind_neighbour(struct dst_entry *dst)

{

         struct net_device *dev = dst->dev;

         struct neighbour *n = dst->neighbour;

         if (dev == NULL)

                   return -EINVAL;

         //如果邻居项不存在

         if (n == NULL) {

                   u32 nexthop = ((struct rtable*)dst)->rt_gateway;

                   if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))

                            nexthop = 0;

                   n = __neigh_lookup_errno(

#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)

                       dev->type == ARPHRD_ATM ? clip_tbl_hook :

#endif

                       &arp_tbl, &nexthop, dev);

                   if (IS_ERR(n))

                            return PTR_ERR(n);

                   dst->neighbour = n;

         }

         return 0;

}

如果邻居项不存在,则执行__neigh_lookup_errno()

/*
 * __neigh_lookup_errno - look up a neighbour entry, creating it on a miss.
 * @tbl:  neighbour table to search
 * @pkey: lookup key
 * @dev:  device the neighbour is reached through
 *
 * Returns the entry found by neigh_lookup(), otherwise the result of
 * neigh_create().  The latter may be an ERR_PTR()-encoded error, so callers
 * must test the result with IS_ERR() rather than against NULL.
 */
struct neighbour *
__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
                     struct net_device *dev)
{
         /* Search the neighbour table first. */
         struct neighbour *n = neigh_lookup(tbl, pkey, dev);

         if (n)
                   return n;

         /* Not found: create a new neighbour entry. */
         return neigh_create(tbl, pkey, dev);
}

从上面可以看到,它会先到邻居表中寻找对应的邻居项,如果不存在,则新建一项。继续跟进

/*
 * neigh_create - allocate a neighbour entry for (pkey, dev) and insert it
 * into tbl's hash table.
 *
 * Returns the newly inserted entry, or an already-present entry with the same
 * device and key (found while holding tbl->lock); neigh_hold() is applied to
 * whichever entry is returned.  On failure returns ERR_PTR(-ENOBUFS) when
 * neigh_alloc() fails, ERR_PTR(-EINVAL) when the entry's parms are marked
 * dead, or ERR_PTR() of the negative value returned by the table constructor
 * or the parms neigh_setup hook.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,

                                   struct net_device *dev)

{

         u32 hash_val;

         int key_len = tbl->key_len;

         int error;

         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);

         if (!n) {

                   rc = ERR_PTR(-ENOBUFS);

                   goto out;

         }

         // The hash key is the caller's pkey (arp_bind_neighbour passes the next-hop address)

         memcpy(n->primary_key, pkey, key_len);

         n->dev = dev;

         dev_hold(dev);

         /* Protocol specific setup. */

         // Table-level initialisation hook; a negative return aborts creation

         if (tbl->constructor &&         (error = tbl->constructor(n)) < 0) {

                   rc = ERR_PTR(error);

                   goto out_neigh_release;

         }

         /* Device specific setup. */

         if (n->parms->neigh_setup &&

             (error = n->parms->neigh_setup(n)) < 0) {

                   rc = ERR_PTR(error);

                   goto out_neigh_release;

         }

         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

         write_lock_bh(&tbl->lock);

         // If the entry count exceeds hash_mask + 1, grow the hash table (requesting twice that size)

         if (tbl->entries > (tbl->hash_mask + 1))

                   neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);

         hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;

         // Refuse to insert when the entry's parms are marked dead

         if (n->parms->dead) {

                   rc = ERR_PTR(-EINVAL);

                   goto out_tbl_unlock;

         }

         // Walk the bucket's chain; if an entry with the same device and key
         // already exists, take a reference on it and return it instead
         // (the entry allocated above is released at out_neigh_release)

                  for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {

                   if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {

                            neigh_hold(n1);

                            rc = n1;

                            goto out_tbl_unlock;

                   }

         }

// Not present: link the new entry at the head of the bucket's chain

         n->next = tbl->hash_buckets[hash_val];

         tbl->hash_buckets[hash_val] = n;

         n->dead = 0;

         neigh_hold(n);

         write_unlock_bh(&tbl->lock);

         NEIGH_PRINTK2("neigh %p is created.\n", n);

         rc = n;

out:

         return rc;

out_tbl_unlock:

         write_unlock_bh(&tbl->lock);

out_neigh_release:

         neigh_release(n);

         goto out;

}

在函数里,会调用tbl->constructor()进行初始化。在arp_tbl结构中,为constructor赋值为arp_constructor。

static int arp_constructor(struct neighbour *neigh)

{

         u32 addr = *(u32*)neigh->primary_key;

         struct net_device *dev = neigh->dev;

         struct in_device *in_dev;

         struct neigh_parms *parms;

         neigh->type = inet_addr_type(addr);

         rcu_read_lock();

         in_dev = rcu_dereference(__in_dev_get(dev));

         if (in_dev == NULL) {

                   rcu_read_unlock();

                   return -EINVAL;

         }

         parms = in_dev->arp_parms;

         __neigh_parms_put(neigh->parms);

         neigh->parms = neigh_parms_clone(parms);

         rcu_read_unlock();

         //dev->hard_header.是为被赋值勤

         if (dev->hard_header == NULL) {

                   neigh->nud_state = NUD_NOARP;

                   neigh->ops = &arp_direct_ops;

                   neigh->output = neigh->ops->queue_xmit;

         } else {

                  

#if 1

                                     switch (dev->type) {

                   default:

                            break;

                   case ARPHRD_ROSE:         

                            neigh->ops = &arp_broken_ops;

                            neigh->output = neigh->ops->output;

                            return 0;

#endif

                   ;}

#endif

                   if (neigh->type == RTN_MULTICAST) {

                            neigh->nud_state = NUD_NOARP;

                            arp_mc_map(addr, neigh->ha, dev, 1);

                   } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {

                            neigh->nud_state = NUD_NOARP;

                            memcpy(neigh->ha, dev->dev_addr, dev->addr_len);

                   } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {

                            neigh->nud_state = NUD_NOARP;

                            memcpy(neigh->ha, dev->broadcast, dev->addr_len);

                   }

                   if (dev->hard_header_cache)

                            neigh->ops = &arp_hh_ops;

                   else

                            neigh->ops = &arp_generic_ops;

                   //如果邻居项是可用状态,则调用connected_output里的函数

                   if (neigh->nud_state&NUD_VALID)

                            neigh->output = neigh->ops->connected_output;

                   else

                   //如果邻居项不可用

                            neigh->output = neigh->ops->output;

         }

         return 0;

}

在网卡驱动那一章,我们是调用alloc_etherdev()来构建网卡的net_device结构的,在alloc_etherdev()中会调用ether_setup()对各函数指针赋值

void ether_setup(struct net_device *dev)                                              (drivers/net/net_init.c)

{

         /* Fill in the fields of the device structure with ethernet-generic values.

            This should be in a common file instead of per-driver.  */

        

         dev->change_mtu                  = eth_change_mtu;

         dev->hard_header         = eth_header;

         dev->rebuild_header    = eth_rebuild_header;

         dev->set_mac_address         = eth_mac_addr;

         dev->hard_header_cache       = eth_header_cache;

         dev->header_cache_update= eth_header_cache_update;

         dev->hard_header_parse       = eth_header_parse;

         dev->type           = ARPHRD_ETHER;

         dev->hard_header_len          = ETH_HLEN;

         dev->mtu            = 1500; /* eth_mtu */

         dev->addr_len              = ETH_ALEN;

         dev->tx_queue_len       = 1000;      /* Ethernet wants good queues */  

        

         memset(dev->broadcast,0xFF, ETH_ALEN);

         /* New-style flags. */

         dev->flags           = IFF_BROADCAST|IFF_MULTICAST;

}

所以,neigh->output 就指向了arp_hh_ops->output

Arp_hh_ops的结构如下:

static struct neigh_ops arp_hh_ops = {

         .family =             AF_INET,

         .solicit =              arp_solicit,

         .error_report =             arp_error_report,

         .output =             neigh_resolve_output,

         .connected_output =    neigh_resolve_output,

         .hh_output =                dev_queue_xmit,

         .queue_xmit =               dev_queue_xmit,

};

由此可以看到,最终的数据都会流到neigh_resolve_output
http://www.360doc.com/content/10/1026/23/706976_64322489.shtml
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: