您的位置:首页 > 运维架构 > Linux

Linux-网桥原理分析(二)

2012-07-18 23:02 281 查看

4 网桥的实现

在内核中,网桥以模块的方式存在,其注册(初始化)代码位于源码路径 net/bridge/br.c:

4.1 初始化

/*
 * br_init - module initialisation for the bridge code.
 *
 * Initialises the forwarding database, optionally the netfilter hooks,
 * installs the bridge hook functions and registers the netdevice notifier.
 *
 * Returns 0 on success, or the non-zero value reported by
 * br_netfilter_init() when CONFIG_BRIDGE_NETFILTER is enabled and that
 * step fails.
 */
static int __init br_init(void)
{
#ifdef CONFIG_BRIDGE_NETFILTER
	int err;
#endif

	/* Bridge forwarding database init (per the article: allocates its slab cache). */
	br_fdb_init();

#ifdef CONFIG_BRIDGE_NETFILTER
	/*
	 * Netfilter hook initialisation.  Propagate the callee's own error
	 * code instead of a bare positive 1, which does not follow the
	 * 0 / -errno convention expected from a module init function.
	 * NOTE(review): presumably br_netfilter_init() returns a negative
	 * errno on failure - confirm against its definition.
	 * NOTE(review): whatever br_fdb_init() set up is not undone on this
	 * path; the matching teardown helper is not visible here.
	 */
	err = br_netfilter_init();
	if (err)
		return err;
#endif

	/* Install the deviceless ioctl handler (article: stored in br_ioctl_hook). */
	brioctl_set(br_ioctl_deviceless_stub);

	/* Install the frame handling hook. */
	br_handle_frame_hook = br_handle_frame;

	/* Install the forwarding database get/put hooks. */
	br_fdb_get_hook = br_fdb_get;
	br_fdb_put_hook = br_fdb_put;

	/* Register on the netdevice notifier chain. */
	register_netdevice_notifier(&br_device_notifier);

	return 0;
}


4.2 新建网桥

前面说到通过brctl addbr br0命令建立网桥,此处用户空间调用的brctl命令最终对应到内核中的br_ioctl_deviceless_stub处理函数:

/*
 * br_ioctl_deviceless_stub - handle bridge ioctls that are not bound to
 * an existing bridge device.
 * @cmd:  ioctl command number.
 * @uarg: user-space argument; for add/delete it points at the bridge name.
 *
 * SIOCGIFBR / SIOCSIFBR are passed to old_deviceless().  SIOCBRADDBR and
 * SIOCBRDELBR require CAP_NET_ADMIN, copy IFNAMSIZ bytes of name from user
 * space and create or delete the named bridge.
 *
 * Returns the handler's result, -EPERM without the capability, -EFAULT if
 * the name cannot be copied, or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
{
	char name[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Bring the requested bridge name from user space into the kernel. */
	if (copy_from_user(name, uarg, IFNAMSIZ))
		return -EFAULT;

	/* Force termination regardless of what user space supplied. */
	name[IFNAMSIZ - 1] = 0;

	return (cmd == SIOCBRADDBR) ? br_add_bridge(name)
				    : br_del_bridge(name);
}


在这里,我们传入的cmd为SIOCBRADDBR.转入br_add_bridge(buf)中进行:

/*
 * br_add_bridge - create and register a new bridge network device.
 * @name: requested interface name; if it contains a '%' the final name
 *        is chosen by dev_alloc_name().
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error reported by dev_alloc_name(), register_netdevice() or
 * br_sysfs_addbr().
 */
int br_add_bridge(const char *name)
{
	struct net_device *dev;
	int ret;

	/* Allocate the net_device that represents the virtual bridge. */
	dev = new_bridge_dev(name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();

	/* A '%' template in the name means the kernel picks the final name. */
	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto err_free;
	}

	/* Register the device with the networking core. */
	ret = register_netdevice(dev);
	if (ret)
		goto err_free;

	/* network device kobject is not setup until
	 * after rtnl_unlock does its hotplug magic.
	 * so hold reference to avoid race.
	 */
	dev_hold(dev);
	rtnl_unlock();

	/* Create the sysfs entries for the bridge. */
	ret = br_sysfs_addbr(dev);
	dev_put(dev);

	if (ret)
		unregister_netdev(dev);

	return ret;

err_free:
	/*
	 * The device was never registered, so it must be released here on
	 * both failure paths.  Previously a dev_alloc_name() failure skipped
	 * free_netdev() and leaked the allocation.
	 */
	free_netdev(dev);
	rtnl_unlock();
	return ret;
}


网桥是一个虚拟的设备,它的注册跟实际的物理网络设备注册是一样的。我们关心的是网桥对应的net_device结构是什么样的,继续跟踪进new_bridge_dev:

/*
 * new_bridge_dev - allocate a net_device whose private area is a
 * struct net_bridge and fill in the bridge defaults.
 * @name: interface name handed to alloc_netdev().
 *
 * Returns the new device, or NULL when alloc_netdev() fails.
 */
static struct net_device *new_bridge_dev(const char *name)
{
	struct net_device *ndev;
	struct net_bridge *bridge;

	ndev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup);
	if (ndev == NULL)
		return NULL;

	/* The device's private area is the bridge; point it back at the device. */
	bridge = netdev_priv(ndev);
	bridge->dev = ndev;

	spin_lock_init(&bridge->lock);

	/* port_list holds the ports attached to this bridge. */
	INIT_LIST_HEAD(&bridge->port_list);
	spin_lock_init(&bridge->hash_lock);

	/* Everything below is spanning-tree (STP) related default state. */
	bridge->bridge_id.prio[0] = 0x80;
	bridge->bridge_id.prio[1] = 0x00;
	memset(bridge->bridge_id.addr, 0, ETH_ALEN);

	bridge->stp_enabled = 0;
	bridge->designated_root = bridge->bridge_id;
	bridge->root_path_cost = 0;
	bridge->root_port = 0;

	/* Timer defaults, expressed in jiffies via HZ. */
	bridge->max_age = 20 * HZ;
	bridge->bridge_max_age = bridge->max_age;

	bridge->hello_time = 2 * HZ;
	bridge->bridge_hello_time = bridge->hello_time;

	bridge->forward_delay = 15 * HZ;
	bridge->bridge_forward_delay = bridge->forward_delay;

	bridge->topology_change = 0;
	bridge->topology_change_detected = 0;
	bridge->ageing_time = 300 * HZ;

	INIT_LIST_HEAD(&bridge->age_list);
	br_stp_timer_init(bridge);

	return ndev;
}


在br_dev_setup中还做了一些另外的函数指针初始化:

/*
 * br_dev_setup - setup callback that configures a freshly allocated
 * bridge net_device.
 * @dev: device being initialised.
 *
 * Clears the hardware address, applies the generic Ethernet defaults and
 * then installs the bridge-specific operations and flags.
 */
void br_dev_setup(struct net_device *dev)
{
	/* The bridge starts out with an all-zero MAC address. */
	memset(dev->dev_addr, 0, ETH_ALEN);

	/*
	 * Start from the Ethernet defaults; the stores below must come after
	 * this call so they are the values that remain in place.
	 */
	ether_setup(dev);

	/* Lifecycle: open / stop / release. */
	dev->open = br_dev_open;
	dev->stop = br_dev_stop;
	dev->destructor = free_netdev;
	SET_MODULE_OWNER(dev);

	/* Transmit path of the bridge interface itself. */
	dev->hard_start_xmit = br_dev_xmit;
	dev->tx_queue_len = 0;

	/* Control operations: ioctl, statistics, multicast list, MTU. */
	dev->do_ioctl = br_dev_ioctl;
	dev->get_stats = br_dev_get_stats;
	dev->set_multicast_list = br_dev_set_multicast_list;
	dev->change_mtu = br_change_mtu;

	/* No set_mac_address handler is installed for the bridge device. */
	dev->set_mac_address = NULL;

	/* Mark the device as a bridge. */
	dev->priv_flags = IFF_EBRIDGE;
}


4.3 添加删除端口

仅仅创建网桥,还是不够的。实际应用中的网桥需要添加实际的端口(即物理接口),如例子中的eth1, eth2等。应用程序通过ioctl来为网桥增加物理接口,对应内核函数br_dev_ioctl的代码和分析如下:

/*
 * br_dev_ioctl - ioctl entry point of a bridge device.
 * @dev: the bridge device.
 * @rq:  interface request; ifr_ifindex names the port for add/delete.
 * @cmd: ioctl command number.
 *
 * SIOCDEVPRIVATE goes to old_dev_ioctl(); SIOCBRADDIF / SIOCBRDELIF are
 * both handled by add_del_if(), whose last argument is non-zero for add.
 * Any other command is logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *bridge = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(bridge, rq->ifr_ifindex,
				  cmd == SIOCBRADDIF);

	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}


下面分析具体的添加删除函数add_del_if:

/*
 * add_del_if - attach an interface to, or detach it from, a bridge.
 * @br:      target bridge.
 * @ifindex: index of the interface to look up.
 * @isadd:   non-zero to add the interface, zero to remove it.
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL when no device has the
 * given index, otherwise the result of br_add_if() / br_del_if().
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	port_dev = dev_get_by_index(ifindex);
	if (!port_dev)
		return -EINVAL;

	rc = isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);

	/* Balances the dev_get_by_index() lookup above. */
	dev_put(port_dev);

	return rc;
}


对应的添加删除函数分别为:br_add_if, br_del_if;

br_add_if:

/*
 * br_add_if - attach a network interface to a bridge as a port.
 * @br:  bridge receiving the port.
 * @dev: interface to attach.
 *
 * Returns 0 on success; -EINVAL for loopback or non-Ethernet devices,
 * -ELOOP if @dev is itself a bridge, -EBUSY if @dev already has a bridge
 * port, or the error from new_nbp(), br_fdb_insert() or br_sysfs_addif().
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *port;
	int rc;

	/* Only (non-loopback) Ethernet devices can be bridged. */
	if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER)
		return -EINVAL;

	/* A bridge device cannot become a port of another bridge: that would loop. */
	if (dev->hard_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* The interface already belongs to a bridge. */
	if (dev->br_port != NULL)
		return -EBUSY;

	/* Create and initialise the port object for this interface. */
	port = new_nbp(br, dev, br_initial_port_cost(dev));
	if (IS_ERR(port))
		return PTR_ERR(port);

	/*
	 * Record the interface's own MAC address in the forwarding database
	 * (per the article: a fixed entry for the port that does not age out).
	 */
	rc = br_fdb_insert(br, port, dev->dev_addr);
	if (rc) {
		destroy_nbp(port);
		return rc;
	}

	/* Create the sysfs entries for the port. */
	rc = br_sysfs_addif(port);
	if (rc) {
		del_nbp(port);
		return rc;
	}

	/* Bridge ports are switched into promiscuous mode. */
	dev_set_promiscuity(dev, 1);

	/* Publish the port on the bridge's port list. */
	list_add_rcu(&port->list, &br->port_list);

	/* STP bookkeeping, done under the bridge lock. */
	spin_lock_bh(&br->lock);
	br_stp_recalculate_bridge_id(br);
	br_features_recompute(br);
	if ((br->dev->flags & IFF_UP) &&
	    (dev->flags & IFF_UP) &&
	    netif_carrier_ok(dev))
		br_stp_enable_port(port);
	spin_unlock_bh(&br->lock);

	/* Update the bridge device's MTU to br_min_mtu(). */
	dev_set_mtu(br->dev, br_min_mtu(br));

	return 0;
}


br_del_if:

int br_del_if(struct net_bridge *br, struct net_device *dev)

{

struct net_bridge_port *p = dev->br_port;

if (!p || p->br != br)

return -EINVAL;

br_sysfs_removeif(p);

del_nbp(p);

spin_lock_bh(&br->lock);

br_stp_recalculate_bridge_id(br);

br_features_recompute(br);

spin_unlock_bh(&br->lock);

return 0;

}


4 网桥的实现

在内核中,网桥以模块的方式存在,其注册(初始化)代码位于源码路径 net/bridge/br.c:

4.1 初始化

/*
 * br_init - module initialisation for the bridge code.
 *
 * Initialises the forwarding database, optionally the netfilter hooks,
 * installs the bridge hook functions and registers the netdevice notifier.
 *
 * Returns 0 on success, or the non-zero value reported by
 * br_netfilter_init() when CONFIG_BRIDGE_NETFILTER is enabled and that
 * step fails.
 */
static int __init br_init(void)
{
#ifdef CONFIG_BRIDGE_NETFILTER
	int err;
#endif

	/* Bridge forwarding database init (per the article: allocates its slab cache). */
	br_fdb_init();

#ifdef CONFIG_BRIDGE_NETFILTER
	/*
	 * Netfilter hook initialisation.  Propagate the callee's own error
	 * code instead of a bare positive 1, which does not follow the
	 * 0 / -errno convention expected from a module init function.
	 * NOTE(review): presumably br_netfilter_init() returns a negative
	 * errno on failure - confirm against its definition.
	 * NOTE(review): whatever br_fdb_init() set up is not undone on this
	 * path; the matching teardown helper is not visible here.
	 */
	err = br_netfilter_init();
	if (err)
		return err;
#endif

	/* Install the deviceless ioctl handler (article: stored in br_ioctl_hook). */
	brioctl_set(br_ioctl_deviceless_stub);

	/* Install the frame handling hook. */
	br_handle_frame_hook = br_handle_frame;

	/* Install the forwarding database get/put hooks. */
	br_fdb_get_hook = br_fdb_get;
	br_fdb_put_hook = br_fdb_put;

	/* Register on the netdevice notifier chain. */
	register_netdevice_notifier(&br_device_notifier);

	return 0;
}


4.2 新建网桥

前面说到通过brctl addbr br0命令建立网桥,此处用户空间调用的brctl命令最终对应到内核中的br_ioctl_deviceless_stub处理函数:

/*
 * br_ioctl_deviceless_stub - handle bridge ioctls that are not bound to
 * an existing bridge device.
 * @cmd:  ioctl command number.
 * @uarg: user-space argument; for add/delete it points at the bridge name.
 *
 * SIOCGIFBR / SIOCSIFBR are passed to old_deviceless().  SIOCBRADDBR and
 * SIOCBRDELBR require CAP_NET_ADMIN, copy IFNAMSIZ bytes of name from user
 * space and create or delete the named bridge.
 *
 * Returns the handler's result, -EPERM without the capability, -EFAULT if
 * the name cannot be copied, or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
{
	char name[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* Bring the requested bridge name from user space into the kernel. */
	if (copy_from_user(name, uarg, IFNAMSIZ))
		return -EFAULT;

	/* Force termination regardless of what user space supplied. */
	name[IFNAMSIZ - 1] = 0;

	return (cmd == SIOCBRADDBR) ? br_add_bridge(name)
				    : br_del_bridge(name);
}


在这里,我们传入的cmd为SIOCBRADDBR.转入br_add_bridge(buf)中进行:

/*
 * br_add_bridge - create and register a new bridge network device.
 * @name: requested interface name; if it contains a '%' the final name
 *        is chosen by dev_alloc_name().
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error reported by dev_alloc_name(), register_netdevice() or
 * br_sysfs_addbr().
 */
int br_add_bridge(const char *name)
{
	struct net_device *dev;
	int ret;

	/* Allocate the net_device that represents the virtual bridge. */
	dev = new_bridge_dev(name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();

	/* A '%' template in the name means the kernel picks the final name. */
	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto err_free;
	}

	/* Register the device with the networking core. */
	ret = register_netdevice(dev);
	if (ret)
		goto err_free;

	/* network device kobject is not setup until
	 * after rtnl_unlock does its hotplug magic.
	 * so hold reference to avoid race.
	 */
	dev_hold(dev);
	rtnl_unlock();

	/* Create the sysfs entries for the bridge. */
	ret = br_sysfs_addbr(dev);
	dev_put(dev);

	if (ret)
		unregister_netdev(dev);

	return ret;

err_free:
	/*
	 * The device was never registered, so it must be released here on
	 * both failure paths.  Previously a dev_alloc_name() failure skipped
	 * free_netdev() and leaked the allocation.
	 */
	free_netdev(dev);
	rtnl_unlock();
	return ret;
}


网桥是一个虚拟的设备,它的注册跟实际的物理网络设备注册是一样的。我们关心的是网桥对应的net_device结构是什么样的,继续跟踪进new_bridge_dev:

/*
 * new_bridge_dev - allocate a net_device whose private area is a
 * struct net_bridge and fill in the bridge defaults.
 * @name: interface name handed to alloc_netdev().
 *
 * Returns the new device, or NULL when alloc_netdev() fails.
 */
static struct net_device *new_bridge_dev(const char *name)
{
	struct net_device *ndev;
	struct net_bridge *bridge;

	ndev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup);
	if (ndev == NULL)
		return NULL;

	/* The device's private area is the bridge; point it back at the device. */
	bridge = netdev_priv(ndev);
	bridge->dev = ndev;

	spin_lock_init(&bridge->lock);

	/* port_list holds the ports attached to this bridge. */
	INIT_LIST_HEAD(&bridge->port_list);
	spin_lock_init(&bridge->hash_lock);

	/* Everything below is spanning-tree (STP) related default state. */
	bridge->bridge_id.prio[0] = 0x80;
	bridge->bridge_id.prio[1] = 0x00;
	memset(bridge->bridge_id.addr, 0, ETH_ALEN);

	bridge->stp_enabled = 0;
	bridge->designated_root = bridge->bridge_id;
	bridge->root_path_cost = 0;
	bridge->root_port = 0;

	/* Timer defaults, expressed in jiffies via HZ. */
	bridge->max_age = 20 * HZ;
	bridge->bridge_max_age = bridge->max_age;

	bridge->hello_time = 2 * HZ;
	bridge->bridge_hello_time = bridge->hello_time;

	bridge->forward_delay = 15 * HZ;
	bridge->bridge_forward_delay = bridge->forward_delay;

	bridge->topology_change = 0;
	bridge->topology_change_detected = 0;
	bridge->ageing_time = 300 * HZ;

	INIT_LIST_HEAD(&bridge->age_list);
	br_stp_timer_init(bridge);

	return ndev;
}


在br_dev_setup中还做了一些另外的函数指针初始化:

/*
 * br_dev_setup - setup callback that configures a freshly allocated
 * bridge net_device.
 * @dev: device being initialised.
 *
 * Clears the hardware address, applies the generic Ethernet defaults and
 * then installs the bridge-specific operations and flags.
 */
void br_dev_setup(struct net_device *dev)
{
	/* The bridge starts out with an all-zero MAC address. */
	memset(dev->dev_addr, 0, ETH_ALEN);

	/*
	 * Start from the Ethernet defaults; the stores below must come after
	 * this call so they are the values that remain in place.
	 */
	ether_setup(dev);

	/* Lifecycle: open / stop / release. */
	dev->open = br_dev_open;
	dev->stop = br_dev_stop;
	dev->destructor = free_netdev;
	SET_MODULE_OWNER(dev);

	/* Transmit path of the bridge interface itself. */
	dev->hard_start_xmit = br_dev_xmit;
	dev->tx_queue_len = 0;

	/* Control operations: ioctl, statistics, multicast list, MTU. */
	dev->do_ioctl = br_dev_ioctl;
	dev->get_stats = br_dev_get_stats;
	dev->set_multicast_list = br_dev_set_multicast_list;
	dev->change_mtu = br_change_mtu;

	/* No set_mac_address handler is installed for the bridge device. */
	dev->set_mac_address = NULL;

	/* Mark the device as a bridge. */
	dev->priv_flags = IFF_EBRIDGE;
}


4.3 添加删除端口

仅仅创建网桥,还是不够的。实际应用中的网桥需要添加实际的端口(即物理接口),如例子中的eth1, eth2等。应用程序通过ioctl来为网桥增加物理接口,对应内核函数br_dev_ioctl的代码和分析如下:

/*
 * br_dev_ioctl - ioctl entry point of a bridge device.
 * @dev: the bridge device.
 * @rq:  interface request; ifr_ifindex names the port for add/delete.
 * @cmd: ioctl command number.
 *
 * SIOCDEVPRIVATE goes to old_dev_ioctl(); SIOCBRADDIF / SIOCBRDELIF are
 * both handled by add_del_if(), whose last argument is non-zero for add.
 * Any other command is logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *bridge = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(bridge, rq->ifr_ifindex,
				  cmd == SIOCBRADDIF);

	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}


下面分析具体的添加删除函数add_del_if:

/*
 * add_del_if - attach an interface to, or detach it from, a bridge.
 * @br:      target bridge.
 * @ifindex: index of the interface to look up.
 * @isadd:   non-zero to add the interface, zero to remove it.
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL when no device has the
 * given index, otherwise the result of br_add_if() / br_del_if().
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	port_dev = dev_get_by_index(ifindex);
	if (!port_dev)
		return -EINVAL;

	rc = isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);

	/* Balances the dev_get_by_index() lookup above. */
	dev_put(port_dev);

	return rc;
}


对应的添加删除函数分别为:br_add_if, br_del_if;

br_add_if:

/*
 * br_add_if - attach a network interface to a bridge as a port.
 * @br:  bridge receiving the port.
 * @dev: interface to attach.
 *
 * Returns 0 on success; -EINVAL for loopback or non-Ethernet devices,
 * -ELOOP if @dev is itself a bridge, -EBUSY if @dev already has a bridge
 * port, or the error from new_nbp(), br_fdb_insert() or br_sysfs_addif().
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *port;
	int rc;

	/* Only (non-loopback) Ethernet devices can be bridged. */
	if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER)
		return -EINVAL;

	/* A bridge device cannot become a port of another bridge: that would loop. */
	if (dev->hard_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* The interface already belongs to a bridge. */
	if (dev->br_port != NULL)
		return -EBUSY;

	/* Create and initialise the port object for this interface. */
	port = new_nbp(br, dev, br_initial_port_cost(dev));
	if (IS_ERR(port))
		return PTR_ERR(port);

	/*
	 * Record the interface's own MAC address in the forwarding database
	 * (per the article: a fixed entry for the port that does not age out).
	 */
	rc = br_fdb_insert(br, port, dev->dev_addr);
	if (rc) {
		destroy_nbp(port);
		return rc;
	}

	/* Create the sysfs entries for the port. */
	rc = br_sysfs_addif(port);
	if (rc) {
		del_nbp(port);
		return rc;
	}

	/* Bridge ports are switched into promiscuous mode. */
	dev_set_promiscuity(dev, 1);

	/* Publish the port on the bridge's port list. */
	list_add_rcu(&port->list, &br->port_list);

	/* STP bookkeeping, done under the bridge lock. */
	spin_lock_bh(&br->lock);
	br_stp_recalculate_bridge_id(br);
	br_features_recompute(br);
	if ((br->dev->flags & IFF_UP) &&
	    (dev->flags & IFF_UP) &&
	    netif_carrier_ok(dev))
		br_stp_enable_port(port);
	spin_unlock_bh(&br->lock);

	/* Update the bridge device's MTU to br_min_mtu(). */
	dev_set_mtu(br->dev, br_min_mtu(br));

	return 0;
}


br_del_if:

int br_del_if(struct net_bridge *br, struct net_device *dev)

{

struct net_bridge_port *p = dev->br_port;

if (!p || p->br != br)

return -EINVAL;

br_sysfs_removeif(p);

del_nbp(p);

spin_lock_bh(&br->lock);

br_stp_recalculate_bridge_id(br);

br_features_recompute(br);

spin_unlock_bh(&br->lock);

return 0;

}


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: