Linux网卡驱动注册与接收数据处理

news/2024/7/3 4:02:54

Linux网络接收

本文基于2.6.15的内核版本来简单学习一下有关Linux内核如何进行网络数据的处理流程。

网卡驱动注册

以ixgb驱动为例,在网卡注册的时候会进入如下流程。

/* PCI driver descriptor for the ixgb 10GbE adapter: binds the driver name,
 * the table of supported PCI device IDs, and the probe/remove callbacks
 * invoked by the PCI core when a matching device appears/disappears. */
static struct pci_driver ixgb_driver = {
	.name     = ixgb_driver_name,
	.id_table = ixgb_pci_tbl,
	.probe    = ixgb_probe,
	.remove   = __devexit_p(ixgb_remove),
};

/* ... (other members elided in the original excerpt) ... */

/**
 * ixgb_init_module - Driver Registration Routine
 *
 * ixgb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init
ixgb_init_module(void)
{
	/* Announce driver version and copyright, then hand the driver
	 * descriptor to the PCI core; probing happens from there. */
	printk(KERN_INFO "%s - version %s\n",
	       ixgb_driver_string, ixgb_driver_version);

	printk(KERN_INFO "%s\n", ixgb_copyright);

	return pci_module_init(&ixgb_driver);
}

module_init(ixgb_init_module);

PCI注册的流程会调用probe方法,即ixgb_probe。

/**
 * ixgb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in ixgb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * ixgb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit
ixgb_probe(struct pci_dev *pdev,
	   const struct pci_device_id *ent)
{
	struct net_device *netdev = NULL;
	struct ixgb_adapter *adapter;
	static int cards_found = 0;	/* counts probed boards across calls */
	unsigned long mmio_start;
	int mmio_len;
	int pci_using_dac;
	int i;
	int err;

	/* Enable the PCI device before touching any of its resources. */
	if((err = pci_enable_device(pdev)))
		return err;

	/* Prefer 64-bit DMA addressing; fall back to 32-bit, or abort if
	 * neither mask can be set. */
	if(!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) {
		pci_using_dac = 1;
	} else {
		if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
			IXGB_ERR("No usable DMA configuration, aborting\n");
			return err;
		}
		pci_using_dac = 0;
	}

	/* Reserve the device's PCI regions under this driver's name. */
	if((err = pci_request_regions(pdev, ixgb_driver_name)))
		return err;

	pci_set_master(pdev);	/* allow the device to do bus-master DMA */

	/* Allocate the net_device with the adapter private area appended. */
	netdev = alloc_etherdev(sizeof(struct ixgb_adapter));
	if(!netdev) {
		err = -ENOMEM;
		goto err_alloc_etherdev;
	}

	SET_MODULE_OWNER(netdev);
	SET_NETDEV_DEV(netdev, &pdev->dev);

	/* Link pdev <-> netdev <-> adapter so each can be found from the
	 * others in callbacks. */
	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	adapter->hw.back = adapter;

	/* Map BAR 0 (the memory-mapped register window) into kernel space. */
	mmio_start = pci_resource_start(pdev, BAR_0);
	mmio_len = pci_resource_len(pdev, BAR_0);

	adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
	if(!adapter->hw.hw_addr) {
		err = -EIO;
		goto err_ioremap;
	}

	/* Scan the remaining BARs for the first I/O-port region, if any. */
	for(i = BAR_1; i <= BAR_5; i++) {
		if(pci_resource_len(pdev, i) == 0)
			continue;
		if(pci_resource_flags(pdev, i) & IORESOURCE_IO) {
			adapter->hw.io_base = pci_resource_start(pdev, i);
			break;
		}
	}

	/* Wire up the netdev operations (pre-net_device_ops era: assigned
	 * directly on the struct). */
	netdev->open = &ixgb_open;		/* device-up callback */
	netdev->stop = &ixgb_close;		/* device-down callback */
	netdev->hard_start_xmit = &ixgb_xmit_frame;
	netdev->get_stats = &ixgb_get_stats;
	netdev->set_multicast_list = &ixgb_set_multi;
	netdev->set_mac_address = &ixgb_set_mac;
	netdev->change_mtu = &ixgb_change_mtu;	/* MTU change handler */
	ixgb_set_ethtool_ops(netdev);
	netdev->tx_timeout = &ixgb_tx_timeout;	/* tx hang handler */
	netdev->watchdog_timeo = HZ;		/* tx watchdog timeout */
#ifdef CONFIG_IXGB_NAPI
	netdev->poll = &ixgb_clean;	/* NAPI poll callback */
	netdev->weight = 64;		/* NAPI per-poll work quota */
#endif
	/* VLAN acceleration hooks */
	netdev->vlan_rx_register = ixgb_vlan_rx_register;
	netdev->vlan_rx_add_vid = ixgb_vlan_rx_add_vid;
	netdev->vlan_rx_kill_vid = ixgb_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
	netdev->poll_controller = ixgb_netpoll;	/* netpoll (e.g. netconsole) */
#endif

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;
	netdev->base_addr = adapter->hw.io_base;

	adapter->bd_number = cards_found;
	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	/* setup the private structure */
	if((err = ixgb_sw_init(adapter)))
		goto err_sw_init;

	/* Advertise hardware offload capabilities to the stack. */
	netdev->features = NETIF_F_SG |
			   NETIF_F_HW_CSUM |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;
#ifdef NETIF_F_TSO
	netdev->features |= NETIF_F_TSO;
#endif

	if(pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	/* make sure the EEPROM is good */
	if(!ixgb_validate_eeprom_checksum(&adapter->hw)) {
		printk(KERN_ERR "The EEPROM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* Read the permanent MAC address from the EEPROM and validate it. */
	ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr);
	memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len);

	if(!is_valid_ether_addr(netdev->perm_addr)) {
		err = -EIO;
		goto err_eeprom;
	}

	adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw);

	/* Periodic watchdog timer (link state etc. — see ixgb_watchdog). */
	init_timer(&adapter->watchdog_timer);
	adapter->watchdog_timer.function = &ixgb_watchdog;
	adapter->watchdog_timer.data = (unsigned long)adapter;

	/* Deferred work item used by the tx_timeout path. */
	INIT_WORK(&adapter->tx_timeout_task,
		  (void (*)(void *))ixgb_tx_timeout_task, netdev);

	/* Register with the network stack; the device becomes visible now. */
	if((err = register_netdev(netdev)))
		goto err_register;

	/* we're going to reset, so assume we have no link for now */
	netif_carrier_off(netdev);
	netif_stop_queue(netdev);

	printk(KERN_INFO "%s: Intel(R) PRO/10GbE Network Connection\n",
	       netdev->name);
	ixgb_check_options(adapter);
	/* reset the hardware with the new settings */
	ixgb_reset(adapter);

	cards_found++;
	return 0;

	/* Unwind in reverse order of acquisition. */
err_register:
err_sw_init:
err_eeprom:
	iounmap(adapter->hw.hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_regions(pdev);
	return err;
}

注册的方式主要就是通过配置一些网络的参数。

打开网卡接收数据

如果我们使用ethtool工具使该网卡打开,此时就会执行如下流程。

/**
 * ixgb_open - bring the interface up.
 * @netdev: network interface being opened
 *
 * Allocates the transmit and receive descriptor resources, then brings
 * the adapter up (which registers the interrupt handler).  On any
 * failure the resources acquired so far are released and the hardware
 * is reset; the error code from the failing step is returned.
 */
static int
ixgb_open(struct net_device *netdev)
{
	struct ixgb_adapter *adapter = netdev_priv(netdev);
	int err;

	/* allocate transmit descriptors */
	err = ixgb_setup_tx_resources(adapter);
	if (!err) {
		/* allocate receive descriptors */
		err = ixgb_setup_rx_resources(adapter);
		if (!err) {
			/* register the IRQ handler and start the adapter */
			err = ixgb_up(adapter);
			if (!err)
				return 0;
			/* ixgb_up failed: drop the rx resources */
			ixgb_free_rx_resources(adapter);
		}
		/* rx setup or bring-up failed: drop the tx resources */
		ixgb_free_tx_resources(adapter);
	}
	/* leave the hardware in a known state before reporting failure */
	ixgb_reset(adapter);

	return err;
}

此时就会调用ixgb_open函数,进行发送和接收的资源设置,并通过ixgb_up注册网卡的中断回调函数。

/**
 * ixgb_up - bring the adapter to an operational state.
 * @adapter: board private structure
 *
 * Reconfigures the hardware after a reset (multicast list, VLANs,
 * tx/rx rings), registers the interrupt handler, programs the maximum
 * frame size, starts the watchdog timer and enables interrupts.
 * Returns 0 on success or the error from request_irq().
 */
int
ixgb_up(struct ixgb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int err;
	/* maximum on-wire frame size for the current MTU */
	int max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
	struct ixgb_hw *hw = &adapter->hw;

	/* hardware has been reset, we need to reload some things */
	ixgb_set_multi(netdev);

	ixgb_restore_vlan(adapter);

	ixgb_configure_tx(adapter);	/* program the tx ring */
	ixgb_setup_rctl(adapter);
	ixgb_configure_rx(adapter);	/* program the rx ring */
	ixgb_alloc_rx_buffers(adapter);	/* populate rx ring with buffers */

#ifdef CONFIG_PCI_MSI
	{
		/* MSI is only attempted when the bus is in PCI-X mode. */
		boolean_t pcix = (IXGB_READ_REG(&adapter->hw, STATUS) &
				  IXGB_STATUS_PCIX_MODE) ? TRUE : FALSE;
		adapter->have_msi = TRUE;

		if (!pcix)
			adapter->have_msi = FALSE;
		else if((err = pci_enable_msi(adapter->pdev))) {
			printk (KERN_ERR
			 "Unable to allocate MSI interrupt Error: %d\n", err);
			adapter->have_msi = FALSE;
			/* proceed to try to request regular interrupt */
		}
	}
#endif
	/* Register the interrupt handler on the device's IRQ line; the
	 * netdev pointer is passed back to ixgb_intr() as dev_id. */
	if((err = request_irq(adapter->pdev->irq, &ixgb_intr,
			      SA_SHIRQ | SA_SAMPLE_RANDOM,
			      netdev->name, netdev)))
		return err;

	/* disable interrupts and get the hardware into a known state */
	IXGB_WRITE_REG(&adapter->hw, IMC, 0xffffffff);

	/* Reprogram MFS if the stored or hardware frame size disagrees
	 * with the MTU-derived value; enable jumbo frames if needed. */
	if((hw->max_frame_size != max_frame) ||
	   (hw->max_frame_size !=
	    (IXGB_READ_REG(hw, MFS) >> IXGB_MFS_SHIFT))) {

		hw->max_frame_size = max_frame;

		IXGB_WRITE_REG(hw, MFS, hw->max_frame_size << IXGB_MFS_SHIFT);

		if(hw->max_frame_size >
		   IXGB_MAX_ENET_FRAME_SIZE_WITHOUT_FCS + ENET_FCS_LENGTH) {
			uint32_t ctrl0 = IXGB_READ_REG(hw, CTRL0);

			if(!(ctrl0 & IXGB_CTRL0_JFE)) {
				ctrl0 |= IXGB_CTRL0_JFE;
				IXGB_WRITE_REG(hw, CTRL0, ctrl0);
			}
		}
	}

	mod_timer(&adapter->watchdog_timer, jiffies);	/* kick the watchdog */
	ixgb_irq_enable(adapter);	/* re-enable interrupts */

#ifdef CONFIG_IXGB_NAPI
	netif_poll_enable(netdev);
#endif
	return 0;
}

主要是设置了接收的数据缓冲区,设置了中断的处理函数ixgb_intr,当数据来了之后就会调用ixgb_intr函数进行处理。

/**
 * ixgb_intr - interrupt handler for the adapter.
 * @irq: interrupt number
 * @data: pointer to the net_device (registered via request_irq)
 * @regs: CPU registers at interrupt time (pre-2.6.19 handler signature)
 *
 * Reads ICR to see whether this device raised the interrupt.  In NAPI
 * builds it masks further interrupts and schedules the poll routine;
 * in non-NAPI builds it cleans the rx/tx rings directly in IRQ context.
 */
static irqreturn_t
ixgb_intr(int irq, void *data, struct pt_regs *regs)
{
	struct net_device *netdev = data;
	struct ixgb_adapter *adapter = netdev_priv(netdev);
	struct ixgb_hw *hw = &adapter->hw;
	/* Reading ICR also acknowledges/clears the pending causes. */
	uint32_t icr = IXGB_READ_REG(hw, ICR);
#ifndef CONFIG_IXGB_NAPI
	unsigned int i;
#endif

	if(unlikely(!icr))
		return IRQ_NONE;  /* Not our interrupt */

	/* Link-state or rx-sequence events: let the watchdog run soon. */
	if(unlikely(icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC))) {
		mod_timer(&adapter->watchdog_timer, jiffies);
	}

#ifdef CONFIG_IXGB_NAPI
	if(netif_rx_schedule_prep(netdev)) {
		/* Disable interrupts and register for poll. The flush
		   of the posted write is intentionally left out. */
		atomic_inc(&adapter->irq_sem);
		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
		__netif_rx_schedule(netdev);	/* queue for NET_RX_SOFTIRQ */
	}
#else
	/* yes, that is actually a & and it is meant to make sure that
	 * every pass through this for loop checks both receive and
	 * transmit queues for completed descriptors, intended to
	 * avoid starvation issues and assist tx/rx fairness. */
	for(i = 0; i < IXGB_MAX_INTR; i++)
		if(!ixgb_clean_rx_irq(adapter) &
		   !ixgb_clean_tx_irq(adapter))
			break;
#endif
	return IRQ_HANDLED;
}

此时就看配置的是网卡的哪种数据处理类型,一般有如下两种网卡数据处理流程。

NAPI模式

通过softirq上下文对网卡数据进行poll轮询,从而避免过多的中断处理,提升效率。

/*
 * Queue @dev on this CPU's NAPI poll list and raise NET_RX_SOFTIRQ so
 * that net_rx_action() will call dev->poll().  Caller must have won
 * netif_rx_schedule_prep(); runs with local interrupts disabled around
 * the per-CPU list manipulation.
 */
static inline void __netif_rx_schedule(struct net_device *dev)
{
	unsigned long flags;

	local_irq_save(flags);
	dev_hold(dev);	/* hold a reference while the device is on the list */
	/* Append to this CPU's poll list so the softirq keeps pulling
	 * packets from this device. */
	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
	/* Replenish the device's polling quota from its weight. */
	if (dev->quota < 0)
		dev->quota += dev->weight;
	else
		dev->quota = dev->weight;
	/* Trigger the network receive softirq. */
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}

此时就会调用在NET_RX_SOFTIRQ注册的信号方法。

open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

此时调用的就是网络处理。

/*
 * NET_RX_SOFTIRQ handler: walk this CPU's poll list calling each
 * device's ->poll() until the list is drained, the global budget is
 * spent, or too much time has elapsed.  Devices that still have work
 * (or exhausted their quota) are rotated to the tail of the list.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;
	int budget = netdev_budget;	/* total packets allowed this run */
	void *have;

	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {	/* any device pending? */
		struct net_device *dev;

		/* Bail out if the budget is spent or we have run for more
		 * than one jiffy; reschedule the softirq instead. */
		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();	/* poll runs with irqs enabled */

		dev = list_entry(queue->poll_list.next,
				 struct net_device, poll_list);	/* next device */
		have = netpoll_poll_lock(dev);

		/* ->poll() returns non-zero when the device has more work;
		 * a non-positive quota also means it must wait its turn. */
		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			netpoll_poll_unlock(have);
			local_irq_disable();
			/* Rotate the device to the tail for fairness and
			 * refresh its quota. */
			list_del(&dev->poll_list);
			list_add_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			netpoll_poll_unlock(have);
			dev_put(dev);	/* done polling: drop the reference */
			local_irq_disable();
		}
	}
out:
	local_irq_enable();
	return;

softnet_break:
	/* Account the squeeze and re-raise the softirq to continue later. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

通过调用dev的poll函数来进行数据的处理。ixgb对应的poll函数为ixgb_clean。

#ifdef CONFIG_IXGB_NAPI
/*** ixgb_clean - NAPI Rx polling callback* @adapter: board private structure**/static int
ixgb_clean(struct net_device *netdev, int *budget)
{struct ixgb_adapter *adapter = netdev_priv(netdev);int work_to_do = min(*budget, netdev->quota);int tx_cleaned;int work_done = 0;tx_cleaned = ixgb_clean_tx_irq(adapter);ixgb_clean_rx_irq(adapter, &work_done, work_to_do); // 处理单个网络接受数据*budget -= work_done;netdev->quota -= work_done;  // 减去王超的数量/* if no Tx and not enough Rx work done, exit the polling mode */if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {netif_rx_complete(netdev);  // 如果没了就离开该模式ixgb_irq_enable(adapter);return 0;}return 1;
}
#endif

其中ixgb_clean_rx_irq函数,就是我们不经过NAPI模式处理的流程。

数据直接传递

数据直接传递就是直接通过ixgb_clean_rx_irq函数来处理网络包。

/*
 * Walk the rx descriptor ring processing every descriptor the hardware
 * has marked done (DD), hand each completed skb to the stack, and
 * refill the ring.  In NAPI builds the amount of work is bounded by
 * @work_to_do and reported through @work_done; the non-NAPI variant
 * processes until the ring has no more completed descriptors.
 * Returns TRUE if at least one descriptor was cleaned.
 */
static boolean_t
#ifdef CONFIG_IXGB_NAPI
ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
#else
ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
#endif
{
	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	struct ixgb_rx_desc *rx_desc, *next_rxd;
	struct ixgb_buffer *buffer_info, *next_buffer, *next2_buffer;
	uint32_t length;
	unsigned int i, j;
	boolean_t cleaned = FALSE;

	i = rx_ring->next_to_clean;
	rx_desc = IXGB_RX_DESC(*rx_ring, i);
	buffer_info = &rx_ring->buffer_info[i];

	/* DD (descriptor done) set means the hardware finished this slot. */
	while(rx_desc->status & IXGB_RX_DESC_STATUS_DD) {
		struct sk_buff *skb, *next_skb;
		u8 status;

#ifdef CONFIG_IXGB_NAPI
		if(*work_done >= work_to_do)
			break;	/* budget exhausted for this poll */

		(*work_done)++;
#endif
		status = rx_desc->status;
		skb = buffer_info->skb;	/* the buffer the hw filled */
		prefetch(skb->data);

		/* Advance (with wraparound) and prefetch the next
		 * descriptor and the next two buffer_info entries to hide
		 * memory latency on the following iterations. */
		if(++i == rx_ring->count) i = 0;
		next_rxd = IXGB_RX_DESC(*rx_ring, i);
		prefetch(next_rxd);

		if((j = i + 1) == rx_ring->count) j = 0;
		next2_buffer = &rx_ring->buffer_info[j];
		prefetch(next2_buffer);

		next_buffer = &rx_ring->buffer_info[i];
		next_skb = next_buffer->skb;
		prefetch(next_skb);

		cleaned = TRUE;

		/* Give the buffer back to the CPU before touching its data. */
		pci_unmap_single(pdev,
				 buffer_info->dma,
				 buffer_info->length,
				 PCI_DMA_FROMDEVICE);

		length = le16_to_cpu(rx_desc->length);

		if(unlikely(!(status & IXGB_RX_DESC_STATUS_EOP))) {

			/* All receives must fit into a single buffer */

			IXGB_DBG("Receive packet consumed multiple buffers "
					 "length<%x>\n", length);

			dev_kfree_skb_irq(skb);
			goto rxdesc_done;
		}

		/* Drop frames the hardware flagged with rx errors. */
		if (unlikely(rx_desc->errors
			     & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE
				| IXGB_RX_DESC_ERRORS_P |
				IXGB_RX_DESC_ERRORS_RXE))) {

			dev_kfree_skb_irq(skb);
			goto rxdesc_done;
		}

		/* Good Receive */
		skb_put(skb, length);	/* set the valid data length */

		/* Receive Checksum Offload */
		ixgb_rx_checksum(adapter, rx_desc, skb);

		skb->protocol = eth_type_trans(skb, netdev);	/* set proto */
#ifdef CONFIG_IXGB_NAPI
		if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
			vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
				le16_to_cpu(rx_desc->special) &
					IXGB_RX_DESC_SPECIAL_VLAN_MASK);
		} else {
			/* NAPI: deliver to the stack directly */
			netif_receive_skb(skb);
		}
#else /* CONFIG_IXGB_NAPI */
		if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
			vlan_hwaccel_rx(skb, adapter->vlgrp,
				le16_to_cpu(rx_desc->special) &
					IXGB_RX_DESC_SPECIAL_VLAN_MASK);
		} else {
			/* non-NAPI: queue via the backlog softirq path */
			netif_rx(skb);
		}
#endif /* CONFIG_IXGB_NAPI */

		netdev->last_rx = jiffies;

rxdesc_done:
		/* clean up descriptor, might be written over by hw */
		rx_desc->status = 0;
		buffer_info->skb = NULL;

		/* use prefetched values */
		rx_desc = next_rxd;
		buffer_info = next_buffer;
	}

	rx_ring->next_to_clean = i;

	ixgb_alloc_rx_buffers(adapter);	/* refill the slots just cleaned */

	return cleaned;
}

在非NAPI模式下,会调用netif_rx进入到软中断的网络数据处理中,只不过这一步没有经过poll模式处理。

/*
 * Non-NAPI receive entry point: queue @skb on this CPU's input backlog
 * and make sure the backlog pseudo-device is scheduled for
 * NET_RX_SOFTIRQ processing.  Drops the packet when the backlog is
 * full.  Returns NET_RX_SUCCESS or NET_RX_DROP.
 */
int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);	/* stamp arrival time if not set */

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			/* Backlog already non-empty: the backlog device is
			 * already scheduled, just append. */
enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* First packet: schedule the backlog device for polling,
		 * then jump back to enqueue the skb. */
		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	/* Backlog full: count and drop. */
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

基本的两种模式下面的网络数据处理就基本分析完成。

总结

本文根据Linux-2.6系列的源码,基于ixgb网卡驱动对网卡数据的接收流程做了简单分析:驱动注册时配置好对应的接口与回调,在通过ethtool等工具使网卡开始运行的时候注册中断处理函数,并根据是否启用NAPI走不同的处理流程。启用NAPI时会在softirq上下文中通过poll函数批量处理网卡数据,效率上比非NAPI模式要高;在非NAPI模式下,最终也是通过softirq把收到的网络包交给协议栈处理,从而完成网卡数据的接收与处理。由于本人才疏学浅,如有错误请批评指正。


http://lihuaxi.xjx100.cn/news/257239.html

相关文章

【C#串口编程计划】串口编程简介

介绍C#串口编程的基本知识。

这可能是中国最穷的211大学!连研究生奖学金都发不起了....

本文募格学术撰写。参考资料&#xff1a;青塔、西部网-陕西新闻网、中国新闻周刊、西北大学官网等。万万没想到曾经因为“穷”而登上了微博热搜榜的西北大学。最近又因为“没钱”惹的祸而闯上了风口浪尖....近日&#xff0c;西北大学因研究生奖学金发放问题引发关注。起因是临近…

解析目标检测全流程!附代码数据

↑↑↑关注后"星标"Datawhale每日干货 & 每月组队学习&#xff0c;不错过Datawhale干货 作者&#xff1a;王程伟&#xff0c;算法工程师&#xff0c;Datawhale成员在计算机视觉中&#xff0c;红外弱小目标检测是一个重要的方向&#xff0c;但直到近一两年&#x…

拥有「人类智能」的全球首款有「思想」的机器人,活细胞培养的神经元

出品 | AI科技大本营&#xff08;ID:rgznai100&#xff09; 脑机接口&#xff0c;其主体是人的大脑&#xff0c;利用人大脑中产生的信号转换为命令而执行任务。 首款有思想的机器人&#xff1f;是的&#xff0c;你真的没有看错&#xff01; 反过来说呢&#xff0c;比如主体是机…

puppet(1.7-2.1)

puppet配置模块(一)模块是puppet的最大单元&#xff0c;模块里面有类&#xff0c;类下面有资源。同步文件、远程执行命令、cron等叫做资源&#xff0c;都是通过模块来实现的。下面我们来定义一个模块&#xff1a;在服务端上做如下操作&#xff1a;mkdir /etc/puppet/modules/te…

爆赞,对 volatile 关键字讲解最好的一篇文章!

欢迎关注方志朋的博客&#xff0c;回复”666“获面试宝典最近&#xff0c;在一篇文章中了解到了 volatile 关键字&#xff0c;在强烈的求知欲趋使下&#xff0c;我查阅了一些相关资料进行了学习&#xff0c;并将学习笔记记录如下&#xff0c;希望能给小伙伴们带来一些帮助。如果…

【C#串口编程计划】如何避免关闭串口时软件死锁

本文分析了关闭串口时软件死锁的原因并提出了解决方案。

理解Git!实习老板要我用Git协作

↑↑↑关注后"星标"Datawhale每日干货 & 每月组队学习&#xff0c;不错过Datawhale干货 作者&#xff1a;吴忠强&#xff0c;东北大学&#xff0c;Datawhale成员在公司实习&#xff0c;老板&#xff1a;你git新建一个分支&#xff0c;先让测试的同学测试完你写的…