背景
最近在排查一个网络问题，ifconfig eth0 up 后，网卡 link up 比较慢。因此，分析了下从 ifconfig up 到网络驱动的调用流程。这里顺便作个记录。
ifconfig eth0 up 调用的是 busybox 的命令，因此从 busybox 源码入手，逐步分析下调用流程。相关代码文件位于：networking/ifenslave.c
ifconfig eth0 up
ifconfig eth0 up 和 ifconfig eth0 down 分别对应 busybox 的 set_if_up() 和 set_if_down()。
staticintset_if_down(char*ifname,intflags) { intres=set_if_flags(ifname,flags&~IFF_UP); if(res) bb_perror_msg("%s:can'tdown",ifname); returnres; }
staticintset_if_up(char*ifname,intflags) { intres=set_if_flags(ifname,flags|IFF_UP); if(res) bb_perror_msg("%s:can'tup",ifname); returnres; }
比如，当我们敲 ifconfig eth0 down 时，实则就是调用：
set_if_down("eth0",master_flags.ifr_flags);
set_if_flags() 会将网卡名、up / down 标志位 flags 通过 ioctl 命令 SIOCSIFFLAGS 传递给内核网卡驱动。
staticintset_if_flags(char*ifname,intflags) { structifreqifr; ifr.ifr_flags=flags; returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,&ifr,ifname); }
dev_ifsioc
接着深入到内核代码中，看下 SIOCSIFFLAGS 命令在哪里实现。代码位于 kernel/net/core/dev_ioctl.c。
staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd) { interr; structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name); conststructnet_device_ops*ops; if(!dev) return-ENODEV; ops=dev->netdev_ops; switch(cmd){ caseSIOCSIFFLAGS:/*Setinterfaceflags*/ returndev_change_flags(dev,ifr->ifr_flags); caseSIOCSIFMETRIC:/*Setthemetricontheinterface (currentlyunused)*/ return-EOPNOTSUPP; ................... } returnerr; }
dev_ifsioc() 会调用 __dev_get_by_name() 根据网卡名遍历 net 链表，如果匹配到则返回 net_device 结构体指针。接着，SIOCSIFFLAGS 会调用到 dev_change_flags()，最后调用到 __dev_change_flags()。
dev_change_flags
intdev_change_flags(structnet_device*dev,unsignedintflags) { intret; unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags; ret=__dev_change_flags(dev,flags); if(ret0) ??return?ret; ?changes?=?(old_flags?^?dev->flags)|(old_gflags^dev->gflags); __dev_notify_flags(dev,old_flags,changes); returnret; }
int__dev_change_flags(structnet_device*dev,unsignedintflags) { unsignedintold_flags=dev->flags; intret; ASSERT_RTNL(); /* *Settheflagsonourdevice. */ dev->flags=(flags&(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP| IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL| IFF_AUTOMEDIA))| (dev->flags&(IFF_UP|IFF_VOLATILE|IFF_PROMISC| IFF_ALLMULTI)); /* *Loadinthecorrectmulticastlistnowtheflagshavechanged. */ if((old_flags^flags)&IFF_MULTICAST) dev_change_rx_flags(dev,IFF_MULTICAST); dev_set_rx_mode(dev); /* *Havewedownedtheinterface.WehandleIFF_UPourselves *accordingtouserattemptstosetit,ratherthanblindly *settingit. */ ret=0; /*兩個(gè)標(biāo)識(shí)有一個(gè)是IFF_UP*/ if((old_flags^flags)&IFF_UP) ret=((old_flags&IFF_UP)?__dev_close:__dev_open)(dev);//通過(guò)flags判斷調(diào)用__dev_close還是__dev_open if((flags^dev->gflags)&IFF_PROMISC){ intinc=(flags&IFF_PROMISC)?1:-1; unsignedintold_flags=dev->flags; dev->gflags^=IFF_PROMISC; if(__dev_set_promiscuity(dev,inc,false)>=0) if(dev->flags!=old_flags) dev_set_rx_mode(dev); } /*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI isimportant.Some(broken)driverssetIFF_PROMISC,when IFF_ALLMULTIisrequestednotaskingusandnotreporting. */ if((flags^dev->gflags)&IFF_ALLMULTI){ intinc=(flags&IFF_ALLMULTI)?1:-1; dev->gflags^=IFF_ALLMULTI; __dev_set_allmulti(dev,inc,false); } returnret; }
在 __dev_change_flags(dev, flags) 函数中，通过判断新旧 flags 的 IFF_UP 位上的值是否相反，来决定是调用 __dev_close() 还是 __dev_open() 来开关 eth0。
__dev_close
__dev_close 中会将当前的 net_device 加入到等待关闭的设备列表中。
staticint__dev_close(structnet_device*dev) { intretval; LIST_HEAD(single); list_add(&dev->close_list,&single); retval=__dev_close_many(&single); list_del(&single); returnretval; }
__dev_close_many
__dev_close_many 通知设备正在关闭，等待未发送完的数据发送完，最后清除开启标记。
staticint__dev_close_many(structlist_head*head) { structnet_device*dev; ASSERT_RTNL(); might_sleep(); list_for_each_entry(dev,head,close_list){ /*Temporarilydisablenetpolluntiltheinterfaceisdown*/ /*禁用netpoll*/ netpoll_poll_disable(dev); /*通知設(shè)備正在關(guān)閉*/ call_netdevice_notifiers(NETDEV_GOING_DOWN,dev); /*清除start標(biāo)志位*/ clear_bit(__LINK_STATE_START,&dev->state); /*Synchronizetoscheduledpoll.Wecannottouchpolllist,it *canbeevenondifferentcpu.Sojustclearnetif_running(). * *dev->stop()willinvokenapi_disable()onallofit's *napi_structinstancesonthisdevice. */ smp_mb__after_atomic();/*Commitnetif_running().*/ } /*未發(fā)送完的數(shù)據(jù)發(fā)送完*/ dev_deactivate_many(head); list_for_each_entry(dev,head,close_list){ conststructnet_device_ops*ops=dev->netdev_ops; /* *Callthedevicespecificclose.Thiscannotfail. *OnlyifdeviceisUP * *WeallowittobecalledevenafteraDETACHhot-plug *event. */ /*調(diào)用設(shè)備關(guān)閉操作*/ if(ops->ndo_stop) ops->ndo_stop(dev); /*標(biāo)記設(shè)備關(guān)閉*/ dev->flags&=~IFF_UP; /*啟用netpoll*/ netpoll_poll_enable(dev); } return0; }
ndo_stop
ndo_stop 为关闭网卡时，不同网卡驱动注册的不同的关闭函数。我们以海思的网卡驱动为例，分析下 ndo_stop 函数的实现。代码位于 kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
hns_nic_net_stop
/*
 * Stop callback of the hns driver: takes the NIC down through
 * hns_nic_net_down(). Always returns 0.
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
    hns_nic_net_down(ndev);

    return 0;
}
hns_nic_net_down
staticvoidhns_nic_net_down(structnet_device*ndev) { inti; structhnae_ae_ops*ops; structhns_nic_priv*priv=netdev_priv(ndev); if(test_and_set_bit(NIC_STATE_DOWN,&priv->state)) return; (void)del_timer_sync(&priv->service_timer); netif_tx_stop_all_queues(ndev); netif_carrier_off(ndev); netif_tx_disable(ndev); priv->link=0; if(priv->phy) phy_stop(priv->phy); ops=priv->ae_handle->dev->ops; if(ops->stop) ops->stop(priv->ae_handle); netif_tx_stop_all_queues(ndev); for(i=priv->ae_handle->q_num-1;i>=0;i--){ hns_nic_ring_close(ndev,i); hns_nic_ring_close(ndev,i+priv->ae_handle->q_num); /*cleantxbuffers*/ hns_nic_tx_clr_all_bufs(priv->ring_data+i); } }
hns_nic_net_down() 中会调用 netif_carrier_off() 通知内核子系统网络断开。下面我们详细分析下 netif_carrier_off() 的实现。
netif_carrier_off()
voidnetif_carrier_off(structnet_device*dev) { /*設(shè)置網(wǎng)卡為載波斷開(kāi)狀態(tài)即nocarrier狀態(tài),上行時(shí)軟中斷下半部讀到該狀態(tài)不會(huì)進(jìn)行網(wǎng)卡收包*/ if(!test_and_set_bit(__LINK_STATE_NOCARRIER,&dev->state)){ if(dev->reg_state==NETREG_UNINITIALIZED) return; /*增加設(shè)備改變狀態(tài)*/ atomic_inc(&dev->carrier_changes); /*加入事件處理隊(duì)列進(jìn)行處理*/ linkwatch_fire_event(dev); } }
linkwatch_fire_event()
linkwatch_fire_event() 函数将设备加入到事件队列，并且进行事件调度，调度中会根据是否为紧急事件做不同处理。
voidlinkwatch_fire_event(structnet_device*dev) { /*判斷是否是緊急處理的事件*/ boolurgent=linkwatch_urgent_event(dev); /*判斷是否是緊急處理的事件*/ if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state)){ /*添加事件到事件列表*/ linkwatch_add_event(dev); }elseif(!urgent) /*設(shè)備以前已經(jīng)設(shè)置了pending標(biāo)記,不是緊急事件,直接返回*/ return; /*事件調(diào)度*/ linkwatch_schedule_work(urgent); }
linkwatch_urgent_event()
linkwatch_urgent_event() 判断是否需要紧急处理。
staticboollinkwatch_urgent_event(structnet_device*dev) { /*設(shè)備未運(yùn)行,非緊急*/ if(!netif_running(dev)) returnfalse; /*設(shè)備的索引號(hào)與連接索引號(hào)不等,緊急*/ if(dev->ifindex!=dev_get_iflink(dev)) returntrue; /*設(shè)備作為teamport,緊急*/ if(dev->priv_flags&IFF_TEAM_PORT) returntrue; /*連接與否&&發(fā)送隊(duì)列排隊(duì)規(guī)則改變與否*/ returnnetif_carrier_ok(dev)&&qdisc_tx_changing(dev); }
linkwatch_add_event()
linkwatch_add_event() 将设备加入到事件处理链表。
staticvoidlinkwatch_add_event(structnet_device*dev) { unsignedlongflags; spin_lock_irqsave(&lweventlist_lock,flags); /*若未添加,則添加設(shè)備到事件列表*/ if(list_empty(&dev->link_watch_list)){ list_add_tail(&dev->link_watch_list,&lweventlist); dev_hold(dev); } spin_unlock_irqrestore(&lweventlist_lock,flags); }
linkwatch_schedule_work()
linkwatch_schedule_work() 对事件处理进行调度，紧急事件立即执行，非紧急事件延后执行。
staticvoidlinkwatch_schedule_work(inturgent) { unsignedlongdelay=linkwatch_nextevent-jiffies; /*已經(jīng)設(shè)置了緊急標(biāo)記,則返回*/ if(test_bit(LW_URGENT,&linkwatch_flags)) return; /*需要緊急調(diào)度*/ if(urgent){ /*之前設(shè)置了,則返回*/ if(test_and_set_bit(LW_URGENT,&linkwatch_flags)) return; /*未設(shè)置緊急,則立即執(zhí)行*/ delay=0; } /*如果大于1s則立即執(zhí)行*/ if(delay>HZ) delay=0; /*如果設(shè)置了緊急標(biāo)記,則立即執(zhí)行*/ if(test_bit(LW_URGENT,&linkwatch_flags)) mod_delayed_work(system_wq,&linkwatch_work,0); else /*未設(shè)置緊急標(biāo)記,則按照delay執(zhí)行*/ schedule_delayed_work(&linkwatch_work,delay); }
__linkwatch_run_queue()
__linkwatch_run_queue() 完成对事件调度队列中设备的处理。
staticvoid__linkwatch_run_queue(inturgent_only) { structnet_device*dev; LIST_HEAD(wrk); /* *Limitthenumberoflinkwatcheventstoone *persecondsothatarunawaydriverdoesnot *causeastormofmessagesonthenetlink *socket.Thislimitdoesnotapplytoupevents *whilethedeviceqdiscisdown. */ /*已達(dá)到調(diào)度時(shí)間*/ if(!urgent_only) linkwatch_nextevent=jiffies+HZ; /*Limitwrap-aroundeffectondelay.*/ /* 未到達(dá)調(diào)度時(shí)間,并且下一次調(diào)度在當(dāng)前時(shí)間的1s以后 那么設(shè)置調(diào)度時(shí)間是當(dāng)前時(shí)間 */ elseif(time_after(linkwatch_nextevent,jiffies+HZ)) linkwatch_nextevent=jiffies; /*清除緊急標(biāo)識(shí)*/ clear_bit(LW_URGENT,&linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist,&wrk); /*遍歷鏈表*/ while(!list_empty(&wrk)){ /*獲取設(shè)備*/ dev=list_first_entry(&wrk,structnet_device,link_watch_list); /*從鏈表移除設(shè)備*/ list_del_init(&dev->link_watch_list); /*未到達(dá)調(diào)度時(shí)間&&不需要緊急處理*/ if(urgent_only&&!linkwatch_urgent_event(dev)){ /*添加到鏈表尾部*/ list_add_tail(&dev->link_watch_list,&lweventlist); /*繼續(xù)處理*/ continue; } spin_unlock_irq(&lweventlist_lock); /*處理設(shè)備*/ linkwatch_do_dev(dev); spin_lock_irq(&lweventlist_lock); } /*鏈表有未處理事件,則以非緊急狀態(tài)調(diào)度隊(duì)列*/ if(!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); }
linkwatch_do_dev()
linkwatch_do_dev() 完成对某个设备的状态改变处理。
staticvoidlinkwatch_do_dev(structnet_device*dev) { /* *Makesuretheabovereadiscompletesinceitcanbe *rewrittenassoonasweclearthebitbelow. */ smp_mb__before_atomic(); /*Weareabouttohandlethisdevice, *soneweventscanbeaccepted */ /*清除pending標(biāo)記*/ clear_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state); rfc2863_policy(dev); /*如果設(shè)備啟動(dòng)狀態(tài)*/ if(dev->flags&IFF_UP){ /*鏈路連接*/ if(netif_carrier_ok(dev)) /*啟用排隊(duì)規(guī)則*/ dev_activate(dev); else /*關(guān)閉排隊(duì)規(guī)則*/ dev_deactivate(dev); /*設(shè)備狀態(tài)改變處理,執(zhí)行netdev_chain上設(shè)備狀態(tài)變更回調(diào)*/ netdev_state_change(dev); } dev_put(dev); }
phy_stop()
最后，hns_nic_net_down() 中会调用 phy_stop() 将网卡 link down。
voidphy_stop(structphy_device*phydev) { mutex_lock(&phydev->lock); if(PHY_HALTED==phydev->state) gotoout_unlock; if(phy_interrupt_is_valid(phydev)){ /*DisablePHYInterrupts*/ phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED); /*Clearanypendinginterrupts*/ phy_clear_interrupt(phydev); } phydev->state=PHY_HALTED; out_unlock: mutex_unlock(&phydev->lock); /*Cannotcallflush_scheduled_work()hereasdesiredbecause *ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change() *willnotreenableinterrupts. */ }
phy_stop() 将 phydev->state 设置为 PHY_HALTED，将网卡关闭。
__dev_open
__dev_open 为设备启用的核心函数，该函数打开 eth0，设置启用标记，并且设置接收模式、排队规则等。
staticint__dev_open(structnet_device*dev) { conststructnet_device_ops*ops=dev->netdev_ops; intret; ASSERT_RTNL(); /*設(shè)備不可用*/ if(!netif_device_present(dev)) return-ENODEV; /*Blocknetpollfromtryingtodoanyrxpathservicing. *Ifwedon'tdothisthereisachancendo_poll_controller *orndo_pollmayberunningwhileweopenthedevice */ /*禁用netpoll*/ netpoll_poll_disable(dev); /*設(shè)備打開(kāi)前通知*/ ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev); ret=notifier_to_errno(ret); if(ret) returnret; /*設(shè)置設(shè)備打開(kāi)標(biāo)記,設(shè)備將設(shè)置IFF_UP標(biāo)志位*/ set_bit(__LINK_STATE_START,&dev->state); /*校驗(yàn)地址*/ if(ops->ndo_validate_addr) ret=ops->ndo_validate_addr(dev); /*執(zhí)行打開(kāi)*/ if(!ret&&ops->ndo_open) ret=ops->ndo_open(dev); /*啟用netpoll*/ netpoll_poll_enable(dev); /*失敗,清除打開(kāi)標(biāo)記*/ if(ret) clear_bit(__LINK_STATE_START,&dev->state); /*設(shè)備打開(kāi)操作*/ else{ /*設(shè)置打開(kāi)標(biāo)記*/ dev->flags|=IFF_UP; /*設(shè)置接收模式*/ dev_set_rx_mode(dev); /*初始化排隊(duì)規(guī)則*/ dev_activate(dev); /*加入設(shè)備數(shù)據(jù)到熵池*/ add_device_randomness(dev->dev_addr,dev->addr_len); } returnret; }
hns_nic_net_open()
我们以海思的网卡驱动为例，分析下 ndo_open() 函数的实现。代码位于 kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
staticinthns_nic_net_open(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; intret; if(test_bit(NIC_STATE_TESTING,&priv->state)) return-EBUSY; priv->link=0; netif_carrier_off(ndev); /*設(shè)置txqueue的個(gè)數(shù)*/ ret=netif_set_real_num_tx_queues(ndev,h->q_num); if(ret0)?{ ??netdev_err(ndev,?"netif_set_real_num_tx_queues?fail,?ret=%d! ", ??????ret); ??return?ret; ?} ?/*設(shè)置rx?queue的個(gè)數(shù)*/ ?ret?=?netif_set_real_num_rx_queues(ndev,?h->q_num); if(ret0)?{ ??netdev_err(ndev, ??????"netif_set_real_num_rx_queues?fail,?ret=%d! ",?ret); ??return?ret; ?} ?/*啟動(dòng)網(wǎng)卡*/ ?ret?=?hns_nic_net_up(ndev); ?if?(ret)?{ ??netdev_err(ndev, ??????"hns?net?up?fail,?ret=%d! ",?ret); ??return?ret; ?} ?return?0; }
hns_nic_net_up()
staticinthns_nic_net_up(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; inti,j,k; intret; /*初始化中斷,并設(shè)置中斷函數(shù)為hns_irq_handle,每個(gè)rx和txqueue都對(duì)應(yīng)一個(gè)中斷*/ ret=hns_nic_init_irq(priv); if(ret!=0){ netdev_err(ndev,"hnsinitirqfailed!ret=%d ",ret); returnret; } for(i=0;iq_num*2;i++){ /*使能中斷,使能napi*/ ret=hns_nic_ring_open(ndev,i); if(ret) gotoout_has_some_queues; } for(k=0;kq_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],1); /*設(shè)置mac地址*/ ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr); if(ret) gotoout_set_mac_addr_err; /*hns的start函數(shù)為null*/ ret=h->dev->ops->start?h->dev->ops->start(h):0; if(ret) gotoout_start_err; if(priv->phy) /*啟動(dòng)phy*/ phy_start(priv->phy); clear_bit(NIC_STATE_DOWN,&priv->state); /*修改time每一秒到期一次*/ (void)mod_timer(&priv->service_timer,jiffies+SERVICE_TIMER_HZ); return0; out_start_err: netif_stop_queue(ndev); out_set_mac_addr_err: for(k=0;kq_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],0); out_has_some_queues: for(j=i-1;j>=0;j--) hns_nic_ring_close(ndev,j); set_bit(NIC_STATE_DOWN,&priv->state); returnret; }
phy_start()
最后会调用到 phy_start() 启动网卡。
voidphy_start(structphy_device*phydev) { booldo_resume=false; interr=0; mutex_lock(&phydev->lock); switch(phydev->state){ casePHY_STARTING: phydev->state=PHY_PENDING; break; casePHY_READY: phydev->state=PHY_UP; break; casePHY_HALTED: /*makesureinterruptsarere-enabledforthePHY*/ err=phy_enable_interrupts(phydev); if(err0) ???break; ??phydev->state=PHY_RESUMING; do_resume=true; break; default: break; } mutex_unlock(&phydev->lock); /*ifphywassuspended,bringthephysicallinkupagain*/ if(do_resume) phy_resume(phydev); }
审核编辑：刘清
-
网卡驱动
+关注
关注
0文章
35浏览量
17705
原文标题：【网络驱动】ifconfig up 后内核网络驱动做了什么？
文章出处：【微信号：嵌入式与Linux那些事，微信公众号：嵌入式与Linux那些事】欢迎添加关注！文章转载请注明出处。
发布评论请先 登录
相关推荐
评论