1.簡介
在內核調試中,經常會出現內核僵死的問題,也就是發生死循環,內核無法進行調度,導致系統失去響應。這種情況下,我們可以通過修改內核中系統時鐘的中斷處理函數,來定位發生僵死的進程和函數名稱。因為系統時鐘是以硬件中斷的形式存在的,所以即使軟件發生僵死,系統時鐘照樣會產生中斷。
1.1、我們在命令行輸入:cat /proc/interrupts
# cat /proc/interrupts CPU0 30: 8316 s3c S3C2410 Timer Tick -----> 系統時鐘 33: 0 s3c s3c-mci 34: 0 s3c I2SSDI 35: 0 s3c I2SSDO 37: 12 s3c s3c-mci 42: 0 s3c ohci_hcd:usb1 43: 0 s3c s3c2440-i2c 51: 1047 s3c-ext eth0 60: 0 s3c-ext s3c-mci 70: 16 s3c-uart0 s3c2440-uart 71: 26 s3c-uart0 s3c2440-uart 79: 8 s3c-adc s3c2410_action 80: 1732 s3c-adc s3c2410_action 83: 0 - s3c2410-wdt Err: 0 #
30: 8316 s3c S3C2410 Timer Tick 這個就是系統時鐘,中斷號為30。 1.2、在內核代碼中搜索"S3C2410 Timer Tick"字樣。在time.c (arch/arm/plat-s3c24xx)文件中有如下代碼。
static struct irqaction s3c2410_timer_irq = { .name = "S3C2410 Timer Tick", .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, .handler = s3c2410_timer_interrupt,};/* * IRQ handler for the timer */static irqreturn_ts3c2410_timer_interrupt(int irq, void *dev_id){#if 1 static pid_t pre_pid; static int cnt=0; //時(shí)鐘中斷的中斷號(hào)是30 if(irq==30) { if(pre_pid==current->pid) { cnt++; } else { cnt=0; pre_pid=current->pid; } //如果本進(jìn)程十秒鐘還沒(méi)有離開(kāi)的話,就會(huì)打印下面的語(yǔ)句 if(cnt==10*HZ) { cnt=0; printk("s3c2410_timer_interrupt : pid = %d, task_name = %s ",current->pid,current->comm); } }#endif write_seqlock(&xtime_lock); timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED;}
①、每個進程都有一個task_struct結構體,用來存儲進程的一些狀態信息。current是一個宏,表示當前進程的信息,也就是當前進程的task_struct結構體,所以current->pid為當前進程的pid號,current->comm為當前進程的名稱。
②、HZ也是一個宏定義,表示1s內產生多少次時鐘中斷;10*HZ表示的就是10s內產生多少次時鐘中斷。
2、測試
編譯內核:# make uImage
加載一個帶有while(1);的驅動程序,系統發生僵死,系統會打印如下信息:
# insmod first_drv.ko # ./firstdrvtest on s3c2410_timer_interrupt : pid = 770, task_name = firstdrvtest s3c2410_timer_interrupt : pid = 770, task_name = firstdrvtest
根據上述信息可知,發生僵死的進程號為:770,發生僵死的進程名稱為:firstdrvtest
3、繼續完善,增加PC值的打印,更加精確地定位僵死的地方
我們知道,當中斷發生的時候,匯編代碼中會調用asm_do_IRQ函數:
.macro irq_handler
	get_irqnr_preamble r5, lr
1:	get_irqnr_and_base r0, r6, r5, lr
	movne	r1, sp
	@
	@ routine called with r0 = irq number, r1 = struct pt_regs *
	@
	adrne	lr, 1b
	bne	asm_do_IRQ		@ call the C-language function
asm_do_IRQ 函數原型:
/*
 * asm_do_IRQ - C-level interrupt entry point, called from the assembly
 * IRQ stub with the interrupt number and the saved register frame.
 * @irq:  interrupt number
 * @regs: register state saved when the interrupt was taken
 *
 * Installs @regs as the current IRQ register frame, dispatches the
 * interrupt through its descriptor, and restores the previous frame
 * before returning.
 */
asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	struct irq_desc *desc = irq_desc + irq;

	/*
	 * Some hardware gives randomly wrong interrupts.  Rather
	 * than crashing, do something sensible.
	 */
	if (irq >= NR_IRQS)
		desc = &bad_irq_desc;

	irq_enter();

	desc_handle_irq(irq, desc);

	/* AT91 specific workaround */
	irq_finish(irq);

	irq_exit();
	set_irq_regs(old_regs);
}
在asm_do_IRQ這個函數里面,我們發現了一個結構體:struct pt_regs,這個結構體用來保存發生中斷時的現場,其中PC值就是:ARM_pc
我們將上面在s3c2410_timer_interrupt里面加入的代碼都刪除,并把它們加到asm_do_IRQ函數里面,修改后該函數為:(紅色為添加的程序)
/*
 * asm_do_IRQ - C-level interrupt entry point, instrumented to locate a
 * task that never gives up the CPU.
 * @irq:  interrupt number
 * @regs: register state saved when the interrupt was taken
 *
 * The "#if 1" section is temporary debug code: it counts consecutive
 * timer ticks during which the same pid is current and, once the count
 * reaches 10 * HZ ticks (about ten seconds), prints that task's pid and
 * name together with the PC saved in @regs.
 */
asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	struct irq_desc *desc = irq_desc + irq;
#if 1
	static pid_t pre_pid;	/* pid that was current on the previous tick */
	static int cnt;		/* consecutive ticks seen with pre_pid current */

	/* the timer tick is interrupt number 30 (see /proc/interrupts) */
	if (irq == 30) {
		if (pre_pid == current->pid) {
			cnt++;
		} else {
			cnt = 0;
			pre_pid = current->pid;
		}

		if (cnt == 10 * HZ) {
			cnt = 0;
			printk("s3c2410_timer_interrupt : pid = %d, task_name = %s\n",
			       current->pid, current->comm);
			/* PC saved in the interrupted context's register frame */
			printk("pc = %08lx\n", regs->ARM_pc);
		}
	}
#endif

	/*
	 * Some hardware gives randomly wrong interrupts.  Rather
	 * than crashing, do something sensible.
	 */
	if (irq >= NR_IRQS)
		desc = &bad_irq_desc;

	irq_enter();

	desc_handle_irq(irq, desc);

	/* AT91 specific workaround */
	irq_finish(irq);

	irq_exit();
	set_irq_regs(old_regs);
}
4、測試:
# insmod first_drv.ko # ./firstdrvtest on s3c2410_timer_interrupt : pid = 771, task_name = firstdrvtest pc = bf000084
4.1、查看內核函數以及已加載模塊中函數的地址
#cat /proc/kallsyms > /kallsyms.txt
找到pc地址bf000084附近的函數:
.................................... 00000000 a first_drv.c [first_drv] bf000088 t first_drv_init [first_drv] bf000140 t first_drv_exit [first_drv] c48761cc ? __mod_license87 [first_drv] bf000940 b $d [first_drv] bf000740 d first_drv_fops [first_drv] bf000740 d $d [first_drv] bf00003c t first_drv_write [first_drv] #可以確定僵死的地方大概就在這個函數里面 bf000000 t first_drv_open [first_drv] bf000000 t $a [first_drv] bf000038 t $d [first_drv] bf00003c t $a [first_drv] bf000114 t $d [first_drv] bf00094c b firstdrv_class [first_drv] bf000950 b firstdrv_class_dev [first_drv] bf000140 t $a [first_drv] bf000184 t $d [first_drv] 00000000 a first_drv.mod.c [first_drv] c48761d8 ? __module_depends [first_drv] bf0008ac d $d [first_drv] c4876204 ? __mod_vermagic5 [first_drv] c01bd44c u class_device_create [first_drv] c008ca94 u register_chrdev [first_drv] c01bd668 u class_device_unregister [first_drv] bf000948 b major [first_drv] bf000944 b gpfcon [first_drv] c0031ad0 u __iounmap [first_drv] c01bc968 u class_create [first_drv] bf0007c0 d __this_module [first_drv] bf000088 t init_module [first_drv] c008c9dc u unregister_chrdev [first_drv] bf000140 t cleanup_module [first_drv] c01bc9dc u class_destroy [first_drv] bf000940 b gpfdat [first_drv] c0031a6c u __arm_ioremap [first_drv] c0172f80 u __copy_from_user [first_drv] c01752e0 u __memzero [first_drv]
4.2、查看反匯編
#arm-linux-objdump -D first_drv.ko > first_drv.dis
在kallsyms.txt中可以知道,first_drv_write的入口地址為 bf00003c
打開first_drv.dis,如何查找真正僵死的位置?
(1)首先從反匯編文件中找到位置為00000000的函數:00000000 <first_drv_open>:
(2)在kallsyms.txt中,first_drv_open 的實際位置是:bf000000
(3)根據上面的信息可以知道,在反匯編中,發生僵死的位置為 00000084 - 4 處
(4)查找可知00000084處的代碼在函數first_drv_write中
0000003c <first_drv_write>: 3c: e1a0c00d mov ip, sp 40: e92dd800 stmdb sp!, {fp, ip, lr, pc} 44: e24cb004 sub fp, ip, #4 ; 0x4 48: e24dd004 sub sp, sp, #4 ; 0x4 4c: e3cd3d7f bic r3, sp, #8128 ; 0x1fc0 50: e3c3303f bic r3, r3, #63 ; 0x3f 54: e5933008 ldr r3, [r3, #8] 58: e0910002 adds r0, r1, r2 5c: 30d00003 sbcccs r0, r0, r3 60: 33a03000 movcc r3, #0 ; 0x0 64: e3530000 cmp r3, #0 ; 0x0 68: e24b0010 sub r0, fp, #16 ; 0x10 6c: 1a00001c bne e4 0x5c> 70: ebfffffe bl 70 0x34> 74: ea00001f b f8 0x70> 78: e3520000 cmp r2, #0 ; 0x0 7c: 11a01002 movne r1, r2 80: 1bfffffe blne 80 #錯誤在這,死循環!!! 84: ea00001f b 108
注意:在arm中,中斷保存的PC是當前指令地址加4,所以真正僵死的位置是:bf000080,也就是反匯編中的:80
?
?
評論
查看更多