今天还是研究内核调试,
死机,这个词语,大家应该不陌生.
当我们编写程序时,如果加入到内核中的代码里出现了死循环,那么内核一旦运行到这段代码,系统就会直接进入近似死机的状态.
那么怎么可以解决这个问题呢?
我们都知道,人的心脏是一直在跳动的;与此类似,内核也有它自己的"心跳",那就是tick中断,
所以我们可以从tick中断入手,解决上面的死机问题.
在开发板上运行cat /proc/interrupts 可以查看系统当前的各种中断号,
可以看到一个中断名为i.MX Timer Tick 的中断,那么它就是我们今天的主角.
1 root@EasyARM-iMX257 /mnt/nfs/module# cat /proc/interrupts
2 CPU0
3 9: 0 - mxsdhci
4 14: 0 - CSPI_IRQ
5 25: 2 - imxdi - mxcsdma
6 35: 0 - ehci_hcd:usb1
7 37: 2453 - mxcintuart
8 46: 3 - m - i.MX Timer Tick
9 57: 0 - mxsdhci
10 Err: 0
在内核中查找 Timer Tick的源代码,如下所示:
1 /* linux-2.6.31/arch/arm/plat-mxc/time.c
2 * IRQ handler for the timer
3 */
4 static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id)
5 {
6 struct clock_event_device *evt = &clockevent_mxc;
7 uint32_t tstat;
8
9 if (timer_is_v2())
10 tstat = __raw_readl(timer_base + MX3_TSTAT);
11 else
12 tstat = __raw_readl(timer_base + MX1_2_TSTAT);
13
14 gpt_irq_acknowledge();
15
16 evt->event_handler(evt);
17
18 return IRQ_HANDLED;
19 }
20
21 static struct irqaction mxc_timer_irq = {
22 .name = "i.MX Timer Tick",
23 .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
24 .handler = mxc_timer_interrupt,
25 };
在这个函数中,我们可以增加一些代码:有点类似看门狗
一.在 mxc_timer_interrupt中增加打印语句
在mxc_timer_interrupt 中断函数中检测系统当前正在运行的进程,如果10s之内都是同一个进程在运行,那么我们就把这个进程打印出来(先从简单入手,此处先不做太复杂的事情)
步骤:
①首先备份 linux-2.6.31/arch/arm/plat-mxc/time.c,
②接着修改time.c的内容,
③最后编译内核,重新给板子启动新内核
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cp time.c time.c.bak
修改time.c,在中断函数中加入打印语句
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# vi time.c
************************************************************************************************
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cd ../../..
编译内核
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# make uImage
CHK include/linux/version.h
make[1]: 'include/asm-arm/mach-types.h' is up to date.
CHK include/linux/utsrelease.h
SYMLINK include/asm -> include/asm-arm
************************************************************************************************
Data Size: 2180620 Bytes = 2129.51 kB = 2.08 MB
Load Address: 80008000
Entry Point: 80008000
Image arch/arm/boot/uImage is ready
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# cp arch/arm/boot/uImage /tftpboot/uImage
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31#
************************************************************************************************
在开发板上重新烧写内核
MX25 U-Boot > run upsystem
FEC: enable RMII gasket
TFTP from server 192.168.31.179; our IP address is 192.168.31.180
Filename '00
Loading: #################################################################
#################################################################
###################
done
************************************************************************************************
加载完毕后,如果不操作开发板,会发现每隔10秒钟,就会打印一条进程pid=0,名字name=swapper的消息.
root@EasyARM-iMX257 ~# mxc_timer_interrupt: pid = 0, name = swapper
root@EasyARM-iMX257 ~# mxc_timer_interrupt: pid = 0, name = swapper
root@EasyARM-iMX257 ~#
修改time.c如下所示:
1 /* linux-2.6.31/arch/arm/plat-mxc/time.c
2 * IRQ handler for the timer
3 */
4 static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id)
5 {
6 struct clock_event_device *evt = &clockevent_mxc;
7 uint32_t tstat;
8 ////////////////////////////////////////
9 static pid_t pre_pid;
10 static int cnt = 0;
11 if(pre_pid == current->pid){
12 cnt++;
13 }else{
14 cnt = 0;
15 pre_pid = current->pid;
16 }
17 if(cnt == 10*HZ){
18 cnt = 0;
19 printk("mxc_timer_interrupt: pid = %d, name = %s\n",current->pid, current->comm);
20 }
21 //////////////////////////////////////////
22 if (timer_is_v2())
23 tstat = __raw_readl(timer_base + MX3_TSTAT);
24 else
25 tstat = __raw_readl(timer_base + MX1_2_TSTAT);
26
27 gpt_irq_acknowledge();
28
29 evt->event_handler(evt);
30
31 return IRQ_HANDLED;
32 }
二.修改错误代码,在代码中增加死循环
还是沿用我们前面的err_led.c的驱动程序.
参考博客地址:http://www.cnblogs.com/lihaiyan/p/4470390.html
在open函数中,我们故意加入一个死循环.
/* err_led.c
*/
44 static int key_open(struct inode *inode, struct file *file)
45 {
46 printk("<0>function open!\n\n");
47 //在此加入一个死循环
48 while(1);
49 return 0;
50 }
编译完成后,接着在开发板中加载这个有错误的驱动程序,并使用cat命令打开设备.
root@EasyARM-iMX257 ~# ifconfig eth0 192.168.31.181;mount -t nfs 192.168.31.179:
/home/study/nfs_home /mnt/nfs -o nolock;cd /mnt/nfs/module/
root@EasyARM-iMX257 /mnt/nfs/module#
root@EasyARM-iMX257 /mnt/nfs/module# cd 39_debug_with_timer/
root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# insmod err_led.ko
Hello,this is err_led_dev module!
addr base_iomux : c4a26000
addr base_gpio3 : c4a2a000
addCTL : c4a26270
addr GDIR_GPIO3a2a000
root@EasyARM-iMX257 /mnt/nfs/module/39_debug_with_timer# cat /dev/err_led_dev
function open!
#################################################################
可以发现,打开设备后,进入open函数,系统直接进入死机状态,每隔10秒钟便会打印出当前进程的进程号pid=1805
mxc_timer_interrupt: pid = 1805, name = cat
mxc_timer_interrupt: pid = 1805, name = cat
mxc_timer_interrupt: pid = 1805, name = cat
三.在asm_do_IRQ中加入调试代码,打印出错时的PC值
接着恢复上面的time.c的代码,然后在linux-2.6.31/arch/arm/kernel/irq.c文件中找到系统中断的总入口函数asm_do_IRQ,
我们在asm_do_IRQ函数里加入前面time.c中的打印代码.
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# cd arch/arm/plat-mxc/
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# mv time.c.bak time.c
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/plat-mxc# cd ..
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm# cd kernel/
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/kernel# vi irq.c
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31/arch/arm/kernel# cd ../../../
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# make uImage
########################################################
Load Address: 80008000
Entry Point: 80008000
Image arch/arm/boot/uImage is ready
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# cp arch/arm/boot/uImage /tftpboot/uImage
root@Lover雪:/home/study/nfs_home/system/linux-2.6.31#
########################################################
从开发板重新烧写新内核
启动开发板
irq.c修改内容如下:
1 /* linux-2.6.31/arch/arm/kernel/irq.c
2 * do_IRQ handles all hardware IRQ's. Decoded IRQs should not
3 * come via this function. Instead, they should provide their
4 * own 'handler'
5 */
6 asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
7 {
8 struct pt_regs *old_regs = set_irq_regs(regs);
9 ////////////////////////////////////////////////////////////////////
10 //从 cat /proc/interrupts 中得到我们的tick中断为46
11 if(irq == 46)
12 {
13 ////////////////////////////////////////
14 static pid_t pre_pid;
15 static int cnt = 0;
16 if(pre_pid == current->pid){
17 cnt++;
18 }else{
19 cnt = 0;
20 pre_pid = current->pid;
21 }
22 if(cnt == 10*HZ){
23 cnt = 0;
24 printk("asm_do_IRQ => mxc_timer_interrupt: pid = %d, name = %s\n",current->pid, current->comm);
25 printk("pc = %08x\n",regs->ARM_pc);//ptrace.h
26 }
27 /////////////////////////////////////////
28 }
29 ////////////////////////////////////////////////////////////////////
30
31 irq_enter();
32
33 /*
34 * Some hardware gives randomly wrong interrupts. Rather
35 * than crashing, do something sensible.
36 */
37 if (unlikely(irq >= NR_IRQS)) {
38 if (printk_ratelimit())
39 printk(KERN_WARNING "Bad IRQ%u\n", irq);
40 ack_bad_irq(irq);
41 } else {