理解Linux內核搶佔模型（最透徹一篇）

時間 2020-12-05

標籤 linux 服務器 app ide 函數 oop 線程調試 code blog 欄目 Linux 简体版

原文原文鏈接

原創 宋寶華 Linux閱碼場 5月6日

本文原文地址：
https://devarea.com/understanding-linux-kernel-preemption/#.XrKLcfnx05k
作者：Liran B.H
譯者：宋寶華

當配置Linux內核的時候，我們可以選擇一些參數，這些參數能影響系統的行爲。你可以用不同的優先級、調度類和搶佔模型來工作。正確地選擇這些參數是非常重要的。
本文將論述不一樣的搶佔模型如何影響用戶和系統的行爲。
當你使用 make menuconfig配置內核的時候，你能看到這樣的菜單：

爲了深入理解這三個搶佔模型的區別，我們將寫一個案例：

2個線程，一個高優先級RT（50），一個低優先級RT（30）
高優先級的線程要睡眠3秒
低優先級的線程用CPU來做計算
3秒後高優先級線程喚醒。
如果低優先級的線程陷入系統調用，高優先級的線程睡眠到期，究竟會發生什麼？下面我們一種模型一種模型地看。
No Forced Preemption

這種情況下，上下文切換發生在系統調用返回用戶空間的點。案例如下：
2個線程，一個高優先級RT（50），一個低優先級RT（30）
高優先級的線程要睡眠3秒
低優先級的線程進入系統調用計算5秒
5秒後低優先級線程從內核系統調用返回
高優先級線程將醒來（但是比預期遲了2秒）。
內核代碼，簡單的字符設備：

1   #include <asm/uaccess.h>
2   #include <linux/fs.h>
3   #include <linux/gfp.h>
4   #include <linux/cdev.h>
5   #include <linux/sched.h>
6   #include <linux/kdev_t.h>
7   #include <linux/delay.h>
8   #include <linux/ioctl.h>
9   #include <linux/slab.h>
10  #include <linux/mempool.h>
11  #include <linux/mm.h>
12  #include <asm/io.h>
13
14
15    static dev_t my_dev;
16    static struct cdev *my_cdev;
17
18
19    // callback for read system call on the device
20    static ssize_t my_read(struct file *file, char __user *buf,size_t count,loff_t *ppos)
21  {
22   int len=5;
23   if(*ppos > 0)
24   {
25  return 0;
26   }
27   mdelay(5000); // busy-wait for 5 seconds
28   if (copy_to_user(buf , "hello" , len)) {
29      return -EFAULT;
30   } else {
31       *ppos +=len;
32       return len;
33   }
34  }
35
36
37
38  static struct file_operations my_fops =
39  {
40  .owner = THIS_MODULE,
41  .read = my_read,
42  };
43
44
45
46
47   static int hello_init (void)
48  {
49
50  my_dev = MKDEV(400,0);
51  register_chrdev_region(my_dev,1,"demo");
52
53  my_cdev=cdev_alloc();
54  if(!my_cdev)
55  {
56    printk (KERN_INFO "cdev alloc error.\n");
57     return -1;    
58  }
59  my_cdev->ops = &my_fops;
60  my_cdev->owner = THIS_MODULE;
61
62  if(cdev_add(my_cdev,my_dev,1))
63  {
64    printk (KERN_INFO "cdev add error.\n");
65     return -1;    
66   }
67
68
69     return 0;
70
71   }
72
73
74      static void
75      hello_cleanup (void)
76     {
77      cdev_del(my_cdev);
78      unregister_chrdev_region(my_dev, 1);
79  }
80
81
82   module_init (hello_init);
83   module_exit (hello_cleanup);
84   MODULE_LICENSE("GPL")

讀裏面delay了5秒，注意mdelay是一個計算型的busy-loop。
用戶空間代碼如下：

#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
7
8
/*
 * High-priority thread body: print the current time, sleep for 3 seconds,
 * then print the time again. The difference between the two printed
 * values shows how long the wake-up actually took.
 *
 * p is unused. Always returns NULL.
 */
void *hi_prio(void *p)
{
	(void)p;

	/* time_t is not guaranteed to be long; cast to match %ld. */
	printf("thread1 start time=%ld\n", (long)time(NULL));
	sleep(3);
	printf("thread1 stop time=%ld\n", (long)time(NULL));
	return NULL;
}
16
/*
 * Low-priority thread body: wait 1 second, then issue a read() on
 * /dev/demo so that this thread is inside a system call when the
 * high-priority thread's sleep expires.
 *
 * The device node must exist beforehand:  # mknod /dev/demo c 400 0
 *
 * p is unused. Always returns NULL; open/read failures are reported on
 * stderr.
 */
void *low_prio(void *p)
{
	char buf[20];
	int fd;

	(void)p;

	sleep(1);
	fd = open("/dev/demo", O_RDWR);
	if (fd < 0) {
		perror("open /dev/demo");
		return NULL;
	}

	puts("thread2 start");
	if (read(fd, buf, sizeof buf) < 0)
		perror("read /dev/demo");
	puts("thread2 stop");

	close(fd);
	return NULL;
}
27
28
29  int main()
30   {
31   pthread_t t1,t2,t3;
32
33    pthread_attr_t attr;
34 
35   struct sched_param param;
36 
37   pthread_attr_init(&attr);
38   pthread_attr_setschedpolicy(&attr, SCHED_RR);
39
40   param.sched_priority = 50;
41  pthread_attr_setschedparam(&attr, &param);
42
43
44   pthread_create(&t1,&attr,hi_prio,NULL);
45
46   param.sched_priority = 30;
47   pthread_attr_setschedparam(&attr, &param);
48 
49   pthread_create(&t2,&attr,low_prio,NULL);
50  sleep(10);
51   puts("end test");
52  return 0;
53  }

實驗步驟：

高優先級線程開始睡眠3秒
低優先級線程睡眠1秒然後做系統調用
高優先級線程6秒後醒來（stop和start的時間差）

1    # insmod demo.ko 
2    # ./app
3    thread1 start time=182
4    thread2 start
5    thread1 stop time=188
6    thread2 stop
7    end test

Preemptible Kernel

這種情況下內核裏面也可以搶佔，意味着上述程序裏面的高優先級線程3秒後可醒來。
這種情況下，系統會有更多的上下文切換，但是實時性更加好。對於要求軟實時的嵌入式系統而言，這個選項是最佳的。但是對於服務器而言，通常第一個選項更好——更少的上下文切換，更多的CPU時間用來做有用功。
運行結果（stop、start時間差3秒）：

1     # insmod ./demo.ko
2     #./app
3    thread1 start time=234
4     thread2 start
5     thread1 stop time=237
6    thread2 stop
7    end test

Voluntary Kernel Preemption

這種情況和第一種情況"no forced preemption"類似，但是內核開發者可以在進行複雜操作的時候，時不時檢查一下是否可以reschedule。他們可以調用might_resched()函數。
在下面的代碼中，我們添加了一些檢查點（check point）：

1   // callback for read system call on the device
2   static ssize_t my_read(struct file *file, char __user *buf,size_t 
3   {
4    int len=5;
5     if(*ppos > 0)
6     {
7    return 0;
8     }
9   mdelay(4000); // busy-wait for 4 seconds
10   might_resched();
11   delay(3000);  // busy wait for 3 seconds 
12   if (copy_to_user(buf , "hello" , len)) {
13           return -EFAULT;
14        } else {
15           *ppos +=len;
16             return len;
17        }
18   }

如果我們把might_resched()註釋掉，它會delay 7秒。
添加might_resched()調用（在Voluntary搶佔模型下它會執行cond_resched()）將導致系統檢查是否有高優先級的任務被喚醒，這樣高優先級任務5秒後可以醒來（其中1秒在系統調用之前，另外4秒在內核中）。
運行結果：code

1   # insmod ./demo.ko
2   #./app
3   thread1 start time=320
4   thread2 start
5   thread1 stop time=325
6   thread2 stop
7  end test

Full Real Time Preemption

如果我們使能RT補丁，我們會得到一個硬實時的kernel。這意味着任何代碼可以搶佔任何人。比如一個更加緊急的任務可以搶佔中斷服務程序ISR。這個patch進行了如下改動：

把中斷服務程序轉化爲優先級是50的RT線程
把softIRQ轉化爲優先級是49的RT線程
把所有的spinlock變成mutex
高精度定時器
其餘的細小改動

打補丁後會看到2個新增的菜單：

其中「Preemptible Kernel (Basic RT)」是爲了調試目的的，爲了全面使用RT補丁的功能，我們應該選擇最後一項 – Fully Preemptible Kernel。這樣我們會有更多的上下文切換，但是可以滿足RT的實時要求。

(END)

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。

理解Linux內核搶佔模型（最透徹一篇）

No Forced Preemption

Preemptible Kernel

Voluntary Kernel Preemption

Full Real Time Preemption