Timer tasks let the kernel execute a specified piece of code at a specified moment in the future. The kernel timer interfaces live in linux/timer.h.
This article first covers how timer tasks are used, and then builds on that to walk through their internal implementation.
1. The timer task structure:
```c
struct timer_list {
        struct list_head entry;            /* links the timer into the kernel timer lists */
        unsigned long expires;             /* expiry time of the timer task */
        void (*function)(unsigned long);   /* the timer task's handler function */
        unsigned long data;                /* argument passed to the handler */
        struct tvec_base *base;            /* the per-CPU timer base this timer belongs to */
#ifdef CONFIG_TIMER_STATS
        void *start_site;
        char start_comm[16];
        int start_pid;
#endif
#ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
#endif
};
```
2. Timer task interfaces:
1. Initializing a timer task
```c
#define TIMER_INITIALIZER(_function, _expires, _data) {         \
                .entry = { .prev = TIMER_ENTRY_STATIC },        \
                .function = (_function),                        \
                .expires = (_expires),                          \
                .data = (_data),                                \
                .base = &boot_tvec_bases,                       \
                __TIMER_LOCKDEP_MAP_INITIALIZER(                \
                        __FILE__ ":" __stringify(__LINE__))     \
        }

#define DEFINE_TIMER(_name, _function, _expires, _data)         \
        struct timer_list _name =                               \
                TIMER_INITIALIZER(_function, _expires, _data)

#define setup_timer(timer, fn, data)                            \
        do {                                                    \
                static struct lock_class_key __key;             \
                setup_timer_key((timer), #timer, &__key, (fn), (data)); \
        } while (0)
```
These mainly initialize the timer task's members. Note the assignment .base = &boot_tvec_bases: boot_tvec_bases is created by the kernel during its own initialization.
In practice, setting expires at initialization time rarely means anything; the expiry time is normally set only when the timer task is registered.
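As a minimal sketch of both styles (demo_fn, demo_timer, dyn_timer and demo_setup are hypothetical names, not from the original):

```c
#include <linux/timer.h>

static void demo_fn(unsigned long data);   /* handler defined elsewhere */

/* Statically, at compile time; expires/data can stay 0 for now: */
static DEFINE_TIMER(demo_timer, demo_fn, 0, 0);

/* Or dynamically, typically from an init function: */
static struct timer_list dyn_timer;

static void demo_setup(void)
{
        setup_timer(&dyn_timer, demo_fn, 0UL);
        /* the real expiry is set later, at registration time */
}
```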
2. Registering a timer task:
```c
void add_timer(struct timer_list *timer);
```
Once a timer task has been registered onto the kernel's timer list, it is in the activated state. Note that a registered timer task runs only once, because it is removed from the timer list when it fires. To have it run periodically, the handler must re-register the timer so it can fire again, as the sketch below shows.
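A minimal sketch of the re-registration pattern (periodic_timer and periodic_fn are hypothetical names):

```c
static struct timer_list periodic_timer;

static void periodic_fn(unsigned long data)
{
        /* ... do the periodic work; this runs in atomic context ... */

        /* Re-register, or the timer fires exactly once: */
        mod_timer(&periodic_timer, jiffies + HZ);   /* again in ~1 second */
}
```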
3. Unregistering a timer task:
```c
int del_timer(struct timer_list *timer);
int del_timer_sync(struct timer_list *timer);
```
The timer task may be in the middle of running when it is unregistered; in that case del_timer_sync() waits for the handler to finish before removing the timer.
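A typical place this matters is module unload: if the handler could still be running on another CPU after the module text is freed, the kernel would crash. A sketch of the usual cleanup (my_timer is assumed to be a timer_list owned by the module):

```c
static void __exit demo_exit(void)
{
        /* Blocks until a handler running on another CPU has finished,
         * then removes the timer; plain del_timer() would not wait. */
        del_timer_sync(&my_timer);
}
```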
4. Modifying a timer task's expiry time
Once add_timer() has registered the timer task, it is active; from then on, changing its expiry time must be done through the following interface:
```c
int mod_timer(struct timer_list *timer, unsigned long expires);
```
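For instance, a minimal sketch (rearm is a made-up helper):

```c
static void rearm(struct timer_list *t)
{
        /* Works whether or not t is currently pending; returns 1 if the
         * timer was still active, 0 if it was inactive. */
        mod_timer(t, jiffies + 5 * HZ);   /* fire ~5 seconds from now */
}
```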
5. Checking a timer task's state:
```c
static inline int timer_pending(const struct timer_list *timer)
{
        return timer->entry.next != NULL;
}
```
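timer_pending() returns nonzero while the timer task is still queued on some timer list; the test relies on entry.next being set to NULL when the timer is detached. A small sketch of the usual guard (arm_once is a hypothetical helper):

```c
static void arm_once(struct timer_list *t)
{
        /* Only arm the timer if it is not already queued: */
        if (!timer_pending(t))
                mod_timer(t, jiffies + HZ);
}
```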
With the interfaces covered, here is a simple example:
```c
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/delay.h>

#define ENTER() printk(KERN_DEBUG "%s() Enter", __func__)
#define EXIT() printk(KERN_DEBUG "%s() Exit", __func__)
#define ERR(fmt, args...) printk(KERN_ERR "%s()-%d: " fmt "\n", __func__, __LINE__, ##args)
#define DBG(fmt, args...) printk(KERN_DEBUG "%s()-%d: " fmt "\n", __func__, __LINE__, ##args)

struct test_timer {
        struct timer_list t;
        unsigned long nums;
};

static void my_timer_func(unsigned long data)
{
        struct test_timer *timer = (struct test_timer *)data;

        DBG("nums: %lu", timer->nums--);
        if (timer->nums > 0) {
                mod_timer(&timer->t, timer->t.expires + HZ);  /* re-register the timer task */
        }
}

static struct test_timer my_timer;

static int __init timer_demo_init(void)
{
        setup_timer(&my_timer.t, my_timer_func, (unsigned long)&my_timer);
        my_timer.nums = 30;
        msleep_interruptible(2000);
        DBG("before mod_timer");
        mod_timer(&my_timer.t, jiffies + 2 * HZ);
        DBG("success");
        return 0;
}

static void __exit timer_demo_exit(void)
{
        ENTER();
        while (my_timer.nums > 0) {
                DBG("waiting my_timer exit");
                msleep_interruptible(1000);
        }
        EXIT();
}

MODULE_LICENSE("GPL");
module_init(timer_demo_init);
module_exit(timer_demo_exit);
```
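To trace the demo: timer_demo_init() arms the timer to fire two seconds after loading; each run of my_timer_func() logs and decrements nums, then pushes expires one HZ further out via mod_timer(), so the handler fires roughly once a second until nums reaches 0. timer_demo_exit() polls nums so the module cannot be unloaded while the timer is still live.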
3. How timer tasks are registered:
Next, let's look at how the kernel manages the timer tasks we register, starting from add_timer():
```c
void add_timer(struct timer_list *timer)
{
        BUG_ON(timer_pending(timer));
        mod_timer(timer, timer->expires);
}
```
As this shows, registering with add_timer() is the same as calling mod_timer(timer, timer->expires).
```c
int mod_timer(struct timer_list *timer, unsigned long expires)
{
        /*
         * This is a common optimization triggered by the
         * networking code - if the timer is re-modified
         * to be the same thing then just return:
         */
        if (timer_pending(timer) && timer->expires == expires)
                return 1;

        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
```
mod_timer() first checks whether the timer task is already pending with the same expiry time; if so there is nothing to do and it returns right away, avoiding a redundant re-registration. Otherwise it calls __mod_timer():
```c
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
            bool pending_only, int pinned)
{
        struct tvec_base *base, *new_base;
        unsigned long flags;
        int ret = 0, cpu;

        timer_stats_timer_set_start_info(timer);
        BUG_ON(!timer->function);

        base = lock_timer_base(timer, &flags);

        /* If the timer_list is already active, first remove it from its
         * current list via detach_timer(). */
        if (timer_pending(timer)) {
                detach_timer(timer, 0);
                if (timer->expires == base->next_timer &&
                    !tbase_get_deferrable(timer->base))
                        base->next_timer = base->timer_jiffies;
                ret = 1;
        } else {
                if (pending_only)
                        goto out_unlock;
        }

        debug_activate(timer, expires);

        new_base = __get_cpu_var(tvec_bases);

        cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
                int preferred_cpu = get_nohz_load_balancer();

                if (preferred_cpu >= 0)
                        cpu = preferred_cpu;
        }
#endif
        new_base = per_cpu(tvec_bases, cpu);

        if (base != new_base) {
                /*
                 * We are trying to schedule the timer on the local CPU.
                 * However we can't change timer's base while it is running,
                 * otherwise del_timer_sync() can't detect that the timer's
                 * handler yet has not finished. This also guarantees that
                 * the timer is serialized wrt itself.
                 */
                if (likely(base->running_timer != timer)) {
                        /* See the comment in lock_timer_base() */
                        timer_set_base(timer, NULL);
                        spin_unlock(&base->lock);
                        base = new_base;
                        spin_lock(&base->lock);
                        timer_set_base(timer, base);
                }
        }

        timer->expires = expires;
        if (time_before(timer->expires, base->next_timer) &&
            !tbase_get_deferrable(timer->base))
                base->next_timer = timer->expires;
        internal_add_timer(base, timer);

out_unlock:
        spin_unlock_irqrestore(&base->lock, flags);

        return ret;
}
```
The registration ultimately lands in internal_add_timer():
```c
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
        unsigned long expires = timer->expires;
        unsigned long idx = expires - base->timer_jiffies;
        struct list_head *vec;

        /* Pick the timer list that matches the expiry time. */
        if (idx < TVR_SIZE) {
                int i = expires & TVR_MASK;
                vec = base->tv1.vec + i;
        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
                int i = (expires >> TVR_BITS) & TVN_MASK;
                vec = base->tv2.vec + i;
        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
                vec = base->tv3.vec + i;
        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
                vec = base->tv4.vec + i;
        } else if ((signed long) idx < 0) {
                /*
                 * Can happen if you add a timer with expires == jiffies,
                 * or you set a timer to go off in the past
                 */
                vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
        } else {
                int i;
                /* If the timeout is larger than 0xffffffff on 64-bit
                 * architectures then we use the maximum timeout:
                 */
                if (idx > 0xffffffffUL) {
                        idx = 0xffffffffUL;
                        expires = idx + base->timer_jiffies;
                }
                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                vec = base->tv5.vec + i;
        }
        /*
         * Timers are FIFO:
         */
        list_add_tail(&timer->entry, vec);   /* append to the tail of the chosen list */
}
```
Before going further, struct tvec_base needs some explanation; once you have read it, the overall management scheme is mostly clear:
```c
/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct tvec {
        struct list_head vec[TVN_SIZE];
};

struct tvec_root {
        struct list_head vec[TVR_SIZE];
};

struct tvec_base {
        spinlock_t lock;
        struct timer_list *running_timer;   /* the timer task currently running */
        unsigned long timer_jiffies;
        unsigned long next_timer;
        struct tvec_root tv1;
        struct tvec tv2;
        struct tvec tv3;
        struct tvec tv4;
        struct tvec tv5;
} ____cacheline_aligned;
```
每個CPU都會包含一個struct tvsec_base類型的對象,用於存儲註冊到每一個CPU上的定時任務。看完這個結構體,能夠發現包含有5個鏈表數組,分別用於存儲不一樣過時時間的定時任務,分佈以下:
Expiry offset (expires - timer_jiffies) in [0, 1<<8) --> tv1; the exact list within tv1.vec is picked by masking: expires & ((1<<8) - 1)
Offset in [1<<8, 1<<(8+6)) --> tv2; list index: (expires >> 8) & ((1<<6) - 1)
Offset in [1<<(8+6), 1<<(8+2*6)) --> tv3; list index: (expires >> (8+6)) & ((1<<6) - 1)
Offset in [1<<(8+2*6), 1<<(8+3*6)) --> tv4; list index: (expires >> (8+2*6)) & ((1<<6) - 1)
Offset at or beyond 1<<(8+3*6) --> tv5; list index: (expires >> (8+3*6)) & ((1<<6) - 1)
The point of splitting timers across five arrays is efficiency. Every interrupt prompts the kernel to check whether any timer tasks have expired and need to run; walking every list each time would clearly be slow, so the kernel only ever inspects the lists in the tv1.vec array. Exactly how execution works is analyzed below. For now, think of registering a timer task as simply storing it on one of the lists of the local CPU's timer base.
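To make the bucket selection concrete, here is a small user-space sketch (my own illustration, not kernel code) that replays internal_add_timer()'s index arithmetic for two hypothetical timers, assuming the usual TVR_BITS=8, TVN_BITS=6 configuration:

```c
#include <stdio.h>

#define TVR_BITS 8
#define TVN_BITS 6
#define TVR_MASK ((1UL << TVR_BITS) - 1)
#define TVN_MASK ((1UL << TVN_BITS) - 1)

static void pick_bucket(unsigned long timer_jiffies, unsigned long expires)
{
        unsigned long idx = expires - timer_jiffies;   /* offset from "now" */

        if (idx < (1UL << TVR_BITS))
                printf("expires=%lu -> tv1.vec[%lu]\n",
                       expires, expires & TVR_MASK);
        else if (idx < (1UL << (TVR_BITS + TVN_BITS)))
                printf("expires=%lu -> tv2.vec[%lu]\n",
                       expires, (expires >> TVR_BITS) & TVN_MASK);
        else
                printf("expires=%lu -> tv3 or beyond\n", expires);
}

int main(void)
{
        pick_bucket(1000, 1100); /* idx=100  < 256   -> tv1.vec[1100 & 255]   = tv1.vec[76] */
        pick_bucket(1000, 2000); /* idx=1000 < 16384 -> tv2.vec[(2000>>8)&63] = tv2.vec[7]  */
        return 0;
}
```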
4. How timer tasks are executed:
Timer tasks are executed in softirq context, which is an atomic context: a timer handler must not sleep or otherwise block.
During kernel initialization, init_timers() registers the softirq:
```c
void __init init_timers(void)
{
        int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
                                   (void *)(long)smp_processor_id());

        init_timer_stats();

        BUG_ON(err == NOTIFY_BAD);
        register_cpu_notifier(&timers_nb);
        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
```
open_softirq() registers the timer softirq, with run_timer_softirq as the handler. A softirq is an interrupt simulated by software; most of the time softirqs are executed in the irq_exit stage, and whatever is left unprocessed there runs in the ksoftirqd daemon thread. We will not dig into the softirq machinery here; for now it is enough to treat it as: after an interrupt occurs, the timer softirq handler run_timer_softirq gets triggered.
```c
/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
        struct tvec_base *base = __get_cpu_var(tvec_bases);

        perf_event_do_pending();

        hrtimer_run_pending();

        /* jiffies >= base->timer_jiffies means at least one tick has not
         * been processed yet, so there may be timer tasks to run. */
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
}

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
        struct timer_list *timer;

        spin_lock_irq(&base->lock);
        while (time_after_eq(jiffies, base->timer_jiffies)) {
                struct list_head work_list;
                struct list_head *head = &work_list;
                int index = base->timer_jiffies & TVR_MASK;

                /*
                 * Cascade timers:
                 */
                /* When the tv1 index wraps to 0, pull the next due bucket
                 * of the higher-level vectors down one level. */
                if (!index &&
                        (!cascade(base, &base->tv2, INDEX(0))) &&
                                (!cascade(base, &base->tv3, INDEX(1))) &&
                                        !cascade(base, &base->tv4, INDEX(2)))
                        cascade(base, &base->tv5, INDEX(3));

                ++base->timer_jiffies;
                list_replace_init(base->tv1.vec + index, &work_list);
                while (!list_empty(head)) {
                        void (*fn)(unsigned long);
                        unsigned long data;

                        timer = list_first_entry(head, struct timer_list, entry);
                        fn = timer->function;   /* the timer task's handler */
                        data = timer->data;

                        timer_stats_account_timer(timer);

                        set_running_timer(base, timer);
                        detach_timer(timer, 1);

                        spin_unlock_irq(&base->lock);
                        {
                                int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
                                /*
                                 * It is permissible to free the timer from
                                 * inside the function that is called from
                                 * it, this we need to take into account for
                                 * lockdep too. To avoid bogus "held lock
                                 * freed" warnings as well as problems when
                                 * looking into timer->lockdep_map, make a
                                 * copy and use that here.
                                 */
                                struct lockdep_map lockdep_map =
                                        timer->lockdep_map;
#endif
                                /*
                                 * Couple the lock chain with the lock chain at
                                 * del_timer_sync() by acquiring the lock_map
                                 * around the fn() call here and in
                                 * del_timer_sync().
                                 */
                                lock_map_acquire(&lockdep_map);

                                trace_timer_expire_entry(timer);
                                fn(data);   /* run the timer task's handler */
                                trace_timer_expire_exit(timer);

                                lock_map_release(&lockdep_map);

                                if (preempt_count != preempt_count()) {
                                        printk(KERN_ERR "huh, entered %p "
                                               "with preempt_count %08x, exited"
                                               " with %08x?\n",
                                               fn, preempt_count,
                                               preempt_count());
                                        BUG();
                                }
                        }
                        spin_lock_irq(&base->lock);
                }
        }
        set_running_timer(base, NULL);
        spin_unlock_irq(&base->lock);
}
```
The logic here is fairly involved, and I cannot claim to fully understand it yet; but from the code above, the gist is that the list of timers that have already expired is detached onto work_list, and the timer tasks on work_list are then executed one after another.
Earlier in that code there is a step that re-files the timer task lists:
```c
int index = base->timer_jiffies & TVR_MASK;

/*
 * Cascade timers:
 */
if (!index &&
        (!cascade(base, &base->tv2, INDEX(0))) &&
                (!cascade(base, &base->tv3, INDEX(1))) &&
                        !cascade(base, &base->tv4, INDEX(2)))
        cascade(base, &base->tv5, INDEX(3));

++base->timer_jiffies;
```
First look at the INDEX macro and the cascade() function:
```c
static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
        /* cascade all the timers from tv up one level */
        struct timer_list *timer, *tmp;
        struct list_head tv_list;

        list_replace_init(tv->vec + index, &tv_list);

        /*
         * We are removing _all_ timers from the list, so we
         * don't have to detach them individually.
         */
        list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
                BUG_ON(tbase_get_base(timer->base) != base);
                internal_add_timer(base, timer);
        }

        return index;
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
```
As you can see, INDEX(N) computes, from the current base->timer_jiffies (not from any individual timer's expiry), which bucket of the corresponding higher-level vector is due next; cascade() detaches that entire list and feeds every timer task on it back through internal_add_timer(), which re-files each one into a finer-grained vector now that its expiry is closer.
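As a worked example (with TVR_BITS=8, TVN_BITS=6, and small numbers chosen for illustration): suppose base->timer_jiffies reaches 512. Its low eight bits are zero, so index == 0 and the cascade runs; INDEX(0) = (512 >> 8) & 63 = 2, so every timer task on tv2.vec[2], i.e. those with expires in [512, 768), is pulled off and re-filed through internal_add_timer(). Since each of them is now within 256 jiffies of expiring, they all land in tv1, where the main loop will find them at exactly the right tick.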