#define PAGE_SHIFT 12
THREAD_ORDER定義在linux/arch/x86/include/asm/下的page_32_types.h和page_64_types.h中,如下爲32位系統下的定義。
/*
 * THREAD_ORDER is 0 when CONFIG_4KSTACKS is defined and 1 otherwise.
 * NOTE(review): presumably the page-allocation order of a kernel thread
 * stack (stack size = page size << THREAD_ORDER) -- confirm against the
 * header this excerpt was taken from.
 */
#ifdef CONFIG_4KSTACKS
#define THREAD_ORDER 0
#else
#define THREAD_ORDER 1
#endif
在64位系統中,THREAD_ORDER定義爲1。
/*
* PMD_SHIFT determines the size of the area a middle-level
* page table can map
*/
#define PMD_SHIFT 21
在64位系統中,PMD_SHIFT定義爲21。
如此,在32位系統上,MIN_KERNEL_ALIGN_LG2的值(即PAGE_SHIFT + THREAD_ORDER)在配置了4K棧(CONFIG_4KSTACKS)時爲12,否則爲13。
pad3: .word 0
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
#version 2.06
hardware_subarch: .long 0 # subarchitecture, added with 2.07
# default to 0 for normal x86 PC
hardware_subarch_data: .quad 0
payload_offset: .long ZO_input_data
payload_length: .long ZO_z_input_len
setup_data: .quad 0 # 64-bit physical pointer to
# single linked list of
# struct setup_data
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
#define VO_INIT_SIZE (VO__end - VO__text)
#if ZO_INIT_SIZE > VO_INIT_SIZE
#define INIT_SIZE ZO_INIT_SIZE
#else
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
下面來看看start_of_setup。
# Entry label of the real-mode setup code, placed in the .entrytext section.
.section ".entrytext", "ax"
start_of_setup:
#ifdef SAFE_RESET_DISK_CONTROLLER
# Reset the disk controller.
# Only assembled when SAFE_RESET_DISK_CONTROLLER is defined.
movw $0x0000, %ax # Reset disk controller
movb $0x80, %dl # All disks
int $0x13
#endif
如果配置了需要安全重置磁盤控制器(SAFE_RESET_DISK_CONTROLLER),那麼首先做的事就是重置所有磁盤的控制器。
start_of_setup在最開始部分會將附加段寄存器%es設置得與數據段寄存器%ds相同。
# Segment and stack setup: make %es equal to %ds, clear the direction flag,
# then make sure %ss:%esp describes a usable stack before interrupts are
# re-enabled.
# Force %es = %ds
movw %ds, %ax
movw %ax, %es
cld
# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code. Recalculate the stack
# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.
movw %ss, %dx
cmpw %ax, %dx # %ds == %ss?
# The mov below does not touch the flags, so the je still tests the cmpw above.
movw %sp, %dx
je 2f # -> assume %sp is reasonably set
# Invalid %ss, make up a new stack
# Start from _end, or from heap_end_ptr when loadflags has CAN_USE_HEAP set,
# and place the stack top STACK_SIZE bytes above that.
movw $_end, %dx
testb $CAN_USE_HEAP, loadflags
jz 1f
movw heap_end_ptr, %dx
1: addw $STACK_SIZE, %dx
# Carry set means the 16-bit addition overflowed.
jnc 2f
xorw %dx, %dx # Prevent wraparound
2: # Now %dx should point to the end of our stack space
andw $~3, %dx # dword align (might as well...)
jnz 3f
movw $0xfffc, %dx # Make sure we're not zero
# %ax still holds the %ds value loaded at the top, so %ss becomes equal to %ds.
3: movw %ax, %ss
movzwl %dx, %esp # Clear upper half of %esp
sti # Now we should have a working stack
以上部分代碼是用來初始化堆棧的,有了堆棧之後就能運行C代碼了。
# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
# The far return pops the offset (label 6) and then the segment (%ds value)
# that were just pushed, so execution continues at 6: with %cs equal to %ds.
pushw %ds
pushw $6f
lretw
6:
# Check signature at end of setup
# Anything other than 0x5a5aaa55 in setup_sig sends us to setup_bad.
cmpl $0x5a5aaa55, setup_sig
jne setup_bad
以上代碼通過pushw、lretw設置了代碼段寄存器%cs,接下來的cmpl用來檢查setup末尾的簽名,如果不爲0x5a5aaa55,那麼說明setup是壞的。
接下來會清空bss段,bss段是未初始化的數據段。
# Zero the bss
# %di = start of the region, %cx = (_end + 3 - __bss_start) >> 2, i.e. the
# region length in dwords rounded up; rep stosl then stores %cx zero dwords
# at %es:%di.
movw $__bss_start, %di
movw $_end+3, %cx
xorl %eax, %eax
subw %di, %cx
shrw $2, %cx
rep; stosl
每次清空四個字節,所以cx右移了兩位;而給_end加3的目的是爲了在除以4時向上取整。
# Jump to C code (should not return)
calll main
在這裏跳轉到了C代碼中的main函數,main是不返回的。
# Setup corrupt somehow...
# Reached when the setup signature check fails: print the setup_corrupt
# message (its address is passed to puts in %eax) and fall into die.
setup_bad:
movl $setup_corrupt, %eax
calll puts
# Fall through...
# die: global function that never returns; it halts and, whenever the CPU
# resumes after hlt, jumps back and halts again.
.globl die
.type die, @function
die:
hlt
jmp die
.size die, .-die
# Message data used by setup_bad, kept in the .initdata section.
.section ".initdata", "a"
setup_corrupt:
# Byte value 7 precedes the text (ASCII BEL).
.byte 7
.string "No setup signature found...\n"
setup的最後一部分代碼是出錯時處理相關的。
3、
linux-2.6.34.13/arch/
x86/boot/
main.c
在main.c中完成了要在實模式中所做的工作,最後會進入保護模式。
/*
 * C entry point of the real-mode setup code.
 *
 * Runs the real-mode initialisation steps strictly in sequence and finishes
 * by calling go_to_protected_mode(); nothing follows that call.
 */
void main(void)
{
	/* Fill in boot_params (the "zeropage") from the boot header */
	copy_boot_params();

	/* Establish where the heap ends */
	init_heap();

	/* Stop here if the CPU cannot run this kernel */
	if (validate_cpu() != 0) {
		puts("Unable to boot - please use a kernel appropriate "
		     "for your CPU.\n");
		die();
	}

	/* Let the BIOS know which CPU mode we are heading for */
	set_bios_mode();

	/* Collect the physical memory layout */
	detect_memory();

	/* Program the keyboard repeat rate */
	keyboard_set_repeat();

	/* MCA information */
	query_mca();

	/* Intel SpeedStep (IST) information */
	query_ist();

#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
	/* APM information, only when APM support is configured */
	query_apm_bios();
#endif

#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	/* EDD information, only when EDD support is configured */
	query_edd();
#endif

	/* Select the video mode */
	set_video();

	/* Forward a 'quiet' command-line option to the decompressor */
	if (cmdline_find_option_bool("quiet") != 0) {
		boot_params.hdr.loadflags |= QUIET_FLAG;
	}

	/* Final steps, then switch to protected mode */
	go_to_protected_mode();
}
首先,在main中要做的事情是初始化boot_params,這是在copy_boot_params()中完成的,代碼如下。
static void copy_boot_params(void)
{
struct old_cmdline {
u16 cl_magic;
u16 cl_offset;
};
const struct old_cmdline * const oldcmd =
(const struct old_cmdline *)OLD_CL_ADDRESS;
BUILD_BUG_ON(sizeof boot_params != 4096);
memcpy(&boot_params.hdr, &hdr, sizeof hdr);
這裏將hdr拷貝到boot_params.hdr中,hdr是在header.S中定義的數據。注意,boot_params這個變量是全局變量,且未被初始化,所以位於bss段,它就位於__bss_start的開始位置。而在之後當啓動保護模式的分頁功能後,第一個頁面就是從它開始的(注意,不是從0x0開始的喔)。所以內核註釋稱它爲「zeropage」,即所謂的0號頁面,足見這個boot_params的重要性。
if (!boot_params.hdr.cmd_line_ptr &&
oldcmd->cl_magic == OLD_CL_MAGIC) {
/* Old-style command line protocol. */
u16 cmdline_seg;
如果bootloader沒有設置cmd_line_ptr,而舊式命令行的魔數(OLD_CL_MAGIC)又存在,說明使用的是舊的命令行協議,這時就將hdr的命令行指針指向舊式命令行所在的位置。
/* Figure out if the command line falls in the region
of memory that an old kernel would have copied up
to 0x90000... */
if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
cmdline_seg = ds();
else
cmdline_seg = 0x9000;
boot_params.hdr.cmd_line_ptr =
(cmdline_seg << 4) + oldcmd->cl_offset;
}
}
boot_params是未初始化的全局變量,編譯器會將它放在bss段,而在進入main之前已經將bss段清零,所以在執行copy_boot_params()之前它是空的。上述代碼初始化了boot_params。
接下來所做的工作是初始化堆,調用了init_heap(),代碼如下。
static void init_heap(void) ios
{
char *stack_end;
/* 若是bootloader告訴kernel須要使用heap, bootloader須要把hdr.loadflags的CAN_US_HEAP位置1. */
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
/* esp是當前堆棧的底,堆棧的大小是STACK_SIZE,由此計算出堆棧的頂stack_end是esp-STACK_SIZE */
asm("leal %P1(%%esp),%0"
: "=r" (stack_end) : "i" (-STACK_SIZE));
/* 堆的底是由boot_params.hdr.heap_end_ptr指定。這個值應該是由bootloader填入的,堆的大小是0x200。那麼heap_end就是heap_end_ptr+0x200 */
heap_end = (char *)
((size_t)boot_params.hdr.heap_end_ptr + 0x200);
/* 若是堆棧和堆有重疊,那麼就減少堆的大小 */
if (heap_end > stack_end)
heap_end = stack_end;
} else {
/* Boot protocol 2.00 only, no heap available */
puts("WARNING: Ancient bootloader, some functionality "
"may be limited!\n");
}
}
初始化了堆之後,接着要檢查CPU是否受支持,如果內核要求的CPU等級高於當前CPU,那麼就終止。其中,對CPU進行檢查的代碼在cpucheck.c中。
/*
 * Check that the CPU is good enough for this kernel.
 *
 * Returns 0 when the CPU is acceptable.  Returns -1 after printing a
 * diagnostic when either the detected CPU level is below the required
 * level, or check_cpu() reported a set of missing feature bits.
 */
int validate_cpu(void)
{
	u32 *err_flags;
	int cpu_level, req_level;
	const unsigned char *cap_name;
	int word, bit;

	check_cpu(&cpu_level, &req_level, &err_flags);

	if (cpu_level < req_level) {
		printf("This kernel requires an %s CPU, ",
		       cpu_name(req_level));
		printf("but only detected an %s CPU.\n",
		       cpu_name(cpu_level));
		return -1;
	}

	/* No missing-feature words reported: the CPU is fine. */
	if (!err_flags)
		return 0;

	puts("This kernel requires the following features "
	     "not present on the CPU:\n");

	/*
	 * x86_cap_strs is walked as a list of records: a word index byte,
	 * a bit index byte, then a NUL-terminated name.
	 */
	cap_name = (const unsigned char *)x86_cap_strs;

	for (word = 0; word < NCAPINTS; word++) {
		u32 missing = err_flags[word];

		for (bit = 0; bit < 32; bit++, missing >>= 1) {
			/* Step past a record that lies before (word, bit). */
			if (cap_name[0] < word ||
			    (cap_name[0] == word && cap_name[1] < bit)) {
				cap_name += 2;
				while (*cap_name++)
					;
			}

			if (!(missing & 1))
				continue;

			/* Print the name when the record matches, else word:bit. */
			if (cap_name[0] == word &&
			    cap_name[1] == bit &&
			    cap_name[2])
				printf("%s ", cap_name+2);
			else
				printf("%d:%d ", word, bit);
		}
	}
	putchar('\n');
	return -1;
}
緊接着設置bios模式,告訴BIOS我們打算運行在什麼CPU模式下。通過代碼可以看出,在32位內核中這個函數什麼也不做,而在64位內核(CONFIG_X86_64)中要通過int 0x15中斷(ax=0xec00)來通知BIOS。
/*
 * Tell the BIOS which CPU mode the kernel is going to use.
 *
 * Only does something when CONFIG_X86_64 is defined: it issues int 0x15
 * with ax = 0xec00 and bx = 2 and ignores any result registers.
 * NOTE(review): bx = 2 presumably announces long mode -- confirm against
 * the BIOS interface documentation.
 */
static void set_bios_mode(void)
{
#ifdef CONFIG_X86_64
	struct biosregs regs;

	initregs(&regs);
	regs.bx = 2;
	regs.ax = 0xec00;
	intcall(0x15, &regs, NULL);
#endif
}
然後要檢測內存,detect_memory()函數代碼非常簡單,linux內核會分別嘗試調用detect_memory_e820()、detect_memory_e801()、detect_memory_88()來獲得系統物理內存佈局。
/*
 * Run all three memory probes, always in the order e820, e801, 88.
 *
 * Returns 0 if at least one probe succeeded (e820 found one or more
 * entries, or e801/88 returned 0), and -1 if every probe failed.
 */
int detect_memory(void)
{
	int e820_ok = detect_memory_e820() > 0;
	int e801_ok = !detect_memory_e801();
	int int88_ok = !detect_memory_88();

	return (e820_ok || e801_ok || int88_ok) ? 0 : -1;
}
detect_memory_e820()、detect_memory_e801()、detect_memory_88()這3個函數內部其實都會通過intcall()調用bios中斷以取得內存信息,該中斷調用形式爲int 0x15,調用前分別把AX寄存器設置爲0xe820、0xe801,或者把AH設置爲0x88,這裏以e820爲例說明。
由於歷史原因,一些i/o設備也會佔據一部分內存物理地址空間,因此系統可以使用的物理內存空間是不連續的,系統內存被分成了很多段,每個段的屬性也是不一樣的。int 0x15 查詢物理內存時每次返回一個內存段的信息,因此要想得到系統中所有的物理內存,我們必須以迭代的方式去查詢。detect_memory_e820()函數把int 0x15放到一個do-while循環裏,每次得到的一個內存段放到struct e820entry裏,而struct e820entry的結構正是e820返回結果的結構!像其它啓動時獲得的結果一樣,最終都會被放到boot_params裏,e820的結果被放到了boot_params.e820_map。
/*
 * Query the memory map with int 0x15, ax = 0xe820.
 *
 * Entries are collected one by one into boot_params.e820_map; the number
 * of entries is stored in boot_params.e820_entries and returned.  A count
 * of zero means the probe produced nothing usable.
 */
static int detect_memory_e820(void)
{
	static struct e820entry buf; /* static so it is zeroed */
	struct e820entry *next_slot = boot_params.e820_map;
	struct biosregs ireg, oreg;
	int count = 0;

	initregs(&ireg);
	ireg.ax = 0xe820;
	ireg.cx = sizeof buf;
	ireg.edx = SMAP;
	ireg.di = (size_t)&buf;

	/*
	 * At least one BIOS is known to assume that every call is handed
	 * the same buffer as the previous one and to update only the fields
	 * that changed.  So each result lands in the single buffer "buf" and
	 * is then copied out entry by entry.
	 *
	 * ACPI 3+ extended attributes are deliberately not handled: there
	 * are BIOSes in the field that report a zero valid bit for every
	 * range, and no other attribute bit is used at present.  Revisit
	 * if those bits ever become meaningful.
	 */
	for (;;) {
		intcall(0x15, &ireg, &oreg);
		ireg.ebx = oreg.ebx; /* continuation value for the next call */

		/*
		 * A BIOS that ends the chain with CF = 1 rather than
		 * %ebx = 0 does not always report the SMAP signature on
		 * that final failing probe, so test CF first.
		 */
		if (oreg.eflags & X86_EFLAGS_CF)
			break;

		/*
		 * Losing the SMAP signature mid-search leaves us unable to
		 * tell whether the map is partial, complete or garbage:
		 * report failure.
		 */
		if (oreg.eax != SMAP) {
			count = 0;
			break;
		}

		*next_slot++ = buf;
		count++;

		if (!(ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map)))
			break;
	}

	return boot_params.e820_entries = count;
}
detect_memory_e801()也是用於獲取內存的佈局。
/*
 * Query extended memory with int 0x15, ax = 0xe801.
 *
 * The BIOS reports memory between 1 MB and 16 MB in KiB (AX or CX) and
 * memory above 16 MB in 64 KiB blocks (BX or DX).  On success the total
 * in KiB is stored in boot_params.alt_mem_k.
 *
 * Returns 0 on success, -1 if the call failed (CF set) or returned a
 * bogus value.
 */
static int detect_memory_e801(void)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);
	ireg.ax = 0xe801;
	intcall(0x15, &ireg, &oreg);

	if (oreg.eflags & X86_EFLAGS_CF)
		return -1;

	/*
	 * Normalise the result into ax/bx: when the BIOS filled in the
	 * cx/dx pair, copy it over; otherwise ax/bx already hold the values.
	 */
	/* Do we really need to do this? */
	if (oreg.cx || oreg.dx) {
		oreg.ax = oreg.cx;
		oreg.bx = oreg.dx;
	}

	if (oreg.ax > 15*1024) {
		return -1;	/* Bogus! */
	} else if (oreg.ax == 15*1024) {
		/*
		 * Use bx, not dx: after the normalisation above bx holds the
		 * 64 KiB block count in both cases, whereas dx is zero when
		 * the BIOS answered only in ax/bx.  << 6 converts 64 KiB
		 * blocks to KiB.
		 */
		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
	} else {
		/*
		 * This ignores memory above 16MB if we have a memory
		 * hole there. If someone actually finds a machine
		 * with a memory hole at 16MB and no support for
		 * 0E820h they should probably generate a fake e820
		 * map.
		 */
		boot_params.alt_mem_k = oreg.ax;
	}

	return 0;
}
detect_memory_88()同樣是用於獲取內存的佈局。
static int detect_memory_88(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x88;
intcall(0x15, &ireg, &oreg);
boot_params.screen_info.ext_mem_k = oreg.ax;
return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
接下來要設置鍵盤的重複率,但是這一步貌似是可有可無的。在對keyboard_set_repeat的說明中,有這麼一段註釋「Set the keyboard repeat rate to maximum. Unclear why this is done here; this might be possible to kill off as stale code.」所以這個操作存在的理由是有疑問的。
緊接着的query_mca()實際上是通過int 15h,ah=0c0h中斷來獲取MCA(Micro Channel Architecture)系統描述表,詳情可以查閱該中斷的說明。
int query_mca(void)
{
struct biosregs ireg, oreg;
u16 len;
initregs(&ireg);
ireg.ah = 0xc0;
intcall(0x15, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No MCA present */
set_fs(oreg.es);
len = rdfs16(oreg.bx);
if (len > sizeof(boot_params.sys_desc_table))
len = sizeof(boot_params.sys_desc_table);
copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
return 0;
}
再接下來的query_ist()中經過int 15h,ax=0e980h中斷來獲取Intel Speed Step信息。
static void query_ist(void)
{
struct biosregs ireg, oreg;
/* Some older BIOSes apparently crash on this call, so filter
it from machines too old to have SpeedStep at all. */
if (cpu.level < 6)
return;
initregs(&ireg);
ireg.ax = 0xe980; /* IST Support */
ireg.edx = 0x47534943; /* Request value */
intcall(0x15, &ireg, &oreg);
boot_params.ist_info.signature = oreg.eax;
boot_params.ist_info.command = oreg.ebx;
boot_params.ist_info.event = oreg.ecx;
boot_params.ist_info.perf_level = oreg.edx;
}
根據配置,還須要獲取APM信息或EDD信息,獲取方法與IST相似。
最後在進入保護模式以前設置視頻模式,set_video()在video.c中定義。
/*
 * Select the video mode requested in boot_params.hdr.vid_mode.
 *
 * The current screen is saved first, the safe card drivers are probed,
 * and mode setting is retried (through the interactive menu) until
 * set_mode() returns 0.  The mode that was finally set is written back to
 * hdr.vid_mode, and the screen is restored when do_restore is set.
 */
void set_video(void)
{
	u16 mode = boot_params.hdr.vid_mode;

	RESET_HEAP();

	store_mode_params();
	save_screen();
	probe_cards(0);

	while (1) {
		if (mode == ASK_VGA)
			mode = mode_menu();

		if (set_mode(mode) == 0)
			break;

		/* Mode rejected: report it and ask the user next time round. */
		printf("Undefined video mode number: %x\n", mode);
		mode = ASK_VGA;
	}

	boot_params.hdr.vid_mode = mode;
	vesa_store_edid();
	store_mode_params();

	if (do_restore)
		restore_screen();
}
根據hdr獲得視頻模式,存儲到內部變量mode中。在header.S中設置的vid_mode值是SVGA_MODE。隨後,調用store_mode_params()來設置boot_params的screen_info字段。
/*
 * Record the current text-mode parameters in boot_params.screen_info for
 * the kernel to use later.
 *
 * Most values are obtained from the BIOS.  The rows/columns of the default
 * 80x25 mode are the exception -- they are set directly, because some very
 * obscure BIOSes supply insane values.
 */
static void store_mode_params(void)
{
	int cols, rows;

	/*
	 * In graphics mode the mode-setting driver (currently only
	 * video-vesa.c) is responsible for storing the parameters.
	 */
	if (graphic_mode)
		return;

	store_cursor_position();
	store_video_mode();

	/*
	 * Mode 0x07 means MDA, HGC, or VGA in monochrome mode;
	 * everything else is CGA, EGA, VGA and so forth.
	 */
	if (boot_params.screen_info.orig_video_mode == 0x07)
		video_segment = 0xb000;
	else
		video_segment = 0xb800;

	set_fs(0);

	/* Font size, BIOS area */
	boot_params.screen_info.orig_video_points = rdfs16(0x485);

	cols = rdfs16(0x44a);
	if (adapter == ADAPTER_CGA)
		rows = 25;
	else
		rows = rdfs8(0x484)+1;

	/* Explicitly forced dimensions win over what was read back. */
	if (force_x)
		cols = force_x;
	if (force_y)
		rows = force_y;

	boot_params.screen_info.orig_video_cols = cols;
	boot_params.screen_info.orig_video_lines = rows;
}
在store_mode_params()函數中調用了store_cursor_position和store_video_mode來得到光標位置和視頻模式。
static void store_cursor_position(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x03;
intcall(0x10, &ireg, &oreg);
boot_params.screen_info.orig_x = oreg.dl;
boot_params.screen_info.orig_y = oreg.dh;
if (oreg.ch & 0x20)
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
}
static void store_video_mode(void)
{
struct biosregs ireg, oreg;
/* N.B.: the saving of the video page here is a bit silly,
since we pretty much assume page 0 everywhere. */
initregs(&ireg);
ireg.ah = 0x0f;
intcall(0x10, &ireg, &oreg);
/* Not all BIOSes are clean with respect to the top bit */
boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
boot_params.screen_info.orig_video_page = oreg.bh;
}
以上是store_cursor_position和store_video_mode,它們都是經過中斷來獲取信息的。
接下來調用save_screen來保存屏幕內容。
/*
 * Snapshot of the text screen kept on the heap: dimensions, cursor
 * position and the character cells themselves.  "data" stays NULL (the
 * object is static) when nothing has been saved.
 */
static struct saved_screen {
	int x, y;
	int curx, cury;
	u16 *data;
} saved;

/*
 * Copy the current text screen from video memory to the heap.
 *
 * Must run after store_mode_params(), whose results supply the dimensions
 * and cursor position.  If the heap cannot hold the screen plus 512 spare
 * bytes, only the dimensions and cursor are recorded.
 */
static void save_screen(void)
{
	int cells;

	saved.x = boot_params.screen_info.orig_video_cols;
	saved.y = boot_params.screen_info.orig_video_lines;
	saved.curx = boot_params.screen_info.orig_x;
	saved.cury = boot_params.screen_info.orig_y;

	cells = saved.x*saved.y;

	if (!heap_free(cells*sizeof(u16)+512))
		return;		/* Not enough heap to save the screen */

	saved.data = GET_HEAP(u16, cells);

	set_fs(video_segment);
	copy_from_fs(saved.data, 0, cells*sizeof(u16));
}
以上代碼的具體工做是從video_segment讀取數據而後保存到堆。
然後掃描整個顯卡列表。video_cards和video_cards_end標記了顯卡驅動描述符(struct card_info)數組的起止位置,這個數組並不是bootloader傳遞過來的,而是由setup自身的各個顯卡驅動在鏈接時匯集而成的。
/*
 * Probe the video drivers and have them generate their mode lists.
 *
 * Only drivers whose "unsafe" field equals the argument are probed, and
 * each class (0 or 1) is probed at most once.  A driver without a probe
 * hook ends up with zero modes.
 */
void probe_cards(int unsafe)
{
	static u8 probed[2];
	struct card_info *card;

	if (probed[unsafe])
		return;

	probed[unsafe] = 1;

	for (card = video_cards; card < video_cards_end; card++) {
		if (card->unsafe != unsafe)
			continue;

		card->nmodes = card->probe ? card->probe() : 0;
	}
}
如果bootloader設置hdr的vid_mode爲ASK_VGA,就調用mode_menu()進行一些交互式的工作,讓用戶選擇顯示模式。在header.S中vid_mode的初始值是SVGA_MODE:這個宏在沒有另外定義時取ASK_VGA,不過arch/x86/boot/Makefile默認通過-DSVGA_MODE=NORMAL_VGA把它定義爲NORMAL_VGA。
而後調用vesa_store_edid()函數,它是對EDID的設置。EDID是一種VESA標準數據格式,其中包含有關監視器及其性能的參數,包括供應商信息、最大圖像大小、顏色設置、廠商預設置、頻率範圍的限制以及顯示器名和序列號的字符串。
接下來會再次執行store_mode_params()來保存數據,最後調用restore_screen()恢復屏幕內容。
/*
 * Write the screen contents saved by save_screen() back to video memory
 * and put the cursor back.
 *
 * Must run after store_mode_params(), which supplies the current screen
 * dimensions.  Rows and columns that did not exist in the saved screen are
 * filled with blanks; saved content that no longer fits is dropped.  Does
 * nothing in graphics mode or when no screen was saved.
 */
static void restore_screen(void)
{
	int cols = boot_params.screen_info.orig_video_cols;
	int rows = boot_params.screen_info.orig_video_lines;
	int row;
	addr_t dst = 0;
	u16 *src = saved.data;
	struct biosregs ireg;

	/* Can't restore onto a graphic mode, or without saved contents */
	if (graphic_mode || !src)
		return;

	/* Restore screen contents */
	set_fs(video_segment);
	for (row = 0; row < rows; row++) {
		int npad = cols;	/* rows past the saved area are all blank */

		if (row < saved.y) {
			int ncopy;

			if (cols < saved.x) {
				ncopy = cols;
				npad = 0;
			} else {
				ncopy = saved.x;
				npad = cols-saved.x;
			}

			copy_to_fs(dst, src, ncopy*sizeof(u16));
			dst += ncopy*sizeof(u16);
			src += saved.x;
		}

		/* Writes "npad" blank characters to
		   video_segment:dst and advances dst */
		asm volatile("pushw %%es ; "
			     "movw %2,%%es ; "
			     "shrw %%cx ; "
			     "jnc 1f ; "
			     "stosw \n\t"
			     "1: rep;stosl ; "
			     "popw %%es"
			     : "+D" (dst), "+c" (npad)
			     : "bdS" (video_segment),
			       "a" (0x07200720));
	}

	/* Keep the saved cursor inside the current screen */
	if (saved.curx >= cols)
		saved.curx = cols-1;
	if (saved.cury >= rows)
		saved.cury = rows-1;

	/* Restore cursor position */
	initregs(&ireg);
	ireg.ah = 0x02;		/* Set cursor position */
	ireg.dh = saved.cury;
	ireg.dl = saved.curx;
	intcall(0x10, &ireg, NULL);

	store_cursor_position();
}
到這裏video就設置完畢了,進入保護模式前的準備工做就作好了。
4、
linux-2.6.34.13/arch/
x86/boot/
pm.c
進入保護模式的代碼在boot/pm.c中,在main的最後調用了go_to_protected_mode(),這是一個不會返回的函數。
/*
 * Leave real mode: run the pre-switch hook, enable A20, reset the
 * coprocessor, mask the PIC, load the IDT and GDT, and finally jump to the
 * 32-bit entry point in hdr.code32_start, passing the linear address of
 * boot_params.
 */
void go_to_protected_mode(void)
{
	/* Hook before leaving real mode, also disables interrupts */
	realmode_switch_hook();

	/* The A20 gate must be open before going any further */
	if (enable_a20() != 0) {
		puts("A20 gate not responding, unable to boot...\n");
		die();
	}

	/* Reset coprocessor (IGNNE#) */
	reset_coprocessor();

	/* Mask all interrupts in the PIC */
	mask_all_interrupts();

	/* Actual transition to protected mode... */
	setup_idt();
	setup_gdt();

	/* Second argument: offset of boot_params plus the segment base */
	protected_mode_jump(boot_params.hdr.code32_start,
			    (u32)&boot_params + (ds() << 4));
}
在進入保護模式之前要先檢查有沒有hook代碼(hdr.realmode_swtch),有則調用,沒有則關閉中斷、禁用不可屏蔽中斷(NMI)。
/*
 * Last step before leaving real mode.
 *
 * Without a boot-loader hook in hdr.realmode_swtch, interrupts are
 * disabled with cli and NMI is disabled by writing 0x80 to port 0x70.
 * With a hook, a far call is made through it instead.
 */
static void realmode_switch_hook(void)
{
	if (!boot_params.hdr.realmode_swtch) {
		asm volatile("cli");
		outb(0x80, 0x70); /* Disable NMI */
		io_delay();
		return;
	}

	/* The hook is declared to clobber eax, ebx, ecx and edx. */
	asm volatile("lcallw *%0"
		     : : "m" (boot_params.hdr.realmode_swtch)
		     : "eax", "ebx", "ecx", "edx");
}
然後打開a20地址線,如果打開失敗則直接die掉。那麼什麼是a20地址線呢?在8086中是用SEG:OFFSET這樣的模式來分段的,所以能表示的最大地址是FFFF:FFFF,也就是10FFEFh。可是在8086中只有20位的地址總線,所以只能尋址到1MB,如果試圖訪問超過1MB的地址時會怎麼樣呢?實際上系統不會發生異常,而是回捲(wrap)回去,重新從地址零開始尋址。可是到了80286時,真的可以訪問超過1MB的地址,如果遇到同樣的情況,系統不會再回捲尋址,這樣就造成了向下不兼容。爲了保證兼容性,IBM使用8042鍵盤控制器來控制第20個(從0開始數)地址位,這就是a20地址線,如果不被打開,第20個地址位將會總是爲零。
下圖就是關於實模式下A20禁用與使用的區別。
static void enable_a20_bios(void)
{
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0x2401;
intcall(0x15, &ireg, NULL);
}
/*
 * Enable the A20 gate through the keyboard controller.
 *
 * Three bytes are written in a fixed order; empty_8042() is called before
 * each write and once more after the last one.
 */
static void enable_a20_kbc(void)
{
	static const struct {
		unsigned char value;
		unsigned short port;
	} sequence[] = {
		{ 0xd1, 0x64 },	/* Command write */
		{ 0xdf, 0x60 },	/* A20 on */
		{ 0xff, 0x64 },	/* Null command, but UHCI wants it */
	};
	unsigned int step;

	for (step = 0; step < sizeof(sequence)/sizeof(sequence[0]); step++) {
		empty_8042();
		outb(sequence[step].value, sequence[step].port);
	}

	empty_8042();
}
/*
 * Enable the A20 gate through configuration port A (0x92): read the port,
 * set bit 1 (enable A20), clear bit 0 (so the machine is not reset), and
 * write the value back.
 */
static void enable_a20_fast(void)
{
	u8 port_a = (inb(0x92) | 0x02) & ~0x01;

	outb(port_a, 0x92);
}
打開a20地址線不止一種方法,在該版本內核中採用了三種方法(BIOS調用、鍵盤控制器、0x92快速端口),從而儘可能避免打開失敗。
緊接着重置數學協處理器,這裏就是向端口0xf0和0xf1寫一個0。
/*
 * Reset the math coprocessor: write 0 to port 0xf0 and then to port 0xf1,
 * with an I/O delay after each write.
 */
static void reset_coprocessor(void)
{
	unsigned short port;

	for (port = 0xf0; port <= 0xf1; port++) {
		outb(0, port);
		io_delay();
	}
}
還要屏蔽PIC上的所有中斷(主PIC上只保留級聯線),這裏也是通過向0xa1和0x21端口寫數據完成的。
/*
 * Mask interrupts at the PICs: every line on the secondary PIC (port
 * 0xa1), and every line except the cascade on the primary PIC (port 0x21).
 * An I/O delay follows each write.
 */
static void mask_all_interrupts(void)
{
	const unsigned char secondary_mask = 0xff;	/* everything masked */
	const unsigned char primary_mask = 0xfb;	/* all but cascade */

	outb(secondary_mask, 0xa1);
	io_delay();

	outb(primary_mask, 0x21);
	io_delay();
}
進入保護模式之前最關鍵的動作是設置gdt和idt。
/*
 * Memory operand handed to the lgdtl and lidtl instructions by setup_gdt()
 * and setup_idt(): a 16-bit length followed by a 32-bit address.
 */
struct gdt_ptr {
u16 len; /* setup_gdt() stores sizeof(table) - 1 here */
u32 ptr; /* setup_gdt() stores the table offset plus (ds() << 4) here */
} __attribute__((packed)); /* packed: no padding between len and ptr */
static void setup_gdt(void)
{
/* There are machines which are known to not boot with the GDT
being 8-byte unaligned. Intel recommends 16 byte alignment. */
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
/* CS: code, read/execute, 4 GB, base 0 */
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
這裏的GDT_ENTRY(flags,base,limit)在asm/segment.h中定義,flags是標誌位,base是基址,limit是段界限。
flags各個位代表的內容如下:
第0-3位爲TYPE(描述符類型),第4位爲S(1表示數據段和代碼段描述符,0表示系統段描述符和門描述符),第5、6位爲DPL(段的特權等級),第7位爲P(1表示段在內存中存在,0表示段在內存中不存在),第8-11位爲段界限的16-19位,第12位爲AVL(保留並且可以被操作系統使用),第13位爲保留位(總是0),第14位爲D/B,第15位爲G(0表示段界限粒度爲字節,1表示段界限粒度爲4KB)。
從這裏能夠看出,CS段定義的flags爲0xC09B,G位置1表示段界限粒度爲4KB,段界限爲0xFFFFF,總計能夠尋址4GB。
/* DS: data, read/write, 4 GB, base 0 */
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
DS段定義的flags爲0xC093,G位置1表示段界限粒度爲4KB,段界限爲0xFFFFF,總計能夠尋址4GB。
/* TSS: 32-bit tss, 104 bytes, base 4096 */
/* We only have a TSS here to keep Intel VT happy;
we don't actually use it for anything. */
[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
這裏雖然定義了TSS段,可是按照註釋TSS段應該沒有被使用。
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
of the gdt_ptr contents. Thus, make it static so it will
stay in memory, at least long enough that we switch to the
proper kernel GDT. */
static struct gdt_ptr gdt;
這裏將全局描述符表的長度、地址信息保存到gdt_ptr結構,最後調用lgdt指令設置GDT。
gdt.len = sizeof(boot_gdt)-1;
gdt.ptr = (u32)&boot_gdt + (ds() << 4);
asm volatile("lgdtl %0" : : "m" (gdt));
}
與GDT設置相比,IDT的設置在這個階段就比較簡單了。
static void setup_idt(void)
{
static const struct gdt_ptr null_idt = {0, 0};
asm volatile("lidtl %0" : : "m" (null_idt));
}
實際上,只是調用lidt指令設置一個空表。
設置完gdt、idt後,調用protected_mode_jump()跳轉到code32_start, code32_start 在header.S中定義的值爲0x100000,也能夠由bootloader指定。
5、
linux-2.6.34.13/arch/
x86/boot/
pmjump.S
protected_mode_jump 在pmjump.S中定義,是使用匯編編寫的,它的工作是進入保護模式並跳轉到code32_start。
最開始是16位彙編代碼。
.text
.code16
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
GLOBAL(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
edx的內容爲bootparams,這是由於內核中參數傳遞是fastcall類型的,優先經過寄存器傳參,eax的值爲entrypoint。
xorl %ebx, %ebx
movw %cs, %bx
shll $4, %ebx
addl %ebx, 2f
這幾句代碼的作用是計算並設置下文中的32位jmp指令將要跳轉到的地址。
jmp 1f # Short jump to serialize on 386/486
1:
movw $__BOOT_DS, %cx
movw $__BOOT_TSS, %di
movl %cr0, %edx
orb $X86_CR0_PE, %dl # Protected mode
movl %edx, %cr0
上面三條指令設置了cr0寄存器的PE標誌位,這樣CPU就進入保護模式工作。
接下來是一條32位指令,它的做用是跳轉到in_pm32。
# Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode
2: .long in_pm32 # offset
.word __BOOT_CS # segment
ENDPROC(protected_mode_jump)
in_pm32的做用主要是設置寄存器和跳轉到entrypoint。
# in_pm32: first 32-bit code reached by the far jump at the end of
# protected_mode_jump.  Register contents on entry, as set up by that routine:
#   %eax = 32-bit entry point    %esi = pointer to boot_params
#   %ecx = __BOOT_DS             %di  = __BOOT_TSS
#   %ebx = real-mode %cs shifted left by 4
.code32
.section ".text32","ax"
GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
movl %ecx, %fs
movl %ecx, %gs
movl %ecx, %ss
# The 32-bit code sets up its own stack, but this way we do have
# a valid stack if some debugging hack wants to use it.
# NOTE(review): adding %ebx presumably turns the 16-bit stack offset into a
# flat address -- confirm that the real-mode stack segment matched %cs.
addl %ebx, %esp
# Set up TR to make Intel VT happy
ltr %di
# Clear registers to allow for future extensions to the
# 32-bit boot protocol
# %eax (entry point) and %esi (boot_params pointer) are left untouched.
xorl %ecx, %ecx
xorl %edx, %edx
xorl %ebx, %ebx
xorl %ebp, %ebp
xorl %edi, %edi
# Set up LDTR to make Intel VT happy
# %cx was zeroed just above, so a null selector is loaded.
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
ENDPROC(in_pm32)
隨着最後一條jmp指令的執行,我們終於到了保護模式。