标签:
我们跳过boot,setup,直接来到head代码,内核映像的起点是stext,也是_stext,引导和解压缩以后的整个映像放在内存从0x100000即1MB开始的区间。CPU执行内核映像的入口startup_32就在内核映像开头的地方,因此其物理地址也是0x100000。
然而,在正常运行时整个内核映像都应该在系统空间中,系统空间的虚拟地址与物理地址间有个固定的位移,这就是0xC0000000,即3GB。所以,在连接内核映像时已经在所有的符号地址加了一个偏移量0xC0000000,这样startup_32的虚拟地址就成了0xC0100000。
进入startup_32时,CPU运行于保护模式下的段式寻址方式。段描述表中与__KERNEL_CS和__KERNEL_DS相对应的描述项所提供的基地址都是0。其中代码段寄存器CS已在进入startup_32之前设置成__KERNEL_CS,数据段寄存器则尚未设置成__KERNEL_DS。
虽然代码段寄存器已经设置成__KERNEL_CS,从而startup_32的(虚拟)地址为0xC0100000,但是在转入这个入口时使用的指令是"ljmp 0x100000"而不是"ljmp startup_32",所以装入CPU中的寄存器IP的地址是物理地址0x100000而不是虚拟地址0xC0100000。这样,CPU在进入startup_32以后就会继续以物理地址取指令。只要不在代码段中引用某个绝对地址,例如向某个地址绝对转移,或者调用某个子程序,就可以一直这样运行下去,而与CS的内容无关。此外,CPU的中断已在进入startup_32之前关闭了。
从startup_32开始的汇编代码在arch/i386/kernel/head.S中,代码如下:
/*
* linux/arch/i386/head.S -- the 32-bit startup code.
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Enhanced CPU detection and feature setting code by Mike Jagdis
* and Martin Mares, November 1997.
*/
.text
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
/*
* Old-style command line protocol: the code below compares the word at
* OLD_CL_MAGIC_ADDR with OLD_CL_MAGIC and, on a match, takes the command
* line from OLD_CL_BASE_ADDR plus the 16-bit offset stored at
* OLD_CL_OFFSET. NEW_CL_POINTER is the offset of the new-style command
* line pointer inside the copied boot parameters.
*/
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
#define OLD_CL_BASE_ADDR 0x90000
#define OLD_CL_OFFSET 0x90022
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
/*
* References to members of the boot_cpu_data structure.
* Each name is a byte offset from the start of boot_cpu_data; the CPU
* detection code below stores its results through them.
*/
#define CPU_PARAMS SYMBOL_NAME(boot_cpu_data)
#define X86 CPU_PARAMS+0
#define X86_VENDOR CPU_PARAMS+1
#define X86_MODEL CPU_PARAMS+2
#define X86_MASK CPU_PARAMS+3
#define X86_HARD_MATH CPU_PARAMS+6
#define X86_CPUID CPU_PARAMS+8
#define X86_CAPABILITY CPU_PARAMS+12
#define X86_VENDOR_ID CPU_PARAMS+16
/*
* Kernel image entry point: stext, _stext and startup_32 all label the
* same location.
*
* swapper_pg_dir is the main page directory, address 0x00101000
*
* On entry, %esi points to the real-mode code as a 32-bit pointer.
*/
ENTRY(stext)
ENTRY(_stext)
startup_32:
/*
* Set segments to known values
*/
cld /* clear DF so the string instructions below move upwards */
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs /* ds, es, fs and gs now all hold __KERNEL_DS */
......
/*
* Initialize page tables
*
* Fills the 8KB from pg0 up to empty_zero_page (pg0 and pg1) with a
* temporary mapping: the entries hold 0x0007, 0x1007, 0x2007, ..., i.e.
* physical pages 0, 1, 2, ... in order. Two page tables hold 2K entries
* of 4KB each, so they cover 8MB of memory (the article takes this 8MB
* as the kernel's minimum memory requirement).
*/
movl $pg0-__PAGE_OFFSET,%edi /* pg0 is a virtual (link-time) address; subtracting the 3GB offset gives its physical address */
movl $007,%eax /* "007" means PRESENT+RW+USER */
2: stosl /* store %eax at (%edi) and advance %edi by 4 */
add $0x1000,%eax /* the next entry maps the next 4KB page */
cmp $empty_zero_page-__PAGE_OFFSET,%edi /* stop once %edi reaches empty_zero_page */
jne 2b
/*
* Enable paging
*
* Loads CR3 with the physical address of swapper_pg_dir, sets the PG bit
* in CR0, and then moves execution from the low (physical) addresses to
* the link-time virtual addresses.
*/
3:
movl $swapper_pg_dir-__PAGE_OFFSET,%eax /* physical address of the page directory */
movl %eax,%cr3 /* CR3 = page directory base */
movl %cr0,%eax
orl $0x80000000,%eax
movl %eax,%cr0 /* PG bit set: paging is on from here */
/*
* This jump is relative, so execution continues at the low (physical)
* address. That still works with paging on only because the first two
* entries of the page directory (0x00102007, 0x00103007) identity-map
* the low 8MB as a transitional mapping.
*/
jmp 1f
1:
movl $1f,%eax
/*
* Absolute jump to the link-time address of the next label, i.e. its
* physical address + 3GB. Paging translates that virtual address through
* the first two of the high 256 (kernel-space) page directory entries,
* which also hold 0x00102007 and 0x00103007, so it resolves to the same
* physical address (virtual address minus 3GB).
*/
jmp *%eax
1:
/* Set up the stack pointer */
lss stack_start,%esp /* load %ss:%esp from the pointer stored at stack_start */
......
/*
* Clear BSS first so that there are no surprises...
* No need to cld as DF is already clear from cld above...
*
* Zeroes every byte from __bss_start up to _end.
* (The article skips over this part.)
*/
xorl %eax,%eax /* %al = 0 is the fill byte */
movl $ SYMBOL_NAME(__bss_start),%edi
movl $ SYMBOL_NAME(_end),%ecx
subl %edi,%ecx /* %ecx = _end - __bss_start = number of bytes to clear */
rep
stosb
/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
call setup_idt /* fill the interrupt descriptor table with default gates */
/*
* Initialize eflags. Some BIOS's leave bits like NT set. This would
* confuse the debugger if this code is traced.
* XXX - best to initialize before switching to protected mode.
*/
pushl $0
popfl
/*
* Copy bootup parameters out of the way. First 2kB of
* _empty_zero_page is for boot parameters, second 2kB
* is for the command line.
*
* Note: %esi still has the pointer to the real-mode data.
*/
movl $ SYMBOL_NAME(empty_zero_page),%edi /* destination: the boot parameters and command line handed over by setup are copied into empty_zero_page */
movl $512,%ecx /* 512 longwords = 2kB of boot parameters */
cld
rep
movsl
xorl %eax,%eax
movl $512,%ecx
rep
stosl /* zero the second 2kB (command line area) */
movl SYMBOL_NAME(empty_zero_page)+NEW_CL_POINTER,%esi
andl %esi,%esi
jnz 2f # New command line protocol
cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
jne 1f /* no old-style command line either; the target label lies in code this excerpt leaves out */
movzwl OLD_CL_OFFSET,%esi
addl $(OLD_CL_BASE_ADDR),%esi /* %esi = OLD_CL_BASE_ADDR + 16-bit offset */
2:
movl $ SYMBOL_NAME(empty_zero_page)+2048,%edi
movl $512,%ecx
rep
movsl /* copy 2kB of command line from (%esi) */
......
/*
* CPU detection. Tells 386 / 486 / CPUID-capable processors apart by
* testing whether the AC and ID bits of EFLAGS can be flipped, records
* the results in boot_cpu_data, then sets up CR0 and probes for a math
* coprocessor. (The article does not go into this part.)
*/
movl $-1,X86_CPUID # -1 for no CPUID initially
/* check if it is 486 or 386. */
/*
* XXX - this does a lot of unnecessary setup. Alignment checks don't
* apply at our cpl of 0 and the stack ought to be aligned already, and
* we don't need to preserve eflags.
*/
movl $3,X86 # at least 386
pushfl # push EFLAGS
popl %eax # get EFLAGS
movl %eax,%ecx # save original EFLAGS
xorl $0x40000,%eax # flip AC bit in EFLAGS
pushl %eax # copy to EFLAGS
popfl # set EFLAGS
pushfl # get new EFLAGS
popl %eax # put it in eax
xorl %ecx,%eax # change in flags
andl $0x40000,%eax # check if AC bit changed
je is386
movl $4,X86 # at least 486
movl %ecx,%eax
xorl $0x200000,%eax # check ID flag
pushl %eax
popfl # if we are on a straight 486DX, SX, or
pushfl # 487SX we can't change it
popl %eax
xorl %ecx,%eax
pushl %ecx # restore original EFLAGS
popfl
andl $0x200000,%eax
je is486
/* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
cpuid
movl %eax,X86_CPUID # save CPUID level
movl %ebx,X86_VENDOR_ID # lo 4 chars
movl %edx,X86_VENDOR_ID+4 # next 4 chars
movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well?
je is486
movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid
movb %al,%cl # save reg for future use
andb $0x0f,%ah # mask processor family
movb %ah,X86
andb $0xf0,%al # mask model
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
is486:
movl %cr0,%eax # 486 or better
andl $0x80000011,%eax # Save PG,PE,ET
orl $0x50022,%eax # set AM, WP, NE and MP
jmp 2f
is386: pushl %ecx # restore original EFLAGS
popfl
movl %cr0,%eax # 386
andl $0x80000011,%eax # Save PG,PE,ET
orl $2,%eax # set MP
2: movl %eax,%cr0
call check_x87
......
/*
* Load the descriptor table registers, reload every segment register
* from the new GDT, and hand control to start_kernel. Lines consisting
* only of dots mark code that this excerpt leaves out.
*/
lgdt gdt_descr /* load GDTR (global descriptor table register) from gdt_descr */
lidt idt_descr /* load IDTR (interrupt descriptor table register) from idt_descr */
ljmp $(__KERNEL_CS),$1f /* far jump reloads %cs; ds, es, fs and gs are reloaded right below */
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ds # after changing gdt.
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
......
lss stack_start,%esp # Load processor stack
......
xorl %eax,%eax
lldt %ax /* load a null selector into LDTR */
cld # gcc2 wants the direction flag cleared at all times
......
call SYMBOL_NAME(start_kernel) /* enter start_kernel */
L6:
jmp L6 # main should never return here, but
# just in case, we know what happens.
#ifdef CONFIG_SMP
ready: .byte 0
#endif
/*
* We depend on ET to be correct. This checks for 287/387.
*
* Records the result in X86_HARD_MATH: 1 if a coprocessor answered,
* 0 otherwise. Uses %eax; on the no-coprocessor path it also writes CR0.
*/
check_x87:
movb $0,X86_HARD_MATH /* assume no coprocessor until proven otherwise */
clts
fninit
fstsw %ax
cmpb $0,%al /* a zero low status byte after fninit is taken to mean an FPU is present */
je 1f
movl %cr0,%eax /* no coprocessor: have to set bits */
/* NOTE(review): xorl toggles bit 2, so EM ends up set only if it was clear on entry */
xorl $4,%eax /* set EM */
movl %eax,%cr0
ret
ALIGN
1: movb $1,X86_HARD_MATH
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
ret
/*
* setup_idt
*
* sets up a idt with 256 entries pointing to
* ignore_int, interrupt gates. It doesn't actually load
* idt - that can be done only after paging has been enabled
* and the kernel moved to PAGE_OFFSET. Interrupts
* are enabled elsewhere, when we can be relatively
* sure everything is ok.
*
* Each entry is 8 bytes and all 256 entries point to the same handler,
* ignore_int. Uses %eax, %ecx, %edx and %edi.
*/
setup_idt:
lea ignore_int,%edx
movl $(__KERNEL_CS << 16),%eax
movw %dx,%ax /* selector = 0x0010 = cs */
movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
/*
* %eax = selector in the high word, low 16 bits of the handler address
* in the low word; %edx = high 16 bits of the handler address in the
* high word, gate attributes in the low word.
*/
lea SYMBOL_NAME(idt_table),%edi
mov $256,%ecx
rp_sidt:
movl %eax,(%edi) /* first 4 bytes of the gate */
movl %edx,4(%edi) /* second 4 bytes of the gate */
addl $8,%edi /* advance to the next 8-byte entry */
dec %ecx
jne rp_sidt
ret
/*
* Initial stack pointer, loaded by "lss stack_start,%esp": a 32-bit
* offset followed by a segment selector. The task_struct and its stack
* share two pages (8192 bytes), with the stack at the high-address end.
*/
ENTRY(stack_start)
.long SYMBOL_NAME(init_task_union)+8192
.long __KERNEL_DS
/* This is the default interrupt "handler" :-) */
int_msg:
.asciz "Unknown interrupt\n"
ALIGN
/*
* ignore_int: the handler that setup_idt installs in every IDT entry.
* Saves %eax, %ecx, %edx, %es and %ds, switches the data segments to
* __KERNEL_DS, prints int_msg through printk, restores the saved
* registers and returns with iret.
*/
ignore_int:
cld
pushl %eax
pushl %ecx
pushl %edx
pushl %es
pushl %ds
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
pushl $int_msg /* the argument for printk */
call SYMBOL_NAME(printk)
popl %eax /* drop the printk argument from the stack */
popl %ds
popl %es
popl %edx
popl %ecx
popl %eax
iret
/*
* The interrupt descriptor table has room for 256 idt's,
* the global descriptor table is dependent on the number
* of tasks we can have..
*
* idt_descr and gdt_descr are the operands of the lidt and lgdt
* instructions above: a 16-bit limit followed by a 32-bit base address.
*/
#define IDT_ENTRIES 256
#define GDT_ENTRIES (__TSS(NR_CPUS))
.globl SYMBOL_NAME(idt)
.globl SYMBOL_NAME(gdt)
ALIGN
.word 0
idt_descr:
.word IDT_ENTRIES*8-1 /* IDT limit: table size in bytes minus 1 */
SYMBOL_NAME(idt):
.long SYMBOL_NAME(idt_table) /* IDT base address; idt_table is a global variable */
.word 0
gdt_descr:
.word GDT_ENTRIES*8-1 /* GDT limit: table size in bytes minus 1 */
SYMBOL_NAME(gdt):
.long SYMBOL_NAME(gdt_table) /* GDT base address; gdt_table is defined in the data section below */
/*
* This is initialized to create an identity-mapping at 0-8M (for bootup
* purposes) and another mapping of the 0-8M area at virtual address
* PAGE_OFFSET.
*
* The low entries of the directory cover user space and the high entries
* cover kernel space; the first two entries of each group point at the
* same two page tables, pg0 and pg1.
*/
.org 0x1000
ENTRY(swapper_pg_dir)
.long 0x00102007 /* points to pg0 (physical 0x00102000, flags 007) */
.long 0x00103007 /* points to pg1 (physical 0x00103000, flags 007) */
.fill BOOT_USER_PGD_PTRS-2,4,0 /* BOOT_USER_PGD_PTRS is 768 */
/* default: 766 entries */
.long 0x00102007 /* points to pg0 */
.long 0x00103007 /* points to pg1 */
/* default: 254 entries */
.fill BOOT_KERNEL_PGD_PTRS-2,4,0 /* BOOT_KERNEL_PGD_PTRS is 256 */
/*
* The page tables are initialized to only 8MB here - the final page
* tables are set up later depending on memory size.
*/
.org 0x2000 /* physical address 0x00102000; the directory entry 0x00102007 is this address plus the flag bits 007 */
ENTRY(pg0)
.org 0x3000 /* physical address 0x00103000; the directory entry 0x00103007 is this address plus the flag bits 007 */
ENTRY(pg1)
/*
* empty_zero_page must immediately follow the page tables ! (The
* initialization loop counts until empty_zero_page)
*
* Each label below gets its own 0x1000-byte page; the last .org
* reserves the space for the final page.
*/
.org 0x4000
ENTRY(empty_zero_page)
.org 0x5000
ENTRY(empty_bad_page)
.org 0x6000
ENTRY(empty_bad_pte_table)
#if CONFIG_X86_PAE
.org 0x7000
ENTRY(empty_bad_pmd_table)
.org 0x8000
#else
.org 0x7000
#endif
/*
* This starts the data section. Note that the above is all
* in the text section because it has alignment requirements
* that we cannot fulfill any other way.
*/
.data
ALIGN
/*
* This contains typically 140 quadwords, depending on NR_CPUS.
*
* NOTE! Make sure the gdt descriptor in head.S matches this if you
* change anything.
*
* Each .quad is one 8-byte descriptor. The hex value at the start of a
* comment is the selector that refers to that entry. All four kernel
* and user segments have base 0.
*/
ENTRY(gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* not used */
.quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
.quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
.quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* not used */
.quad 0x0000000000000000 /* not used */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
.quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
.fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */
/*
* This is to aid debugging, the various locking macros will be putting
* code fragments here. When an oops occurs we'd rather know that it's
* inside the .text.lock section rather than as some offset from whatever
* function happens to be last in the .text segment.
*
* stext_lock labels the start of the .text.lock section.
*/
.section .text.lock
ENTRY(stext_lock)
.org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 .long 0x00103007 .fill BOOT_USER_PGD_PTRS-2,4,0 //768 /* default: 766 entries */ .long 0x00102007 .long 0x00103007 /* default: 254 entries */ .fill BOOT_KERNEL_PGD_PTRS-2,4,0 //256我们单独解释下这段代码,一个页目录表有1024个表项,共代表着4GB的虚拟空间。Linux内核以3GB为界把整个虚拟空间分成用户空间和系统空间。所以,页目录表中低768个表项用于用户空间的映射,而高256个表项用于系统空间的映射。
在Linux0.11中,内核空间和用户空间是这样切换的。
首先页目录项是这样的:
页目录表的前4项用于内核空间,分别指向页表0,页表1,页表2,页表3,共映射16MB的空间,内核态使用GDT,基地址为0,可以访问到所有的内存地址。
当处于进程2的用户态时,对应的页目录项是第32~47项(共16项),对应的16个页表是进程自己创建的。由于用户态使用LDT,基地址为128MB。比如cs:eip,其中eip为0,那么经过分段机制,虚拟地址为128MB;经过分页机制,首先根据虚拟地址的前10位选择页目录表中的第32项,然后根据虚拟地址的中间10位选择第32项所指向页表中的第一个页表项,最后12位都为0,所以这个页表项指向的页面起始地址便是要访问的物理地址。
在Linux2.4中,内核空间和用户空间是这样切换的。
每个进程有不同的页目录表,页目录表有1024个表项,共代表着4GB的虚拟空间。Linux内核以3GB为界把整个虚拟空间分成用户空间和系统空间。所以,页目录表中低768个表项用于用户空间的映射,而高256个表项用于系统空间的映射。
用户空间的虚拟地址是0~3G,也就是对应了页目录表中的低768个表项。还记得我们分配用户空间的虚拟地址就是从0分配到3G么?参见《Linux内核源代码情景分析-execve()》。
内核空间的虚拟地址是3G~4G,对应的是页目录表中的高256个表项,由于内核空间的标识符经过链接后都在实际的物理地址上加上了3G,所以访问内核空间时,虚拟地址在3G~4G,经过分页机制(如上)就变成了实际的物理地址(其实就是虚拟地址减去3G)。
Linux 2.4不使用LDT,只使用GDT,无论在内核空间还是用户空间,逻辑地址经过分段机制,得到的虚拟地址与逻辑地址相同。
GDT如下:
ENTRY(gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* not used */ .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* not used */ .quad 0x0000000000000000 /* not used */ /*
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/45770469