Linux内核创建一个新进程的过程

时间：2015-04-12 22:33:42 阅读：352 评论：0 收藏：0 [点我收藏+]

标签：

进程在创建它的时刻开始存活。在Linux系统中，这通常是fork()系统调用的结果，该系统调用通过复制一个现有的进程来创建一个全新的进程。只有在创建init进程时，才是通过代码直接填充数据结构。调用fork()的进程称为父进程，新生的进程称为子进程。在系统调用结束时，在返回点这个相同位置上，父进程恢复执行，子进程开始执行。fork()系统调用从内核两次返回：一次回到父进程，另一次回到新创建的子进程。

进程的描述符--task_struct （下面是中英文大体注释）

volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */ 进程的状态
void *stack;　　　　//进程通过alloc_thread_info函数分配它的内核栈，通过free_thread_info函数释放所分配的内核栈。
unsigned int flags;    /* per process flags, defined below */ 进程的标签
　
进程的调度
　　int on_rq;
    int prio, static_prio, normal_prio;//进程优先级 实时优先级范围是0到MAX_RT_PRIO-1（即99），而普通进程的静态优先级范围是从MAX_RT_PRIO到MAX_PRIO-1（即100到139）。值越大静态优先级越低。
    unsigned int rt_priority;//rt_priority用于保存实时优先级。normal_prio值取决于静态优先级和调度策略，static_prio用于保存静态优先级，可以通过nice系统调用来进行修改。
    const struct sched_class *sched_class;//sched_class结构体表示调度类
    struct sched_entity se;
    struct sched_rt_entity rt;
　　struct sched_dl_entity dl;
　　unsigned int policy; //policy表示进程的调度策略
    int nr_cpus_allowed;
    cpumask_t cpus_allowed;//cpus_allowed用于控制进程可以在哪些处理器上运行。
　　struct list_head tasks; //用于构建进程链表 ，内核的双向循环链表的实现方法 - 一个更简略的双向循环链表
　　struct mm_struct *mm, *active_mm;//mm指向进程所拥有的内存描述符，而active_mm指向进程运行时所使用的内存描述符。对于普通进程而言，这两个指针变量的值相同。但是，内核线程不 拥有任何内存描述符，所以它们的mm成员总是为NULL。当内核线程得以运行时，它的active_mm成员被初始化为前一个运行进程的 active_mm值。
　　/* per-thread vma caching */
    u32 vmacache_seqnum;
    struct vm_area_struct *vmacache[VMACACHE_SIZE];
/* task state */
    int exit_state;
    int exit_code, exit_signal;//exit_code用于设置进程的终止代号，这个值要么是_exit()或exit_group()系统调用参数（正常终止），要么是由内核提供的一个错误代号（异常终止）。
    int pdeath_signal;  /*  The signal sent when the parent dies  *///exit_signal被置为-1时表示是某个线程组中的一员。只有当线程组的最后一个成员终止时，才会产生一个信号，以通知线程组的领头进程的父进程。
    unsigned int jobctl;    /* JOBCTL_*, siglock protected */

    /* Used for emulating ABI behavior of previous Linux versions */
    unsigned int personality;

    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
                 * execve */
    unsigned in_iowait:1;

    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;
　　unsigned long atomic_flags; /* Flags needing atomic access. */

    pid_t pid; //进程标识号
    pid_t tgid;
/*程序创建的进程具有父子关系，在编程时往往需要引用这样的父子关系。进程描述符中有几个域用来表示这样的关系
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */
    struct task_struct __rcu *real_parent; /* real parent process */
    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;    /* list of my children */
    struct list_head sibling;    /* linkage in my parent's children list */
    struct task_struct *group_leader;    /* threadgroup leader */
　　cputime_t utime, stime, utimescaled, stimescaled;//utime/stime用于记录进程在用户态/内核态下所经过的节拍数（定时器）。utimescaled/stimescaled也是用于记录进程在用户态/内核态的运行时间，但它们以处理器的频率为刻度。
    cputime_t gtime;//gtime是以节拍计数的虚拟机运行时间（guest time）。
    unsigned long nvcsw, nivcsw; /* context switch counts */
    u64 start_time;        /* monotonic time in nsec */
    u64 real_start_time;    /* boot based time in nsec */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

/* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                     * credentials (COW) */
    const struct cred __rcu *cred;    /* effective (overridable) subjective task
                     * credentials (COW) */
    char comm[TASK_COMM_LEN]; /* executable name excluding path
                     - access with [gs]et_task_comm (which lock
                       it with task_lock())
                     - initialized normally by setup_new_exec */
/* file system info */
    int link_count, total_link_count;
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;
/* open file information */
    struct files_struct *files;
/* namespaces */
    struct nsproxy *nsproxy;
/* signal handlers */
    struct signal_struct *signal;//signal指向进程的信号描述符。
    struct sighand_struct *sighand;//sighand指向进程的信号处理程序描述符。

    sigset_t blocked, real_blocked;//blocked表示被阻塞信号的掩码，real_blocked表示临时掩码。
    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
    struct sigpending pending; //pending存放私有挂起信号的数据结构。

    unsigned long sas_ss_sp;//  sas_ss_sp是信号处理程序备用堆栈的地址，sas_ss_size表示堆栈的大小。
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;//设备驱动程序常用notifier指向的函数来阻塞进程的某些信号（notifier_mask是这些信号的位掩码），notifier_data指的是notifier所指向的函数可能使用的数据。
    sigset_t *notifier_mask;
    struct callback_head *task_works;

    struct audit_context *audit_context;//进程审计
/* Thread group tracking */
       u32 parent_exec_id;
       u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;
/* journalling filesystem info */
    void *journal_info;

/* stacked block device info */
    struct bio_list *bio_list;
/* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
    struct task_io_accounting ioac;
struct rcu_head rcu;

    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;

    struct page_frag task_frag;
/*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

分配进程描述符

Linux通过slab分配器分配task_struct结构，这样能达到对象复用和缓存着色。

/*
 * Low-level per-thread record. A task_struct refers to its thread_info
 * through its ->stack pointer, and ->task here points back at that
 * task_struct. setup_thread_stack() duplicates this record by plain
 * struct assignment and then fixes up ->task for the new owner.
 */
struct thread_info {
    struct task_struct    *task;        /* main task structure */
    struct exec_domain    *exec_domain;    /* execution domain */
    unsigned long        flags;        /* low level flags */
    __u32            status;        /* thread synchronous flags */
    __u32            cpu;        /* NOTE(review): presumably the CPU this thread runs on - confirm */
    int            preempt_count; /* 0 => preemptable, <0 => BUG */
    mm_segment_t        addr_limit;    /* thread address space */
    struct restart_block    restart_block;    /* NOTE(review): presumably system-call restart state - confirm */
    unsigned long        previous_sp;    /* sp of previous stack in case
                           of nested IRQ stacks */
    __u8            supervisor_stack[0];    /* zero-length trailing array: adds no storage of its own */
};

技术分享

进程的状态：

// include/linux/sched.h
/*
 * Values stored in task_struct.state. Zero means runnable; every
 * non-zero value below is a distinct power of two, so each one occupies
 * its own bit.
 */
#define TASK_RUNNING        0	/* runnable */
#define TASK_INTERRUPTIBLE    1	/* NOTE(review): presumably sleeping, wakeable by signals - confirm */
#define TASK_UNINTERRUPTIBLE    2	/* NOTE(review): presumably sleeping, not wakeable by signals - confirm */
#define __TASK_STOPPED        4	/* NOTE(review): presumably stopped by job control - confirm */
#define __TASK_TRACED        8	/* NOTE(review): presumably stopped under a tracer - confirm */

技术分享

进程的创建

Linux通过clone()系统调用实现fork()，然后再由clone()去调用do_fork()。do_fork()完成了创建中的大部分工作，它定义在kernel/fork.c文件中。do_fork()调用copy_process()函数，通过copy_process()创建子进程的描述符，并创建子进程执行时所需的其他数据结构，最终返回这个创建好的进程描述符(子进程的描述符)。

p = copy_process(clone_flags, stack_start, stack_size,child_tidptr, NULL, trace);//struct task_struct *p;

在copy_process()函数中调用dup_task_struct()。

//在copy_process()函数
p = dup_task_struct(current);//struct task_struct *p;

retval = copy_thread(clone_flags, stack_start, stack_size, p);

dup_task_struct()

dup_task_struct()为新进程创建一个内核栈、thread_info结构和task_struct，这些值与当前进程的值相同，此时父子进程的描述符完全相同。

//在dup_task_struct()函数中
tsk = alloc_task_struct_node(node);//struct task_struct *tsk;

ti = alloc_thread_info_node(tsk, node);//struct thread_info *ti;

err = arch_dup_task_struct(tsk, orig);//int err;
tsk->stack = ti;
setup_thread_stack(tsk, orig);

/*
 * Allocate the memory backing a task on NUMA node `node`.
 *
 * Asks alloc_pages_node() for a compound allocation of order
 * KERNEL_STACK_SIZE_ORDER using GFP_KERNEL | __GFP_COMP. The statement
 * expression evaluates to page_address() of the returned page, typed as
 * a task_struct pointer, or to NULL when no page was returned.
 */
#define alloc_task_struct_node(node)                                    \
({                                                                      \
    struct page *page = alloc_pages_node(node,                          \
                                         GFP_KERNEL | __GFP_COMP,       \
                                         KERNEL_STACK_SIZE_ORDER);      \
    struct task_struct *ret = NULL;                                     \
                                                                        \
    if (page)                                                           \
        ret = page_address(page);                                       \
    ret;                                                                \
})

alloc_task_struct_node(node)函数中创建页，其中一部分就用于堆栈

arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)函数复制一个PCB——task_struct

在arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)函数
*dst = *src;//在arch_dup_task_struct()函数中，通过赋值完成复制操作

setup_thread_stack()函数只是复制thread_info，而非复制内核堆栈

static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
    *task_thread_info(p) = *task_thread_info(org);
    task_thread_info(p)->task = p;
}

copy_thread()函数

在copy_thread()函数中，完成初始化

　　//在copy_thread(unsigned long clone_flags, unsigned long sp,unsigned long arg, struct task_struct *p)函数中
　　 struct pt_regs *childregs = task_pt_regs(p);//内核堆栈的栈底，p代表子进程
    struct task_struct *tsk;
    int err;

    p->thread.sp = (unsigned long) childregs;
    p->thread.sp0 = (unsigned long) (childregs+1);
　　if (unlikely(p->flags & PF_KTHREAD)) {//复制一部分int指令和SAVE_ALL压栈内容
        /* kernel thread */
        memset(childregs, 0, sizeof(struct pt_regs));
        p->thread.ip = (unsigned long) ret_from_kernel_thread;
        task_user_gs(p) = __KERNEL_STACK_CANARY;
        childregs->ds = __USER_DS;
        childregs->es = __USER_DS;
        childregs->fs = __KERNEL_PERCPU;
        childregs->bx = sp;    /* function */
        childregs->bp = arg;
        childregs->orig_ax = -1;
        childregs->cs = __KERNEL_CS | get_kernel_rpl();
        childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
        p->thread.io_bitmap_ptr = NULL;
        return 0;
    }
　　*childregs = *current_pt_regs();//复制内核堆栈
    childregs->ax = 0;//为什么子进程的fork返回0，这里就是原因！
　　if (sp)
        childregs->sp = sp;//子进程返回用户态时使用的栈指针（保存在pt_regs中），并非内核栈顶；调度到子进程时的内核栈顶由上面的p->thread.sp记录
　　p->thread.ip = (unsigned long) ret_from_fork; //调度到子进程时的第一条指令地址

参考文献：

http://blog.csdn.net/npy_lp/article/details/7335187

Linux内核创建一个新进程的过程

标签：

原文地址：http://www.cnblogs.com/pingandezhufu/p/4420074.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行