SlideShare une entreprise Scribd logo
1  sur  26
Télécharger pour lire hors ligne
Process Management


   Roy Lee, 20 June 2005
   NCTU Computer Operating System Lab



                                        1
Process State

                                                          Newly Created.
                                                          Runnable & Running
                                                          Expired
                                                          Interrupted
                                                          Resume
                                                          Terminated




Robert Love, “Linux Kernel Development,” 2nd Edition
                                                                           2
Process Creation – fork()
                  fork()                          exec()
          Copy the whole address space   Discard the current address space
          and the page table             and load another program


         A                        A                                 A
                                  A                                 W
Parent             Parent                 Child      Parent                  Child
                                  B                                 X
                                  C                                 Y
         B                        B                                 B
   ...




                      ...




                                            ...




                                                        ...




                                                                              ...
         C                        C                                 C
                                  D                                 Z
         D                        D                                 D
         ...




                                  ...




                                                                    ...
                                                                                    3
Process Creation – vfork()
                  vfork()                       exec()
          Share the whole address space  Discard the current address space
          and the page table             and load another program


         A                        A                                 A
                                                                    W
Parent             Parent                            Parent                  Child
                   Child                                            X
                                                                    Y
         B                        B                                 B
   ...




                      ...




                                                        ...




                                                                              ...
         C                        C                                 C
                                                                    Z
         D                        D                                 D
         ...




                                  ...




                                                                    ...
                                                                                    4
Process Creation – Copy-on-Write
                  fork()                    copy-on-write
          Only copy the page table         Delay or altogether prevent
                                           copying of data


         A                           A                                   A
                                                                         B’
Parent             Parent                   Child      Parent                  Child



         B                           B                                   B
   ...




                      ...




                                              ...




                                                          ...




                                                                                ...
         C                           C                                   C


         D                           D                                   D
         ...




                                     ...




                                                                         ...
                                                                                      5
Process Creation – Copy-on-Write
                  fork()                            exec()
          Only copy the page table         Delay or altogether prevent
                                           copying of data


         A                           A                                   A
                                                                         W
Parent             Parent                   Child      Parent                  Child
                                                                         X
                                                                         Y
         B                           B                                   B
   ...




                      ...




                                              ...




                                                          ...




                                                                                ...
         C                           C                                   C
                                                                         Z
         D                           D                                   D
         ...




                                     ...




                                                                         ...
                                                                                      6
task_struct
  [include/linux/sched.h]




Robert Love, “Linux Kernel Development,” 2nd Edition
Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition   7
Process Creation - Threads
   Threads in Linux
       To Linux, threads are just processes that share certain
        resources.

   Clone() - The heart of the Linux implementation of threads

       Threads are created like normal tasks, except that the clone() syscall
        is passed flags indicating the specific resources to be shared
        clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, 0);

       Actually both fork() and vfork() are also implemented via the clone()
        syscall

        clone(SIGCHLD, 0);
        clone(CLONE_VFORK | CLONE_VM | SIGCHLD, 0);



                                                                                 8
Process Creation Flow
                                                                                         User space

sys_fork()      sys_vfork()      sys_clone()
                                                                                         Kernel space
                                                             [kernel/process.c]
                                                             [kernel/fork.c]
             do_fork()                                       [kernel/sched.c]

                   alloc_pidmap()

                                             duplicate the task_struct, initialize it
                   copy_process()
                                             and setup according to the specified clone_flags

                            success?
                                 yes
                     wake_up_new_task()          put the child into runqueue
       no

  free_pidmap()              vfork?            wait_for_completion()
                                       yes
                                               when the child terminates or calls exec(),
                            no                 it wakes up the parent sleeping in the wait queue

               return pid




                                                                                                        9
Process State

                       preempted




                       schedule
               ready               running



        fork                                 exit




   initial               asleep                zombie




                                                        10
Process State

                                   interrupt

                       preempted
                                               syscall,
                                               exception

                       schedule     kernel                   user
               ready
                                   running                 running
                                               return


        fork                                    exit




   initial               asleep                   zombie




                                                                     11
Execution Mode and Context

                                              User mode


                        application
                                                          not allowed
                        (user) code

 Process                                                                 System
 context                                                                 context

                        system calls,                     interrupts,
                        exceptions                        system tasks



                                           Kernel mode


URESH VAHALIA, “UNIX INTERNALS – THE NEW FRONTIERS”
                                                                                   12
Execution Mode and Context
                             User mode                                     A

                                                                           W
               application
                                         not allowed                       X
               (user) code
Process
context                                                    System          Y
                                                           context

               system calls,             interrupts,                       B
               exceptions                system tasks
                                                                           C

                             Kernel mode                                   Z
                                                        User               D
                                                        Space




                                                                           …
                                                                           …
                                                        Kernel
                                                        Space




                                                                               ...
                                                                     ...


                                                             P0                       P1

  URESH VAHALIA, “UNIX INTERNALS – THE NEW FRONTIERS”
                                                                                     13
thread_info
                                              struct thread_info {
                                                  struct task_struct *task;
                                                  struct exec_domain *exec_domain;
                                                  __u32            flags;
                                                  __u32            status;
                                                  __u32            cpu;
                                                  int              preempt_count;

                                                    mm_segment_t         addr_limit;
                                                    struct restart_block     restart_block;
                                              };




Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition
                                                                                          14
do_fork()                          (1/4)
                                                                                 [kernel/fork.c]


1.    long pid = alloc_pidmap();

2.    if (pid < 0)
3.        return -EAGAIN;
4.    …
5.    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
6.    if (!IS_ERR(p)) {
7.        struct completion vfork;

8.        if (clone_flags & CLONE_VFORK) {
9.             p->vfork_done = &vfork;
10.            init_completion(&vfork);
11.       }
12.       …
13.       if (!(clone_flags & CLONE_STOPPED))
14.            wake_up_new_task(p, clone_flags);
15.       else
16.            p->state = TASK_STOPPED;
17.       …
18.       if (clone_flags & CLONE_VFORK) {
19.            wait_for_completion(&vfork);
20.            if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
21.                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
22.       }
23.   } else {
24.       free_pidmap(pid);
25.       pid = PTR_ERR(p);
26.   }
27.   return pid;




                                                                                                        15
do_fork()                          (2/4)
                                                                                 [kernel/fork.c]


1.    long pid = alloc_pidmap();

2.    if (pid < 0)
3.        return -EAGAIN;
4.    …
5.    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
6.    if (!IS_ERR(p)) {
7.        struct completion vfork;

8.        if (clone_flags & CLONE_VFORK) {
9.             p->vfork_done = &vfork;
10.            init_completion(&vfork);
11.       }
12.       …
13.       if (!(clone_flags & CLONE_STOPPED))
14.            wake_up_new_task(p, clone_flags);
15.       else
16.            p->state = TASK_STOPPED;
17.       …
18.       if (clone_flags & CLONE_VFORK) {
19.            wait_for_completion(&vfork);
20.            if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
21.                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
22.       }
23.   } else {
24.       free_pidmap(pid);
25.       pid = PTR_ERR(p);
26.   }
27.   return pid;




                                                                                                        16
do_fork()                          (3/4)
                                                                                 [kernel/fork.c]


1.    long pid = alloc_pidmap();

2.    if (pid < 0)
3.        return -EAGAIN;
4.    …
5.    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
6.    if (!IS_ERR(p)) {
7.        struct completion vfork;

8.        if (clone_flags & CLONE_VFORK) {
9.             p->vfork_done = &vfork;
10.            init_completion(&vfork);
11.       }
12.       …
13.       if (!(clone_flags & CLONE_STOPPED))
14.            wake_up_new_task(p, clone_flags);
15.       else
16.            p->state = TASK_STOPPED;
17.       …
18.       if (clone_flags & CLONE_VFORK) {
19.            wait_for_completion(&vfork);
20.            if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
21.                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
22.       }
23.   } else {
24.       free_pidmap(pid);
25.       pid = PTR_ERR(p);
26.   }
27.   return pid;




                                                                                                        17
do_fork()                          (4/4)
                                                                                 [kernel/fork.c]


1.    long pid = alloc_pidmap();

2.    if (pid < 0)
3.        return -EAGAIN;
4.    …
5.    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
6.    if (!IS_ERR(p)) {
7.        struct completion vfork;

8.        if (clone_flags & CLONE_VFORK) {
9.             p->vfork_done = &vfork;
10.            init_completion(&vfork);
11.       }
12.       …
13.       if (!(clone_flags & CLONE_STOPPED))
14.            wake_up_new_task(p, clone_flags);
15.       else
16.            p->state = TASK_STOPPED;
17.       …
18.       if (clone_flags & CLONE_VFORK) {
19.            wait_for_completion(&vfork);
20.            if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
21.                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
22.       }
23.   } else {
24.       free_pidmap(pid);
25.       pid = PTR_ERR(p);
26.   }
27.   return pid;




                                                                                                        18
copy_process()                                           [kernel/fork.c]

  int retval;
  struct task_struct *p = NULL;

  if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
      return ERR_PTR(-EINVAL);

  /*
   * Thread groups must share signals as well, and detached threads
   * can only be started up within the thread group.
   */
  if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
      return ERR_PTR(-EINVAL);

  /*
   * Shared signal handlers imply shared VM. By way of the above,
   * thread groups also imply shared VM. Blocking this case allows
   * for various simplifications in other code.
   */
  if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
      return ERR_PTR(-EINVAL);




                                                                          19
copy_process()
  retval = security_task_create(clone_flags);
  if (retval)
      goto fork_out;

  retval = -ENOMEM;
  p = dup_task_struct(current);
  if (!p)
      goto fork_out;

  retval = -EAGAIN;
  if (atomic_read(&p->user->processes) >=
          p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
      if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
              p->user != &root_user)
          goto bad_fork_free;
  }

  atomic_inc(&p->user->__count);
  atomic_inc(&p->user->processes);
  get_group_info(p->group_info);




                                                                     20
dup_task_struct()
  static struct task_struct *dup_task_struct(struct task_struct *orig)
  {
      struct task_struct *tsk;          #define unlazy_fpu(tsk) do { \
      struct thread_info *ti;               if ((tsk)->thread_info->status & TS_USEDFPU) \
                                                save_init_fpu(tsk); \
      prepare_to_copy(orig);            } while (0)

      tsk = alloc_task_struct();
      if (!tsk)                                                                            2
          return NULL;
                                               __get_free_pages(GFP_KERNEL,THREAD_ORDER)
      ti = alloc_thread_info(tsk);
      if (!ti) {
          free_task_struct(tsk);
          return NULL;
      }

      *ti = *orig->thread_info;
      *tsk = *orig;
      tsk->thread_info = ti;
      ti->task = tsk;

      atomic_set(&tsk->usage,2);
      return tsk;
  }



                                                                                               21
Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition
copy_process()
  if (nr_threads >= max_threads)
      goto bad_fork_cleanup_count;

  if (!try_module_get(p->thread_info->exec_domain->module))
      goto bad_fork_cleanup_count;

  if (p->binfmt && !try_module_get(p->binfmt->module))
      goto bad_fork_cleanup_put_domain;

  p->did_exec = 0;
  copy_flags(clone_flags, p);
  p->pid = pid;
  retval = -EFAULT;
  if (clone_flags & CLONE_PARENT_SETTID)
      if (put_user(p->pid, parent_tidptr))
          goto bad_fork_cleanup;
  ...

  p->tgid = p->pid;
  if (clone_flags & CLONE_THREAD)
      p->tgid = current->tgid;



                                                              22
PID vs. TGID
1. Every process has a unique pid.
2. Each process in the same thread group has the same tgid.
3. The tgid is the pid of the oldest process in that group (the thread group leader).

do_fork(){
    ...                                                               pid:1002
    copy_process(){                                                  tgid:1002
        ...
        p->pid = pid;
        ...                                                           fork()
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
                                                              pid:1003    clone()     pid:1005
            p->tgid = current->tgid;
                                                              tgid:1003              tgid:1003
    }
}
                                           pid:1007                                 clone()
                                           tgid:1004     fork()


asmlinkage long sys_getpid(void)                    clone()                          pid:1006
                                                              pid:1004               tgid:1003
{
                                                              tgid:1004
    return current->tgid;
}




                                                                                              23
copy_process()
 if ((retval = security_task_alloc(p)))
     goto bad_fork_cleanup_policy;
 if ((retval = audit_alloc(p)))
     goto bad_fork_cleanup_security;
 /* copy all the process information */
 if ((retval = copy_semundo(clone_flags, p)))
     goto bad_fork_cleanup_audit;
 if ((retval = copy_files(clone_flags, p)))
     goto bad_fork_cleanup_semundo;           bad_fork_cleanup_namespace:
                                                  exit_namespace(p);
 if ((retval = copy_fs(clone_flags, p)))      bad_fork_cleanup_keys:
     goto bad_fork_cleanup_files;                 exit_keys(p);
                                              bad_fork_cleanup_mm:
 if ((retval = copy_sighand(clone_flags, p)))     if (p->mm)
     goto bad_fork_cleanup_fs;                        mmput(p->mm);
                                              bad_fork_cleanup_signal:
 if ((retval = copy_signal(clone_flags, p)))      exit_signal(p);
     goto bad_fork_cleanup_sighand;           bad_fork_cleanup_sighand:
                                                  exit_sighand(p);
 if ((retval = copy_mm(clone_flags, p)))      bad_fork_cleanup_fs:
     goto bad_fork_cleanup_signal;                exit_fs(p); /* blocking */
                                              bad_fork_cleanup_files:
 if ((retval = copy_keys(clone_flags, p)))        exit_files(p); /* blocking */
     goto bad_fork_cleanup_mm;                bad_fork_cleanup_semundo:
 if ((retval = copy_namespace(clone_flags, p)))   exit_sem(p);
     goto bad_fork_cleanup_keys;              bad_fork_cleanup_audit:
                                                  audit_free(p);
                                              bad_fork_cleanup_security:
                                                  security_task_free(p);
                                              bad_fork_cleanup_policy:




                                                                                       24
copy_process()
 if ((retval = security_task_alloc(p)))
     goto bad_fork_cleanup_policy;
 if ((retval = audit_alloc(p)))
     goto bad_fork_cleanup_security;
 /* copy all the process information */
 if ((retval = copy_semundo(clone_flags, p)))
     goto bad_fork_cleanup_audit;
 if ((retval = copy_files(clone_flags, p)))
     goto bad_fork_cleanup_semundo;
 if ((retval = copy_fs(clone_flags, p)))
     goto bad_fork_cleanup_files;
 if ((retval = copy_sighand(clone_flags, p)))
     goto bad_fork_cleanup_fs;
 if ((retval = copy_signal(clone_flags, p)))
     goto bad_fork_cleanup_sighand;
 if ((retval = copy_mm(clone_flags, p)))
     goto bad_fork_cleanup_signal;
 if ((retval = copy_keys(clone_flags, p)))
     goto bad_fork_cleanup_mm;
 if ((retval = copy_namespace(clone_flags, p)))
     goto bad_fork_cleanup_keys;

                          bad_fork_cleanup_namespace:
                              exit_namespace(p);
                          bad_fork_cleanup_keys:
                              exit_keys(p);
                          bad_fork_cleanup_mm:
                              if (p->mm)
                                  mmput(p->mm);
                          bad_fork_cleanup_signal:
                              exit_signal(p);
                          bad_fork_cleanup_sighand:
                              exit_sighand(p);
                          bad_fork_cleanup_fs:
                              exit_fs(p); /* blocking */
                          bad_fork_cleanup_files:
                              exit_files(p); /* blocking */
                          bad_fork_cleanup_semundo:
                              exit_sem(p);
                          bad_fork_cleanup_audit:
                              audit_free(p);
                          bad_fork_cleanup_security:
                              security_task_free(p);
                          bad_fork_cleanup_policy:


                                                                     25
context_switch()
                                                               [kernel/sched.c]
1.    static inline
2.    task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
3.    {
4.        struct mm_struct *mm = next->mm;
5.        struct mm_struct *oldmm = prev->active_mm;

6.       if (unlikely(!mm)) {
7.           next->active_mm = oldmm;
8.           atomic_inc(&oldmm->mm_count);
9.           enter_lazy_tlb(oldmm, next);
10.      } else
11.          switch_mm(oldmm, mm, next);

12.      if (unlikely(!prev->mm)) {
13.          prev->active_mm = NULL;
14.          WARN_ON(rq->prev_mm);
15.          rq->prev_mm = oldmm;
16.      }

17.      /* Here we just switch the register state and the stack. */
18.      switch_to(prev, next, prev);

19.      return prev;
20. }



                                                                                  26

Contenu connexe

Tendances

Tendances (20)

Virtualization Support in ARMv8+
Virtualization Support in ARMv8+Virtualization Support in ARMv8+
Virtualization Support in ARMv8+
 
Scheduling in Android
Scheduling in AndroidScheduling in Android
Scheduling in Android
 
Learn how to addressing medical and industrial challenges with BlackBerry QNX...
Learn how to addressing medical and industrial challenges with BlackBerry QNX...Learn how to addressing medical and industrial challenges with BlackBerry QNX...
Learn how to addressing medical and industrial challenges with BlackBerry QNX...
 
Cgroups in android
Cgroups in androidCgroups in android
Cgroups in android
 
Making Linux do Hard Real-time
Making Linux do Hard Real-timeMaking Linux do Hard Real-time
Making Linux do Hard Real-time
 
Linux : PSCI
Linux : PSCILinux : PSCI
Linux : PSCI
 
HKG15-505: Power Management interactions with OP-TEE and Trusted Firmware
HKG15-505: Power Management interactions with OP-TEE and Trusted FirmwareHKG15-505: Power Management interactions with OP-TEE and Trusted Firmware
HKG15-505: Power Management interactions with OP-TEE and Trusted Firmware
 
Linux Locking Mechanisms
Linux Locking MechanismsLinux Locking Mechanisms
Linux Locking Mechanisms
 
SFO15-TR9: PSCI, ACPI (and UEFI to boot)
SFO15-TR9: PSCI, ACPI (and UEFI to boot)SFO15-TR9: PSCI, ACPI (and UEFI to boot)
SFO15-TR9: PSCI, ACPI (and UEFI to boot)
 
Linux Memory Management
Linux Memory ManagementLinux Memory Management
Linux Memory Management
 
IntelON 2021 Processor Benchmarking
IntelON 2021 Processor BenchmarkingIntelON 2021 Processor Benchmarking
IntelON 2021 Processor Benchmarking
 
Container Performance Analysis
Container Performance AnalysisContainer Performance Analysis
Container Performance Analysis
 
XPDS16: Keeping coherency on ARM - Julien Grall, ARM
XPDS16: Keeping coherency on ARM - Julien Grall, ARMXPDS16: Keeping coherency on ARM - Julien Grall, ARM
XPDS16: Keeping coherency on ARM - Julien Grall, ARM
 
Understanding eBPF in a Hurry!
Understanding eBPF in a Hurry!Understanding eBPF in a Hurry!
Understanding eBPF in a Hurry!
 
Memory management in Linux kernel
Memory management in Linux kernelMemory management in Linux kernel
Memory management in Linux kernel
 
Android Binder IPC for Linux
Android Binder IPC for LinuxAndroid Binder IPC for Linux
Android Binder IPC for Linux
 
Linux MMAP & Ioremap introduction
Linux MMAP & Ioremap introductionLinux MMAP & Ioremap introduction
Linux MMAP & Ioremap introduction
 
Linux on ARM 64-bit Architecture
Linux on ARM 64-bit ArchitectureLinux on ARM 64-bit Architecture
Linux on ARM 64-bit Architecture
 
Linux memory-management-kamal
Linux memory-management-kamalLinux memory-management-kamal
Linux memory-management-kamal
 
Linux memory
Linux memoryLinux memory
Linux memory
 

En vedette

Linux process management
Linux process managementLinux process management
Linux process management
Raghu nath
 
Introduction to UBI
Introduction to UBIIntroduction to UBI
Introduction to UBI
Roy Lee
 
Process management
Process managementProcess management
Process management
Mohd Arif
 

En vedette (20)

Process management
Process managementProcess management
Process management
 
Linux Process Management Workshop
Linux Process Management WorkshopLinux Process Management Workshop
Linux Process Management Workshop
 
Wait queue
Wait queueWait queue
Wait queue
 
Process and Threads in Linux - PPT
Process and Threads in Linux - PPTProcess and Threads in Linux - PPT
Process and Threads in Linux - PPT
 
Scheduling
SchedulingScheduling
Scheduling
 
Linux process management
Linux process managementLinux process management
Linux process management
 
Introduction to UBI
Introduction to UBIIntroduction to UBI
Introduction to UBI
 
Deep dive into Android Data Binding
Deep dive into Android Data BindingDeep dive into Android Data Binding
Deep dive into Android Data Binding
 
Android : Deep dive into developing MobileApp using Android
Android : Deep dive into developing MobileApp using AndroidAndroid : Deep dive into developing MobileApp using Android
Android : Deep dive into developing MobileApp using Android
 
Linux Commands
Linux CommandsLinux Commands
Linux Commands
 
Improving continuous process operation using data analytics delta v applicati...
Improving continuous process operation using data analytics delta v applicati...Improving continuous process operation using data analytics delta v applicati...
Improving continuous process operation using data analytics delta v applicati...
 
Low Level View of Android System Architecture
Low Level View of Android System ArchitectureLow Level View of Android System Architecture
Low Level View of Android System Architecture
 
Fumaigation Process in Operation Theaters
Fumaigation Process in Operation TheatersFumaigation Process in Operation Theaters
Fumaigation Process in Operation Theaters
 
Linux commands
Linux commandsLinux commands
Linux commands
 
Operating Systems: Process Scheduling
Operating Systems: Process SchedulingOperating Systems: Process Scheduling
Operating Systems: Process Scheduling
 
Process management in linux
Process management in linuxProcess management in linux
Process management in linux
 
Process management
Process managementProcess management
Process management
 
Operating Systems and Memory Management
Operating Systems and Memory ManagementOperating Systems and Memory Management
Operating Systems and Memory Management
 
Operation Process
Operation ProcessOperation Process
Operation Process
 
Operating System 2
Operating System 2Operating System 2
Operating System 2
 

Process Management

  • 1. Process Management Roy Lee, 20 June 2005 NCTU Computer Operating System Lab 1
  • 2. Process State • Newly Created. • Runnable & Running • Expired • Interrupted • Resume • Terminated Robert Love, “Linux Kernel Development,” 2nd Edition 2
  • 3. Process Creation – fork() fork() exec() Copy the whole address space Discard the current address space and the page table and load another program A A A A W Parent Parent Child Parent Child B X C Y B B B ... ... ... ... ... C C C D Z D D D ... ... ... 3
  • 4. Process Creation – vfork() vfork() exec() Share the parent's address space and page table (nothing is copied; the parent is suspended until the child calls exec() or exits) Discard the current address space and load another program A A A W Parent Parent Parent Child Child X Y B B B ... ... ... ... C C C Z D D D ... ... ... 4
  • 5. Process Creation – Copy-on-Write fork() copy-on-write Only copy the page table Delay or altogether prevent copying of data A A A B’ Parent Parent Child Parent Child B B B ... ... ... ... ... C C C D D D ... ... ... 5
  • 6. Process Creation – Copy-on-Write fork() exec() Only copy the page table Delay or altogether prevent copying of data A A A W Parent Parent Child Parent Child X Y B B B ... ... ... ... ... C C C Z D D D ... ... ... 6
  • 7. task_struct [include/linux/sched.h] Robert Love, “Linux Kernel Development,” 2nd Edition Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition 7
  • 8. Process Creation - Threads – Threads in Linux: to Linux, threads are just processes that share certain resources. – clone(): the heart of the Linux implementation of threads. Threads are created like normal tasks, except that the clone() syscall is passed flags specifying which resources are to be shared: clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, 0); – Actually, both fork() and vfork() are also implemented via the clone() syscall: clone(SIGCHLD, 0); clone(CLONE_VFORK | CLONE_VM | SIGCHLD, 0); 8
  • 9. Process Creation Flow User space → Kernel space: sys_fork() / sys_vfork() / sys_clone() [kernel/process.c] → do_fork() [kernel/fork.c] → alloc_pidmap() → copy_process() (duplicate the task_struct, initialize it and set it up according to the specified clone_flags) → success? no: free_pidmap(), then return pid; yes: wake_up_new_task() [kernel/sched.c] (put the child into the runqueue) → vfork? yes: wait_for_completion() (when the child terminates, it wakes up the parent sleeping in the wait queue), then return pid; no: return pid 9
  • 10. Process State preempted schedule ready running fork exit initial asleep zombie 10
  • 11. Process State interrupt preempted syscall, exception schedule kernel user ready running running return fork exit initial asleep zombie 11
  • 12. Execution Mode and Context User mode + process context: application (user) code. User mode + system context: not allowed. Kernel mode + process context: system calls, exceptions. Kernel mode + system context: interrupts, system tasks. Uresh Vahalia, “UNIX Internals – The New Frontiers” 12
  • 13. Execution Mode and Context User mode + process context: application (user) code. User mode + system context: not allowed. Kernel mode + process context: system calls, exceptions. Kernel mode + system context: interrupts, system tasks. Diagram labels: User Space pages A, B, C, D, … and W, X, Y, Z, …; Kernel Space ...; processes P0 and P1. Uresh Vahalia, “UNIX Internals – The New Frontiers” 13
  • 14. thread_info struct thread_info { struct task_struct *task; struct exec_domain *exec_domain; __u32 flags; __u32 status; __u32 cpu; int preempt_count; mm_segment_t addr_limit; struct restart_block restart_block; }; Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition 14
  • 15. do_fork() (1/4) [kernel/fork.c] 1. long pid = alloc_pidmap(); 2. if (pid < 0) 3. return -EAGAIN; 4. … 5. p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 6. if (!IS_ERR(p)) { 7. struct completion vfork; 8. if (clone_flags & CLONE_VFORK) { 9. p->vfork_done = &vfork; 10. init_completion(&vfork); 11. } 12. … 13. if (!(clone_flags & CLONE_STOPPED)) 14. wake_up_new_task(p, clone_flags); 15. else 16. p->state = TASK_STOPPED; 17. … 18. if (clone_flags & CLONE_VFORK) { 19. wait_for_completion(&vfork); 20. if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 21. ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 22. } 23. } else { 24. free_pidmap(pid); 25. pid = PTR_ERR(p); 26. } 27. return pid; 15
  • 16. do_fork() (2/4) [kernel/fork.c] 1. long pid = alloc_pidmap(); 2. if (pid < 0) 3. return -EAGAIN; 4. … 5. p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 6. if (!IS_ERR(p)) { 7. struct completion vfork; 8. if (clone_flags & CLONE_VFORK) { 9. p->vfork_done = &vfork; 10. init_completion(&vfork); 11. } 12. … 13. if (!(clone_flags & CLONE_STOPPED)) 14. wake_up_new_task(p, clone_flags); 15. else 16. p->state = TASK_STOPPED; 17. … 18. if (clone_flags & CLONE_VFORK) { 19. wait_for_completion(&vfork); 20. if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 21. ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 22. } 23. } else { 24. free_pidmap(pid); 25. pid = PTR_ERR(p); 26. } 27. return pid; 16
  • 17. do_fork() (3/4) [kernel/fork.c] 1. long pid = alloc_pidmap(); 2. if (pid < 0) 3. return -EAGAIN; 4. … 5. p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 6. if (!IS_ERR(p)) { 7. struct completion vfork; 8. if (clone_flags & CLONE_VFORK) { 9. p->vfork_done = &vfork; 10. init_completion(&vfork); 11. } 12. … 13. if (!(clone_flags & CLONE_STOPPED)) 14. wake_up_new_task(p, clone_flags); 15. else 16. p->state = TASK_STOPPED; 17. … 18. if (clone_flags & CLONE_VFORK) { 19. wait_for_completion(&vfork); 20. if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 21. ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 22. } 23. } else { 24. free_pidmap(pid); 25. pid = PTR_ERR(p); 26. } 27. return pid; 17
  • 18. do_fork() (4/4) [kernel/fork.c] 1. long pid = alloc_pidmap(); 2. if (pid < 0) 3. return -EAGAIN; 4. … 5. p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 6. if (!IS_ERR(p)) { 7. struct completion vfork; 8. if (clone_flags & CLONE_VFORK) { 9. p->vfork_done = &vfork; 10. init_completion(&vfork); 11. } 12. … 13. if (!(clone_flags & CLONE_STOPPED)) 14. wake_up_new_task(p, clone_flags); 15. else 16. p->state = TASK_STOPPED; 17. … 18. if (clone_flags & CLONE_VFORK) { 19. wait_for_completion(&vfork); 20. if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 21. ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 22. } 23. } else { 24. free_pidmap(pid); 25. pid = PTR_ERR(p); 26. } 27. return pid; 18
  • 19. copy_process() [kernel/fork.c] int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); 19
  • 20. copy_process() retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); 20
  • 21. dup_task_struct() static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; prepare_to_copy(orig); tsk = alloc_task_struct(); if (!tsk) return NULL; ti = alloc_thread_info(tsk); if (!ti) { free_task_struct(tsk); return NULL; } *ti = *orig->thread_info; *tsk = *orig; tsk->thread_info = ti; ti->task = tsk; atomic_set(&tsk->usage,2); return tsk; } Callouts: prepare_to_copy(orig) → #define unlazy_fpu(tsk) do { if ((tsk)->thread_info->status & TS_USEDFPU) save_init_fpu(tsk); } while (0); alloc_thread_info(tsk) → __get_free_pages(GFP_KERNEL,THREAD_ORDER) 21 Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition
  • 22. copy_process() if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; ... p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; 22
  • 23. PID vs. TGID 1. Every process has a unique pid. 2. Each process in the same thread group has the same tgid. 3. The tgid is the pid of the oldest process in that group. do_fork(){ ... copy_process(){ ... p->pid = pid; ... p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; } } asmlinkage long sys_getpid(void) { return current->tgid; } Diagram (processes created via fork() and clone()): pid:1002 tgid:1002; pid:1003 tgid:1003; pid:1004 tgid:1004; pid:1005 tgid:1003; pid:1006 tgid:1003; pid:1007 tgid:1004 23
  • 24. copy_process() if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; Cleanup labels (error path, in order): bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: 24
  • 25. copy_process() if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; Cleanup labels (error path, in order): bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: 25
  • 26. context_switch() [kernel/sched.c] 1. static inline 2. task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 3. { 4. struct mm_struct *mm = next->mm; 5. struct mm_struct *oldmm = prev->active_mm; 6. if (unlikely(!mm)) { 7. next->active_mm = oldmm; 8. atomic_inc(&oldmm->mm_count); 9. enter_lazy_tlb(oldmm, next); 10. } else 11. switch_mm(oldmm, mm, next); 12. if (unlikely(!prev->mm)) { 13. prev->active_mm = NULL; 14. WARN_ON(rq->prev_mm); 15. rq->prev_mm = oldmm; 16. } 17. /* Here we just switch the register state and the stack. */ 18. switch_to(prev, next, prev); 19. return prev; 20. } 26