1 Task Management March, 2016 Dept. of software Dankook University http://embedded.dankook.ac.kr/~baeksj
Program & Process 2
Task, Process and Thread 3 Process Thread
Task, Process and Thread 4 Process Thread - 실행상태에있는프로그램의 instance - 자원소유권의단위 - - 디스패칭의단위 - 실행흐름 - 수행의단위 -
Task, Process and Thread 5 Process Thread - 실행상태에있는프로그램의 instance - 자원소유권의단위 - - 디스패칭의단위 - 실행흐름 - 수행의단위 - A B
Task, Process and Thread 6 Process Thread - 실행상태에있는프로그램의 instance - 자원소유권의단위 - - 디스패칭의단위 - 실행흐름 - 수행의단위 - fork or vfork clone or pthread_create A B
Task, Process and Thread 7 A B
Task, Process and Thread 8 1. PID A PID B PID
Task, Process and Thread 9 1. PID 2. POSIX: 한 process 내의 thread 는동일한 PID 를공유해야한다 A PID TGID B PID TGID
Task example - fork 10 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <linux/unistd.h> int main(void) { int pid; } printf("before fork n n"); if((pid = fork()) < 0){ printf("fork error n"); exit(-2); }else if (pid == 0){ printf( TGID(%d), PID(%d): Child n", getpid(), syscall( NR_gettid)); }else{ printf("tgid(%d), PID(%d): Parent n", getpid(), syscall( NR_gettid)); sleep(2); } printf( after fork n n ); return 0;
Task example - pthread 11 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <linux/unistd.h> void *t_function(void *data) { int id; int i=0; pthread_t t_id; id = *((int *)data); printf( TGID(%d), PID(%d), pthread_self(%d): Child n", getpid(), syscall( NR_gettid), pthread_self()); sleep(2); } int main() { pthread_t p_thread[2]; int thr_id; int status; int a = 1; int b = 2; printf("before pthread_create() n"); if((thr_id = pthread_create(&p_thread[0], NULL, t_function, (void*)&a)) < 0){ perror("thread create error : "); exit(1); } if((thr_id = pthread_create(&p_thread[1], NULL, t_function, (void*)&b)) < 0){ perror("thread create error : "); exit(2); } pthread_join(p_thread[0], (void **)&status); printf("thread join : %d n", status); pthread_join(p_thread[1], (void **)&status); printf("thread join : %d n", status); } printf( TGID(%d), PID(%d): Parent n", getpid(), syscall( NR_gettid)); return 0;
Task example - vfork 12 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <linux/unistd.h> int main(void) { pid_t pid; printf("before vfork n"); } if((pid = vfork()) < 0){ printf("fork error n"); exit(-2); }else if (pid == 0){ printf( TGID(%d), PID(%d): Child n", getpid(), syscall( NR_gettid)); _exit(0); }else{ printf( TGID(%d), PID(%d): Parent n", getpid(), syscall( NR_gettid)); } printf( after vfork n n ); exit(0);
Task example - clone 13 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <linux/unistd.h> #include <sched.h> int sub_a(void *arg) { printf( TGID(%d), PID(%d): Child n", getpid(), syscall( NR_gettid)); sleep(2); return 0; } int main(void) { int child_a_stack[4096], child_b_stack[4096]; printf( before clone n"); printf( TGID(%d), PID(%d) n", getpid(), syscall( NR_gettid)); CLONE_THREAD option 추가시같은 tgid 를가짐 } clone (sub_a, (void *)(child_a_stack+4095), CLONE_CHILD_CLEARTID CLONE_CHILD_SETTID, NULL); clone (sub_a, (void *)(child_b_stack+4095), CLONE_CHILD_CLEARTID CLONE_CHILD_SETTID, NULL); sleep(1); printf( after clone n n ); return 0;
Flow controls Task implementation in Linux 14 User App Library System call fork() fork() clone() sys_clone() vfork() vfork() vfork() sys_vfork() clone() clone() clone() sys_fork() pthread_create() pthread_create() clone() User Level Kernel Level do_fork() Using strace, ltrace, ctags and cscope kernel_thread()
Task example kernel thread 15 int init module_thread_init(void); void exit module_thread_exit(void); void test_kernel_thread(void *arg){ int val, i, j; val = (int)(long)arg; printk("<0>kernel_thread %d CPU : %d n", val, smp_processor_id()); for(j=0 ; j < 100000000 ; j++); for(i=0 ; i < 100000000 ; i++); printk("<0>kernel_thread %d CPU : %d n", val, smp_processor_id()); } int init module_thread_init(){ } int i; for(i = 0 ; i < 4 ; i++) kernel_thread((int (*)(void *))test_kernel_thread, (void *)(long)i, 0); return 0; void exit module_thread_exit(){ printk("<0>module Thread Test Exit n"); } module_init(module_thread_init); module_exit(module_thread_exit);
Task example kernel thread 16 obj-m KDIR PWD default: clean: := module_thread.o := /lib/modules/$(shell uname -r)/build := $(shell pwd) $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules rm -rf *.ko rm -rf *.mod.* rm -rf.*.cmd rm -rf *.o
Task, Process and Thread 17 1. PID 2. POSIX: 한 process 내의 thread 는동일한 PID 를공유해야한다 Task A PID TGID Task B PID TGID
Task, Process and Thread 18 struct task_struct { pid, tgid, Task A PID TGID Task B PID TGID
Linux Thread Model 19 Internal structure : Process : Thread Process A Process B Process C : task_struct 구조체 : Process Discriptor(8KB) ( 프로세스디스크립터 + 커널모드스택 ) : 커널모드스택 User Level Kernel Level task_struct task_struct task_struct task_struct task_struct task_struct task_struct Kernel Thread A task_struct Kernel Thread B task_struct Scheduler
문맥 (Context) : Context 커널이관리하는태스크의자원과수행환경집합 3 부분으로구성 : 시스템문맥 (system context), 메모리문맥 (memory context), 하드웨어문맥 (hardware context) memory system context task structure segment table page table 20 file table fd thread structure eip sp eflags eax cs hardware context (TSS) swap or a.out disk memory context
System Context 21 태스크를관리하는정보집합 태스크정보 : pid, uid, euid, suid, 태스크상태 : 실행상태, READY 상태, 수면상태, 태스크의가족관계 : p_pptr, p_cptr, next_task, next_run 스케줄링정보 : policy, priority, counter, rt_priority, need_resched 태스크가접근한파일정보 : file descriptor 시그널정보그외 : 수행시간정보, 수행파일이름, 등등 (kernel dependent)
태스크생성과전이 ( 커널수준으로진입 / 수면 ) 의예 22 /* test.c */ int glob = 6; char buf[] = a write to stdout n ; int main(void) { int var; pid_t pid; var = 88; write(stdout_fileno, buf, sizeof(buf)-1); printf( before fork n ); if ((pid = fork()) == 0) { /* child */ glob++; var++; } else sleep(2); /* parent */ printf( pid = %d, glob = %d, var = %d n, getpid(), glob, var); exit (0); } (Source : Adv. programming in the UNIX Env., pgm 8.1)
Task 생성과주소공간예제결과 23
Process State Diagram(1/2) 24 New Exit Admit dispatch Release Ready Running Event Occurs Time-Out Event Wait Blocked Five-State Process Model (Source : OS(stallings))
Process State Diagram(2/2) 25 fork initial (idle) running exit zombie wait fork switch sleep, lock ready wakeup, unlock sleep swap swap suspended ready suspended sleep (Source : UNIX Internals)
Task State Diagram of LINUX(2.4) 26 create wait signal TASK_STOPPED signal TASK_ZOMBIE schedule exit TASK_RUNNING (ready) TASK_RUNNING (running) preempt Event Occurs Event Wait TASK_INTERRUPTIBLE TASK_UNINTERRUPTIBLE
Task State Diagram of LINUX(2.6) 27 create TASK_STOPPED signal TASK_TRACED signal EXIT_ZOMBIE wait schedule exit EXIT_DEAD TASK_RUNNING (ready) TASK_RUNNING (running) preempt Event Occurs Event Wait TASK_INTERRUPTIBLE TASK_UNINTERRUPTIBLE
Task State Diagram of LINUX(3.18) 28 fork TASK_WAKING signal TASK_STOPPED TASK_TRACED schedule signal TASK_DEAD EXIT_ZOMBIE exit wait TASK_RUNNING (ready) TASK_RUNNING (running) TASK_DEAD preempt EXIT_DEAD TASK_WAKING event occur TASK_INTERRUPTIBLE TASK_UNINTERRUPTIBLE TASK_KILLABLE event wait
Memory Context (1/5) 29 fork internal : compile results gcc test.c header movl %eax, [glob] addl %eax, 1 movl [glob], %eax... 0xffffffff 0xbfffffff kernel stack text data bss stack glob, buf var, pid 0x0 data text user s perspective (virtual memory) a.out : (ELF format) /*include/linux/elf.h */
Memory Context (2/5) 30 fork internal : after loading (after run a.out) & before fork task_struct pid = 11 segment table (vm_area_struct) memory text var, pid stack data glob, buf In this figure, we assume that there is no paging mechanism.
Memory Context (3/5) 31 fork internal : after fork memory glob, buf task_struct pid = 11 segment data text var, pid stack task_struct pid = 12 segment data glob, buf var, pid stack address space : basic protection barrier
Memory Context (4/5) 32 fork internal : with COW (Copy on Write) mechanism after fork with COW after glob++ operation task_struct segment task_struct segment data pid = 11 text pid = 11 text stack stack task_struct segment task_struct segment pid = 12 data pid = 12 data memory memory
Memory Context (5/5) 33 새로운프로그램수행 : execve() internal memory task_struct pid = 11 segment data text stack text data a.out header text data bss stack stack
Virtual Memory Describing 34 메모리관리자료구조 include/linux/sched.h, include/linux/mm.h, include/asm-i386/page.h task_struct mm_struct vm_area_struct text mmap vm_start, vm_end mm map_count vm_next pgd... start_code, end_code vm_offset, vm_file data start_data, end_data start_brk, brk start_stack arg_start, arg_end env_start, env_end; vm_start, vm_end vm_next vm_offset, vm_file rss,... pgd_t swap or a.out
Hardware Context (1/3) 35 brief reminds the 80x86 architecture ALU IN Control U. OUT Registers eip, eflags eax, ebx, ecx, edx, esi, edi, cs, ds, ss, es, fs,... cr0, cr1, cr2, cr3, LDTR, TR,...
Hardware Context(2/3) 36 stack 쓰레드자료구조 : CPU 추상화 stack 2 heap data text movl $2, %eax pushl %eax addl %eax, %ebx EIP ESP EAX... 10 2 registers movl $10, %eax call func1 heap data text Address space for Task A Address space for Task B 다시 Task A 가수행되려면? 문맥교환 (Context Switch) thread structure
Hardware Context(3/3) 37 쓰레드자료구조 : CPU 추상화 stack stack 2 heap data EIP ESP EAX... heap data text movl $2, %eax pushl %eax addl %eax, %ebx Address space for Task A EIP ESP EAX registers EIP ESP EAX movl $10, %eax call func1 Address space for Task B text...... tss struct. for task A tss struct. for task B
task_struct & H/W context 38 쓰레드자료구조 : CPU 추상화 task_struct struct thread_struct { struct desc_struct tls_array[gdt_entry_tls_entries]; unsigned long sp0; unsigned long sp; unsigned long sysenter_cs; unsigned long ip; unsigned long cr2; unsigned long fs; unsigned long gs; struct vm86_struct user * vm86_info; unsigned long screen_bitmap; unsigned long v86flags, v86mask, saved_sp0; unsigned int saved_fs, saved_gs; unsigned long *io_bitmap_ptr; }; struct vm86_regs { long ebx; long ecx; long edx; long esi; long edi; long ebp; long eax; long eflags; long esp; };
Kernel Stack 39 Thread union 커널은각태스크별로 8KB메모리할당 thread_info 구조체와 kernel stack alloc_thread_struct, free_thread_struct pt_regs CPU Stack Pointer Register stack current thread_info{ *task flags(need_resched) } task_struct
User VS kernel mode 40 INT, syscall kernel mode 로전환 control path : kernel mode 로진입하기위한일련의명령어 struct pt_regs? 현재 P 의상태를커널스택에저장하기위해사용
Thread Model 비교 41 LinuxThreads vs NPTL
Task 계층구조확인 42
Task 계층구조확인 43
태스크연결자료구조 44 태스크연결구조 init_task task_struct task_struct task_struct next_task next_task next_task next_task prev_task prev_task prev_task prev_task... task_struct run_queue next_run prev_run 태스크가족관계 children.next next_run prev_run where is run_queue? prev_task? next_task? task_struct parent children.prev next_run prev_run younger child task_struct parent, sibling.prev parent, sibling.next sibling.next sibling.prev parent child task_struct sibling.next sibling.prev older child task_struct
Linux Signal 45 시그널처리자료구조 /* kernel/signal.c */ task_struct. sig blocked signal sigpending. sys_signal(sig, handler) sigset_t. signal_struct count action[_nsig] siglock 63 0 sigaction sa_handler sa_flags sa_restorer sa_mask sigset_t. 63 0 /* kernel/signal.c */ sys_kill(pid,sig) do_sigaction(sig, new_sa, old_sa) kill_pgrp_info(sig, info, pid) send_sig_info(sig, info, *t) sigaddset(t->signal, sig); t->sigpending = 1;
46 Linux Signal 시그널처리자료구조 count action[_nsig] siglock sa_handler sa_flags sa_restorer sa_mask signal sighand pending blocked. sighand_struct sigaction sigset_t 63 0. sigset_t 63 0 task_struct list signal sigpending next prev info sigqueue count shared_pendin g rlim signal_struct list signal sigpending si_signo si_code siginfo next prev info sigqueue si_signo si_code siginfo next prev info sigqueue next prev info sigqueue si_signo si_code siginfo
Linux File 47 파일관리자료구조 : fd, inode include/linux/sched.h task_struct... fs_struct files_struct... struct fs_struct { atomic_t count; int umask; struct dentry *root, *pwd; } include/linux/sched.h struct files_struct { atomic_t count; int max_fds; struct file **fd; fd_set close_on_exec; fd_set open_fds; } include/linux/fs.h struct file { struct file *f_next, **f_pprev; struct dentry f_dentry;; struct file_operations *f_op; mode_t f_mode; loff_t f_pos; unsigned int f_count, f_flags;. } include/linux/dcache.h struct dentry { int d_count, d_flags; struct inode *d_inode; struct dentry *d_parent;. unsigned char d_iname[] } include/linux/fs.h struct inode {. }
Multi-CPU 를위한리눅스특징 48 스케줄링 Run queue per each CPU 일반 task : SCHED_NORMAL, SCHED_BATCH, SCHED_IDLE 100~139(-20~19) 실시간 task : SCHED_FIFO, SCHED_RR, SCHED_DEADLINE, 0~99 Load balancing T do_fork() wake_up_new_task() Runqueue T T schedule() CPU tasklist T T T T T T Runqueue T T T schedule() CPU wake_up() load_balance() Runqueue schedule() T CPU
Scheduling Domain & Group 49 struct sched_domain (Multi-chip level) domain struct sched_group 0 2 1 3 Shared L3 cache 4 6 5 7 Shared L3 cache (Multi-core level) domain 0 2 1 3 4 6 5 7 (Hyper-threading level) domain
RT Task Scheduling 50 FIFO & RR O(1) scheduler queue[0] queue[3] queue[99] rt_prio_array.bitmap rt_prio_array.queue DEADLINE (a.k.a., EDF) deadline, runtime, period RB-tree (with deadline) Provides deterministic scheduling
Runqueue and Scheduler Data Structure 51 CPU CPU CFS Runqueue struct rq { unsigned int nr_running; struct load_weight load; u64 nr_switches; struct rt_rq rt; struct dl_rq dl; struct cfs_rq cfs; struct task_struct *curr; struct sched_domain *sd; unsigned long cpu_capacity; unsigned long cpu_load[ ]; }; struct cfs_rq { struct load_weight load; unsigned long nr_running; u64 exec_clock; u64 min_vruntime; struct rb_node *rb_leftmost; struct sched_entity *curr; }; #define MAX_RT_PRIO 100 struct rt_prio_array { DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); struct list_head queue[max_rt_prio]; }; FIFO & RR struct rt_rq { struct rt_prio_array active; unsigned int rt_nr_running; }; DEADLINE struct dl_rq { struct rb_root rb_root; struct rb_node *rb_leftmost; unsigned long dl_nr_running; }; struct sched_domain { struct sched_domain *parent; struct sched_domain *child; struct sched_group *groups; };
Test Code 52 #include <stdio.h> #include <unistd.h> #include <sched.h> int main(void) { struct sched_param param; int i, j; sched_getparam( 0, ¶m); } printf(" nbefore set n"); printf(" Param.priority = %d n", param.sched_priority); printf(" Sched policy = %d n", sched_getscheduler(0)); param.sched_priority = 10; sched_setscheduler(0, SCHED_FIFO, ¶m); sched_getparam( 0, ¶m); printf(" nfifo set n"); printf(" Param.priority = %d n", param.sched_priority); printf(" Sched policy = %d n", sched_getscheduler(0)); param.sched_priority = 20; sched_setscheduler(0, SCHED_RR, ¶m); sched_getparam( 0, ¶m); printf(" nrr set n"); printf(" Param.priority = %d n", param.sched_priority); printf(" Sched policy = %d n", sched_getscheduler(0)); return 0; 1 2 3 아래코드를화살표가가리키고있는세군데에각각넣고실행해보자. 실행도중키보드를누르면반응이있는가? 언제반응이있고, 언제없는가? for(i=0; i<100000; i++) for(j=0; j<100000; j++);
Normal Task Scheduling 53 CFS Completely Fair Scheduler RB-tree (with vruntime) Timer tick 에의해현재수행중인태스크의 vruntime 갱신 struct task_struct { const struct sched_class *sched_class; struct sched_entity se; }; struct sched_entity { struct load_weight load; u64 vruntime; }; update_curr(struct cfs_rq *cfs_rq) curr->vruntime += calc_delta_fair(delta_exec, curr); delta = calc_delta(delta, NICE_0_LOAD, &se->load); struct load_weight { unsigned long weight; u32 inv_weight; };
Normal Task Scheduling 54 CFS Completely Fair Scheduler RB-tree (with vruntime) Timer tick에의해현재수행중인태스크의 vruntime갱신 To avoid frequent context switch, there are minimum runtime and struct task_struct { const struct sched_class *sched_class; struct sched_entity se; }; struct sched_entity { struct load_weight load; u64 vruntime; }; sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) slice = sched_period(cfs_rq->nr_running +!se->on_rq); if(nr_running > sched_nr_latency) return sysctl_sched_min_granularity * nr_running else return sysctl_sched_latency struct load_weight { unsigned long weight; u32 inv_weight; };
Scheduler Activation 55 How When Directly call schedule() Indirectly, mark need_resched flag Check check_preempt_tick() function for more details! Timer tick 수행중이던태스크가자신의타임슬라이스를다썼거나대기상태로전환현재태스크보다높은우선순위를가진태스크가깨어난경우스케줄링관련시스템콜호출 함수명 set_tsk_need_resched(task) clear_tsk_need_resched(task) need_resched() 목적 주어진프로세스의 need_resched 플래그를설정 주어진프로세스의 need_resched 플래그를해제 need_resched 플래그를검사. Set 이면 true, clear 이면 false 를리턴
Linux 스케줄링인터페이스 56 스케줄링관련시스템콜 nice() (just for backward compatibility) sys_nice() 절대값 40 넘으면 40으로맞춤 음수는관리자권한필요 capable() 함수호출해 CAP_SYS_NICE 특성이있는지확인 권한이있다면 current의 nice필드에 increment값더함 단, -20~19사이유지하게함
Linux 스케줄링인터페이스 57 getpriority(), setpriority() 지정한 process그룹에있는모든 process의기본우선순위에영향을미침 getpriority() 는그룹내모든process중가장낮은nice필드값뺀값반환 setpriority() 는지정그룹내모든process의기본우선순위설정 sys_getpriority(), sys_setpriority() 매개변수 which : 프로세스그룹을식별 PRIO_PROCCESS PRIO_PGRP PRIO_USER who : process 선택에사용하는 pid 나, pgrp, uid 필드의값, 0 이면 current niceval : 새로운우선순위값
Linux 스케줄링인터페이스 58 실시간 process 관련시스템콜 sched_getscheduler(), sched_setscheduler() 매개변수 pid 로지정한 process 에현재적용중인스케줄링정책질의 / 설정 Pid 를가지고 find_process_by_pid() 호출해, policy 값을반환 sched_getparam(), sched_setparam() pid 에해당하는 process 의스케줄링인자얻어오거나 / 설정 rt_priority 필드를 sched_param 타입지역변수로저장후 copy_to_user() 호출해복사해줌 sched_yield() Process 를보류상태로만들지않고자발적으로 CPU 반납 sched_get_priority_min(), sched_get_priority_max() policy 매개변수로지정한스케줄링정책에서사용할수있는실시간정적우선순위의최소, 최대값을반환 sched_rr_get_interval() pid 매개변수로지정한실시간 P 의 RR 타임퀀텀을사용자모드주소공간에있는구조체에기록 / 가져옴
Scheduling Policy and Class 59 SCHED_FIFO SCHED_RR const struct sched_class rt_sched_class = {.next = &fair_sched_class,.enqueue_task = enqueue_task_rt,.dequeue_task = dequeue_task_rt,.yield_task = yield_task_rt, }; SCHED_DEADLINE struct task_struct { const struct sched_class *sched_class; unsigned int policy; }; SCHED_NORMAL SCHED_BATCH SCHED_IDLE const struct sched_class rt_sched_class = {.next = &fair_sched_class,.enqueue_task = enqueue_task_rt,.dequeue_task = dequeue_task_rt,.yield_task = yield_task_rt, }; static const struct sched_class fair_sched_class = {.next = &idle_sched_class,.enqueue_task = enqueue_task_fair,.dequeue_task = dequeue_task_fair,.yield_task = yield_task_fair, };
Context Switching 60 stack Task A stack Task B heap heap User mode data text data text Kernel mode EIP ESP EAX... pt_regs stack registers pt_regs stack thread_info{ } struct task_struct { void * stack; struct thread_struct thread; }; struct task_struct { void * stack; struct thread_struct thread; }; thread_info{ }
switch_to() in ARM (1/16) 61 Code Window Memory Dump Window CPU register window
switch_to() in ARM (2/16) 62
switch_to() in ARM (3/16) 63
switch_to() in ARM (4/16) 64
switch_to() in ARM (5/16) 65
switch_to() in ARM (6/16) 66
switch_to() in ARM (7/16) 67
switch_to() in ARM (8/16) 68
switch_to() in ARM (9/16) 69
switch_to() in ARM (10/16) 70
switch_to() in ARM (11/16) 71
switch_to() in ARM (12/16) 72
switch_to() in ARM (13/16) 73
switch_to() in ARM (14/16) 74
switch_to() in ARM (15/16) 75
switch_to() in ARM (16/16) 76