Linux 5.10 arm64 系统调用过程学习记录 简介 用户态 svc 进入内核态 找到系统调用函数 相关数据结构 系统调用表 参考 简介 进程使用标准库例程,库例程接下来调用内核函数,最终由内核负责在各个请求进程之间公平而且流畅地共享资源和服务。
用户态 # include <stdio.h> int main ( ) { FILE * fp = NULL ; fp = fopen ( "test.txt" , "w" ) ; fprintf ( fp, "test\n" ) ; fclose ( fp) ; }
编译,追踪
uname -a Linux 5.11.0-27-generic gcc write_test.c -o write_test ldd write_test
svc 用户态程序要执行系统调用函数,需要通过异常方式进入内核态(由库函数完成):先将当前系统调用的调用号放入 x8 寄存器,然后执行 svc 指令,触发同步异常。参考[1]
Supervisor Call causes an exception to be taken to EL1. On executing an SVC instruction, the PE records the exception as a Supervisor Call exception in ESR_ELx, using the EC value 0x15, and the value of the immediate argument.
进入内核态 以下源码参考:linux-5.10
SYM_CODE_START ( vectors) kernel_ventry 1 , sync_invalid kernel_ventry 1 , irq_invalid kernel_ventry 1 , fiq_invalid kernel_ventry 1 , error_invalid kernel_ventry 1 , sync kernel_ventry 1 , irq kernel_ventry 1 , fiq_invalid kernel_ventry 1 , error kernel_ventry 0 , sync kernel_ventry 0 , irq kernel_ventry 0 , fiq_invalid kernel_ventry 0 , error .macro kernel_ventry, el, label, regsize = 64 .align 7 sub sp, sp, #S_FRAME_SIZE b el\()\el\()_\label .endm SYM_CODE_START_LOCAL_NOALIGN ( el0_sync) kernel_entry 0 mov x0, sp bl el0_sync_handler b ret_to_user SYM_CODE_END ( el0_sync)
找到系统调用函数 asmlinkage void noinstr el0_sync_handler ( struct pt_regs * regs) { unsigned long esr = read_sysreg ( esr_el1) ; switch ( ESR_ELx_EC ( esr) ) { case ESR_ELx_EC_SVC64: el0_svc ( regs) ; break ; . . . } } static void noinstr el0_svc ( struct pt_regs * regs) { . . . do_el0_svc ( regs) ; } void do_el0_svc ( struct pt_regs * regs) { sve_user_discard ( ) ; el0_svc_common ( regs, regs-> regs[ 8 ] , __NR_syscalls, sys_call_table) ; } static void el0_svc_common ( struct pt_regs * regs, int scno, int sc_nr, const syscall_fn_t syscall_table[ ] ) { unsigned long flags = current_thread_info ( ) -> flags; regs-> orig_x0 = regs-> regs[ 0 ] ; regs-> syscallno = scno; . . . . invoke_syscall ( regs, scno, sc_nr, syscall_table) ; . . . . } static void invoke_syscall ( struct pt_regs * regs, unsigned int scno, unsigned int sc_nr, const syscall_fn_t syscall_table[ ] ) { long ret; if ( scno < sc_nr) { syscall_fn_t syscall_fn; syscall_fn = syscall_table[ array_index_nospec ( scno, sc_nr) ] ; ret = __invoke_syscall ( regs, syscall_fn) ; } else { ret = do_ni_syscall ( regs, scno) ; } if ( is_compat_task ( ) ) ret = lower_32_bits ( ret) ; regs-> regs[ 0 ] = ret; } static long __invoke_syscall ( struct pt_regs * regs, syscall_fn_t syscall_fn) { return syscall_fn ( regs) ; }
相关数据结构 struct pt_regs { union { struct user_pt_regs user_regs; struct { u64 regs[ 31 ] ; u64 sp; u64 pc; u64 pstate; } ; } ; u64 orig_x0; # ifdef __AARCH64EB__ u32 unused2; s32 syscallno; # else s32 syscallno; u32 unused2; # endif u64 orig_addr_limit; u64 pmr_save; u64 stackframe[ 2 ] ; u64 lockdep_hardirqs; u64 exit_rcu; } ; typedef long ( * syscall_fn_t) ( const struct pt_regs * regs) ;
系统调用表 # undef __SYSCALL # define __SYSCALL ( nr, sym) asmlinkage long __arm64_## sym ( const struct pt_regs * ) ; # include <asm/unistd.h> # undef __SYSCALL # define __SYSCALL ( nr, sym) [ nr] = __arm64_## sym, const syscall_fn_t sys_call_table[ __NR_syscalls] = { [ 0 ... __NR_syscalls - 1 ] = __arm64_sys_ni_syscall, # include <asm/unistd.h> } ; # include <uapi/asm/unistd.h> # define NR_syscalls ( __NR_syscalls) # define __ARCH_WANT_RENAMEAT # define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_SET_GET_RLIMIT # define __ARCH_WANT_TIME32_SYSCALLS # define __ARCH_WANT_SYS_CLONE3 # include <asm-generic/unistd.h> # define __NR_io_setup 0 __SC_COMP ( __NR_io_setup, sys_io_setup, compat_sys_io_setup) # define __NR_io_destroy 1 __SYSCALL ( __NR_io_destroy, sys_io_destroy) . . . . # define __NR_syscalls 441
通过上面文件的展开, 系统调用表为:
const syscall_fn_t sys_call_table[ __NR_syscalls] = { [ 0 ... __NR_syscalls - 1 ] = __arm64_sys_ni_syscall, [ 0 ] = __arm64_sys_io_setup, [ 1 ] = __arm64_sys_io_destroy, . . . . . . } ;
系统调用宏
# define __SYSCALL_DEFINEx ( x, name, . . . ) \ asmlinkage long __arm64_sys ## name ( const struct pt_regs * regs) ; \ ALLOW_ERROR_INJECTION ( __arm64_sys## name, ERRNO) ; \ static long __se_sys## name ( __MAP ( x, __SC_LONG, __VA_ARGS__) ) ; \ static inline long __do_sys## name ( __MAP ( x, __SC_DECL, __VA_ARGS__) ) ; \ asmlinkage long __arm64_sys ## name ( const struct pt_regs * regs) \ { \ return __se_sys## name ( SC_ARM64_REGS_TO_ARGS ( x, __VA_ARGS__) ) ; \ } \ static long __se_sys## name ( __MAP ( x, __SC_LONG, __VA_ARGS__) ) \ { \ long ret = __do_sys## name ( __MAP ( x, __SC_CAST, __VA_ARGS__) ) ; \ __MAP ( x, __SC_TEST, __VA_ARGS__) ; \ __PROTECT ( x, ret, __MAP ( x, __SC_ARGS, __VA_ARGS__) ) ; \ return ret; \ } \ static inline long __do_sys## name ( __MAP ( x, __SC_DECL, __VA_ARGS__) ) # define SYSCALL_DEFINE0 ( sname) \ SYSCALL_METADATA ( _## sname, 0 ) ; \ asmlinkage long __arm64_sys_ ## sname ( const struct pt_regs * __unused) ; \ ALLOW_ERROR_INJECTION ( __arm64_sys_## sname, ERRNO) ; \ asmlinkage long __arm64_sys_ ## sname ( const struct pt_regs * __unused) # ifndef SYSCALL_DEFINE0 # define SYSCALL_DEFINE0 ( sname) \ SYSCALL_METADATA ( _## sname, 0 ) ; \ asmlinkage long sys_ ## sname ( void ) ; \ ALLOW_ERROR_INJECTION ( sys_## sname, ERRNO) ; \ asmlinkage long sys_ ## sname ( void ) # endif # define SYSCALL_DEFINE1 ( name, . . . ) SYSCALL_DEFINEx ( 1 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE2 ( name, . . . ) SYSCALL_DEFINEx ( 2 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE3 ( name, . . . ) SYSCALL_DEFINEx ( 3 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE4 ( name, . . . ) SYSCALL_DEFINEx ( 4 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE5 ( name, . . . ) SYSCALL_DEFINEx ( 5 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE6 ( name, . . . ) SYSCALL_DEFINEx ( 6 , _## name, __VA_ARGS__) # define SYSCALL_DEFINE_MAXARGS 6 # define SYSCALL_DEFINEx ( x, sname, . . . 
) \ SYSCALL_METADATA ( sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx ( x, sname, __VA_ARGS__)
对于 write 系统调用,宏展开如下:
# define __NR_write 64 __SYSCALL ( __NR_write, sys_write) asmlinkage long __arm64_sys_write ( const struct pt_regs * ) ; SYSCALL_DEFINE3 ( write, unsigned int , fd, const char __user * , buf, size_t, count) { return ksys_write ( fd, buf, count) ; }
参考 [1] 系统调用实现原理 [2] Linux内核系统调用原理与实现 [3] Linux系统调用之SYSCALL_DEFINE