C++ 學習 ---__libc_open 函式的原理
引言
__libc_open 是 glibc 中 openat 介面的封裝,針對給定的 file 路徑,開啟檔案,生成一個檔案描述符 fd。可以建立檔案/建立臨時檔案/開啟已有檔案。在很多 IO 介面函式中都有使用到,我們來一起看看它的實現原理。
原始碼分析
程式碼參考:glibc/sysdeps/unix/sysv/linux/open.c
28 /* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG,
29 a third argument is the file protection. */
30 int
31 __libc_open (const char *file, int oflag, ...)
32 {
33 int mode = 0;
34
35 if (__OPEN_NEEDS_MODE (oflag))
36 {
37 va_list arg;
38 va_start (arg, oflag);
39 mode = va_arg (arg, int);
40 va_end (arg);
41 }
42
43 return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
44 }
複製程式碼
首先根據 oflag 中是否包含 O_CREAT 或 O_TMPFILE,判斷是否需要從可變引數中讀取第三個引數 mode(即上面程式碼註釋中所說的 file protection,用來指定新建檔案的訪問許可權);不需要時 mode 保持為 0。
__OPEN_NEEDS_MODE 巨集判定
具體實現裡面就是通過位運算檢測 oflag 中是否包含__O_TMPFILE 或 O_CREAT
//glibc/io/fcntl.h
37 /* Detect if open needs mode as a third argument (or for openat as a fourth
38 argument). */
39 #ifdef __O_TMPFILE
40 # define __OPEN_NEEDS_MODE(oflag) \
41 (((oflag) & O_CREAT) != 0 || ((oflag) & __O_TMPFILE) == __O_TMPFILE)
42 #else
43 # define __OPEN_NEEDS_MODE(oflag) (((oflag) & O_CREAT) != 0)
44 #endif
複製程式碼
SYSCALL_CANCEL
這個巨集最終還是通過 INLINE_SYSCALL_CALL 完成我們的呼叫,其餘部分是執行緒取消(cancellation)相關的處理:如果 NO_SYSCALL_CANCEL_CHECKING 為真,就直接發起呼叫;否則在呼叫前後分別執行 LIBC_CANCEL_ASYNC 和 LIBC_CANCEL_RESET。取消機制的細節這裡就不展開了。
//glibc/sysdeps/unix/sysdep.h
111 #if IS_IN (rtld)
112 /* All cancellation points are compiled out in the dynamic loader. */
113 # define NO_SYSCALL_CANCEL_CHECKING 1
114 #else
115 # define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P
116 #endif
117
118 #define SYSCALL_CANCEL(...) \
119 ({ \
120 long int sc_ret; \
121 if (NO_SYSCALL_CANCEL_CHECKING) \
122 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
123 else \
124 { \
125 int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \
126 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
127 LIBC_CANCEL_RESET (sc_cancel_oldtype); \
128 } \
129 sc_ret; \
130 })
複製程式碼
INLINE_SYSCALL_CALL
這裡就是對 syscall 的相關封裝了,實際上就是要從我們之前傳入的 SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
解析出具體需要執行的彙編指令
103 /* Issue a syscall defined by syscall number plus any other argument
104 required. Any error will be handled using arch defined macros and errno
105 will be set accordingly.
106 It is similar to INLINE_SYSCALL macro, but without the need to pass the
107 expected argument number as second parameter. */
108 #define INLINE_SYSCALL_CALL(...) \
109 __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)
複製程式碼
巨集展開之後 __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, openat, AT_FDCWD, file, oflag, mode)
100 #define __INLINE_SYSCALL_DISP(b,...) \
101 __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
複製程式碼
展開之後: __SYSCALL_CONCAT (__INLINE_SYSCALL, __INLINE_SYSCALL_NARGS(openat, AT_FDCWD, file, oflag, mode)) (openat, AT_FDCWD, file, oflag, mode)
__INLINE_SYSCALL_NARGS
先具體分析__INLINE_SYSCALL_NARGS 的巨集展開方式:
98 #define __INLINE_SYSCALL_NARGS(...) \
99 __INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)
複製程式碼
展開如下,在原有引數後面增加了 8 個數字
__INLINE_SYSCALL_NARGS_X (openat, AT_FDCWD, file, oflag, mode,7,6,5,4,3,2,1,0,)
繼續展開,可以看到這個巨集取的是第九個引數,對應上面這個表示式,也就是 4。由此可以推出:如果輸入有 1 個引數,那麼返回 0;輸入有 2 個引數,返回 1;依次類推,輸入有 8 個引數,返回 7。也就是說,返回值是除去 syscall 名稱之後實際傳給系統呼叫的引數個數。
97 #define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
複製程式碼
__SYSCALL_CONCAT
實際上就是把兩個記號(token)拼接成一個新的記號,“##”在巨集當中是記號連線運算子。這裡多包了一層 __SYSCALL_CONCAT_X,是為了讓引數(例如 __INLINE_SYSCALL_NARGS(...))先被完全展開,然後再進行拼接。
27 #define __SYSCALL_CONCAT_X(a,b) a##b
28 #define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X (a, b)
複製程式碼
至此,我們的巨集展開就應該是這樣的
__INLINE_SYSCALL4(openat, AT_FDCWD, file, oflag, mode)
__INLINE_SYSCALL4
按照下面的定義,它會被展開為 INLINE_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode):
80 #define __INLINE_SYSCALL0(name) \
81 INLINE_SYSCALL (name, 0)
82 #define __INLINE_SYSCALL1(name, a1) \
83 INLINE_SYSCALL (name, 1, a1)
84 #define __INLINE_SYSCALL2(name, a1, a2) \
85 INLINE_SYSCALL (name, 2, a1, a2)
86 #define __INLINE_SYSCALL3(name, a1, a2, a3) \
87 INLINE_SYSCALL (name, 3, a1, a2, a3)
88 #define __INLINE_SYSCALL4(name, a1, a2, a3, a4) \
89 INLINE_SYSCALL (name, 4, a1, a2, a3, a4)
90 #define __INLINE_SYSCALL5(name, a1, a2, a3, a4, a5) \
91 INLINE_SYSCALL (name, 5, a1, a2, a3, a4, a5)
92 #define __INLINE_SYSCALL6(name, a1, a2, a3, a4, a5, a6) \
93 INLINE_SYSCALL (name, 6, a1, a2, a3, a4, a5, a6)
94 #define __INLINE_SYSCALL7(name, a1, a2, a3, a4, a5, a6, a7) \
95 INLINE_SYSCALL (name, 7, a1, a2, a3, a4, a5, a6, a7)
複製程式碼
INLINE_SYSCALL
封裝呼叫了 INTERNAL_SYSCALL 巨集
INTERNAL_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)
38 /* Define a macro which expands into the inline wrapper code for a system
39 call. It sets the errno and returns -1 on a failure, or the syscall
40 return value otherwise. */
41 #undef INLINE_SYSCALL
42 #define INLINE_SYSCALL(name, nr, args...) \
43 ({ \
44 long int sc_ret = INTERNAL_SYSCALL (name, nr, args); \
45 __glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret)) \
46 ? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret)) \
47 : sc_ret; \
48 })
複製程式碼
INTERNAL_SYSCALL
這一步的實現因平臺而異,這裡以 x86_64 平臺為例,上面的呼叫會被依次展開為:
internal_syscall4 (SYS_ify (openat), AT_FDCWD, file, oflag, mode)
internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)
//glibc/sysdeps/unix/sysv/linux/x86_64/sysdep.h
29 /* For Linux we can use the system call table in the header file
30 /usr/include/asm/unistd.h
31 of the kernel. But these symbols do not follow the SYS_* syntax
32 so we have to redefine the `SYS_ify' macro here. */
33 #undef SYS_ify
34 #define SYS_ify(syscall_name) __NR_##syscall_name
233 #undef INTERNAL_SYSCALL
234 #define INTERNAL_SYSCALL(name, nr, args...) \
235 internal_syscall##nr (SYS_ify (name), args)
複製程式碼
最終我們的呼叫為 internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)
依次將 4 個引數裝入暫存器 rdi、rsi、rdx、r10 中,系統呼叫號則通過 "0" 約束與輸出運算元共用同一個暫存器("=a",即 rax),然後執行 syscall 指令完成呼叫,返回值同樣從該暫存器取出。這裡用到的是 GCC 的內聯彙編(asm)語法。
301 #undef internal_syscall4
302 #define internal_syscall4(number, arg1, arg2, arg3, arg4) \
303 ({ \
304 unsigned long int resultvar; \
305 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
306 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
307 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
308 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
309 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
310 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
311 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
312 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
313 asm volatile ( \
314 "syscall\n\t" \
315 : "=a" (resultvar) \
316 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
317 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
318 (long int) resultvar; \
319 })
複製程式碼
我們再看一下該架構下引數最多的巨集(x86_64 架構最多隻支援 6 個引數):
344 #undef internal_syscall6
345 #define internal_syscall6(number, arg1, arg2, arg3, arg4, arg5, arg6) \
346 ({ \
347 unsigned long int resultvar; \
348 TYPEFY (arg6, __arg6) = ARGIFY (arg6); \
349 TYPEFY (arg5, __arg5) = ARGIFY (arg5); \
350 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
351 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
352 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
353 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
354 register TYPEFY (arg6, _a6) asm ("r9") = __arg6; \
355 register TYPEFY (arg5, _a5) asm ("r8") = __arg5; \
356 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
357 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
358 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
359 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
360 asm volatile ( \
361 "syscall\n\t" \
362 : "=a" (resultvar) \
363 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
364 "r" (_a5), "r" (_a6) \
365 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
366 (long int) resultvar; \
367 })
複製程式碼
可以看到,6 個引數依次裝入暫存器 rdi、rsi、rdx、r10、r8、r9。
支援最多 7 個引數的只有如下的架構:
riscv 架構:glibc/sysdeps/unix/sysv/linux/riscv/sysdep.h,分別使用 a0 到 a6 暫存器裝引數,系統呼叫號放在 a7 暫存器中,返回值從 a0 取出
323 # define internal_syscall7(number, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
324 ({ \
325 long int _sys_result; \
326 long int _arg0 = (long int) (arg0); \
327 long int _arg1 = (long int) (arg1); \
328 long int _arg2 = (long int) (arg2); \
329 long int _arg3 = (long int) (arg3); \
330 long int _arg4 = (long int) (arg4); \
331 long int _arg5 = (long int) (arg5); \
332 long int _arg6 = (long int) (arg6); \
333 \
334 { \
335 register long int __a7 asm ("a7") = number; \
336 register long int __a0 asm ("a0") = _arg0; \
337 register long int __a1 asm ("a1") = _arg1; \
338 register long int __a2 asm ("a2") = _arg2; \
339 register long int __a3 asm ("a3") = _arg3; \
340 register long int __a4 asm ("a4") = _arg4; \
341 register long int __a5 asm ("a5") = _arg5; \
342 register long int __a6 asm ("a6") = _arg6; \
343 __asm__ volatile ( \
344 "scall\n\t" \
345 : "+r" (__a0) \
346 : "r" (__a7), "r" (__a1), "r" (__a2), "r" (__a3), \
347 "r" (__a4), "r" (__a5), "r" (__a6) \
348 : __SYSCALL_CLOBBERS); \
349 _sys_result = __a0; \
350 } \
351 _sys_result; \
352 })
複製程式碼
mips 架構:glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
315 #define internal_syscall7(v0_init, input, number, err, \
316 arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
317 ({ \
318 union __mips_syscall_return _sc_ret; \
319 _sc_ret.val = __mips_syscall7 ((long int) (arg1), \
320 (long int) (arg2), \
321 (long int) (arg3), \
322 (long int) (arg4), \
323 (long int) (arg5), \
324 (long int) (arg6), \
325 (long int) (arg7), \
326 (long int) (number)); \
327 _sc_ret.reg.v1 != 0 ? -_sc_ret.reg.v0 : _sc_ret.reg.v0; \
328 })
複製程式碼
總結
從最開始的 SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode) 巨集,依次轉換為 internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode),然後呼叫彙編實現底層 syscall 操作。
- 那些 Go 語言發展歷史上的重大決策
- 從趨勢到挑戰,一站式解讀作業系統運維和可觀測性
- 百萬級 Topic,騰訊雲的 Apache Pulsar 穩定性實踐
- Apache Doris 在思必馳的應用優化實踐:海量語音通話資料下,實時、離線一體的數倉架構設計實踐
- 愛數正式開源認知智慧開發框架 KWeaver
- 運維智慧化的三大關鍵技術
- “抄我的還‘反捅’我一刀”,Gary Marcus 發文駁斥圖靈獎得主 Yann LeCun
- 當出海成為必選項,企業如何構建全場景全生態技術底座?
- 數智底座必備能力三:快速構建創新應用
- Docker 多階段構建實戰 (multi-stage builds)
- 工作筆記之 SELECT 語句在 SAP ABAP 中的用法總結(上)
- 經久不衰的設計定律是不要讓我思考的設計
- 不要指望下一個像 GPT 這樣的大型語言模型會民主化
- Java 近期新聞:Helidon Níma、Spring Framework、MicroProfile、MicroStream、Kotlin 和 Piranha
- 一文入門 jQuery
- C 學習 ---__libc_open 函式的原理
- 監控系統工作原理
- 甲骨文新微服務框架 Helidon Níma:使用虛擬執行緒實現高效能
- 【雲原生 | 從零開始學 Kubernetes】二、使用 kubeadm 搭建 K8S 叢集
- Elasticsearch 聚合學習之四:結果排序