Skip to content
This repository was archived by the owner on Jan 6, 2025. It is now read-only.

Commit 5807c1e

Browse files
author
Sarah Jelinek
authored
Merge pull request #37 from GBuella/fix_intercept_template_s
intercept asm template improvements/fixes
2 parents 7c4d134 + e8ccf57 commit 5807c1e

File tree

1 file changed

+64
-35
lines changed

1 file changed

+64
-35
lines changed

src/intercept_template.s

Lines changed: 64 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,25 @@
248248

249249
backtrace_placeholder:
250250
.cfi_startproc
251+
/*
252+
* Call Frame Information. The stack size is 0x580,
253+
* and the cfi_offset entries mark the place of each
254+
* register's previous value in the call frame.
255+
*/
251256
.cfi_def_cfa_offset 0x580
257+
.cfi_offset 6, -136 /* rbp */
258+
.cfi_offset 15, -152 /* r15 */
259+
.cfi_offset 14, -160 /* r14 */
260+
.cfi_offset 13, -168 /* r13 */
261+
.cfi_offset 12, -176 /* r12 */
262+
.cfi_offset 10, -184 /* r10 */
263+
.cfi_offset 9, -192 /* r9 */
264+
.cfi_offset 8, -200 /* r8 */
265+
.cfi_offset 2, -208 /* rcx */
266+
.cfi_offset 1, -216 /* rdx -- DWARF register 1 on x86-64 (register 4 would be rsi, which lives at -224) */
267+
/* rsi at -224 */
268+
/* rdi at -232 */
269+
.cfi_offset 3, -240 /* rbx */
252270
nop
253271
nop
254272
nop
@@ -259,7 +277,20 @@ backtrace_placeholder:
259277

260278
backtrace_placeholder_2:
261279
.cfi_startproc
262-
.cfi_def_cfa_offset 0x578
280+
.cfi_def_cfa_offset 0x588
281+
.cfi_offset 6, -136 /* rbp */
282+
.cfi_offset 15, -152 /* r15 */
283+
.cfi_offset 14, -160 /* r14 */
284+
.cfi_offset 13, -168 /* r13 */
285+
.cfi_offset 12, -176 /* r12 */
286+
.cfi_offset 10, -184 /* r10 */
287+
.cfi_offset 9, -192 /* r9 */
288+
.cfi_offset 8, -200 /* r8 */
289+
.cfi_offset 2, -208 /* rcx */
290+
.cfi_offset 1, -216 /* rdx -- DWARF register 1 on x86-64 (register 4 would be rsi, which lives at -224) */
291+
/* rsi at -224 */
292+
/* rdi at -232 */
293+
.cfi_offset 3, -240 /* rbx */
263294
nop
264295
nop
265296
nop
@@ -306,8 +337,6 @@ intercept_asm_wrapper_push_stack_first_return_addr:
306337

307338
movq %rsp, %rbx
308339

309-
orq $0x1f, %rsp
310-
subq $0x3f, %rsp
311340
/*
312341
* Reserve stack for SIMD registers.
313342
* Largest space is used in the AVX512 case, 32 * 32 bytes.
@@ -319,14 +348,14 @@ intercept_asm_wrapper_simd_save:
319348
* these nops are going to be replaced with CPU
320349
* dependent code.
321350
*/
322-
movaps %xmm0, (%rsp)
323-
movaps %xmm1, 0x10 (%rsp)
324-
movaps %xmm2, 0x20 (%rsp)
325-
movaps %xmm3, 0x30 (%rsp)
326-
movaps %xmm4, 0x40 (%rsp)
327-
movaps %xmm5, 0x50 (%rsp)
328-
movaps %xmm6, 0x60 (%rsp)
329-
movaps %xmm7, 0x70 (%rsp)
351+
movups %xmm0, (%rsp)
352+
movups %xmm1, 0x10 (%rsp)
353+
movups %xmm2, 0x20 (%rsp)
354+
movups %xmm3, 0x30 (%rsp)
355+
movups %xmm4, 0x40 (%rsp)
356+
movups %xmm5, 0x50 (%rsp)
357+
movups %xmm6, 0x60 (%rsp)
358+
movups %xmm7, 0x70 (%rsp)
330359
.fill 32, 1, 0x90
331360

332361
pushq %rbx
@@ -452,14 +481,14 @@ L1:
452481
popq %rbx
453482

454483
intercept_asm_wrapper_simd_restore:
455-
movaps (%rsp), %xmm0
456-
movaps 0x10 (%rsp), %xmm1
457-
movaps 0x20 (%rsp), %xmm2
458-
movaps 0x30 (%rsp), %xmm3
459-
movaps 0x40 (%rsp), %xmm4
460-
movaps 0x50 (%rsp), %xmm5
461-
movaps 0x60 (%rsp), %xmm6
462-
movaps 0x70 (%rsp), %xmm7
484+
movups (%rsp), %xmm0
485+
movups 0x10 (%rsp), %xmm1
486+
movups 0x20 (%rsp), %xmm2
487+
movups 0x30 (%rsp), %xmm3
488+
movups 0x40 (%rsp), %xmm4
489+
movups 0x50 (%rsp), %xmm5
490+
movups 0x60 (%rsp), %xmm6
491+
movups 0x70 (%rsp), %xmm7
463492
.fill 32, 1, 0x90
464493

465494

@@ -540,23 +569,23 @@ intercept_asm_wrapper_call_clone_child_intercept:
540569
intercept_asm_wrapper_end:
541570

542571
intercept_asm_wrapper_simd_save_YMM:
543-
vmovaps %ymm0, (%rsp)
544-
vmovaps %ymm1, 0x20 (%rsp)
545-
vmovaps %ymm2, 0x40 (%rsp)
546-
vmovaps %ymm3, 0x60 (%rsp)
547-
vmovaps %ymm4, 0x80 (%rsp)
548-
vmovaps %ymm5, 0xa0 (%rsp)
549-
vmovaps %ymm6, 0xc0 (%rsp)
550-
vmovaps %ymm7, 0xe0 (%rsp)
572+
vmovups %ymm0, (%rsp)
573+
vmovups %ymm1, 0x20 (%rsp)
574+
vmovups %ymm2, 0x40 (%rsp)
575+
vmovups %ymm3, 0x60 (%rsp)
576+
vmovups %ymm4, 0x80 (%rsp)
577+
vmovups %ymm5, 0xa0 (%rsp)
578+
vmovups %ymm6, 0xc0 (%rsp)
579+
vmovups %ymm7, 0xe0 (%rsp)
551580
intercept_asm_wrapper_simd_save_YMM_end:
552581

553582
intercept_asm_wrapper_simd_restore_YMM:
554-
vmovaps (%rsp), %ymm0
555-
vmovaps 0x20 (%rsp), %ymm1
556-
vmovaps 0x40 (%rsp), %ymm2
557-
vmovaps 0x60 (%rsp), %ymm3
558-
vmovaps 0x80 (%rsp), %ymm4
559-
vmovaps 0xa0 (%rsp), %ymm5
560-
vmovaps 0xc0 (%rsp), %ymm6
561-
vmovaps 0xe0 (%rsp), %ymm7
583+
vmovups (%rsp), %ymm0
584+
vmovups 0x20 (%rsp), %ymm1
585+
vmovups 0x40 (%rsp), %ymm2
586+
vmovups 0x60 (%rsp), %ymm3
587+
vmovups 0x80 (%rsp), %ymm4
588+
vmovups 0xa0 (%rsp), %ymm5
589+
vmovups 0xc0 (%rsp), %ymm6
590+
vmovups 0xe0 (%rsp), %ymm7
562591
intercept_asm_wrapper_simd_restore_YMM_end:

0 commit comments

Comments
 (0)