[hjl@gnu-6 interrupt-8]$ cat x.i extern void subroutine1(void); __attribute__ ((interrupt)) void ih(void * frame) { subroutine1(); } [hjl@gnu-6 interrupt-8]$ make x.s /export/build/gnu/llvm-clang-bootstrap/stage1/build-x86_64-linux/bin/clang -O2 -Wall -march=nehalem -S -o x.s x.i [hjl@gnu-6 interrupt-8]$ cat x.s .text .file "x.i" .globl ih .p2align 4, 0x90 .type ih,@function ih: # @ih .cfi_startproc # BB#0: pushq %rsp .Ltmp0: .cfi_def_cfa_offset 16 pushq %rax .Ltmp1: .cfi_def_cfa_offset 24 pushq %r11 .Ltmp2: .cfi_def_cfa_offset 32 pushq %r10 .Ltmp3: .cfi_def_cfa_offset 40 pushq %r9 .Ltmp4: .cfi_def_cfa_offset 48 pushq %r8 .Ltmp5: .cfi_def_cfa_offset 56 pushq %rdi .Ltmp6: .cfi_def_cfa_offset 64 pushq %rsi .Ltmp7: .cfi_def_cfa_offset 72 pushq %rdx .Ltmp8: .cfi_def_cfa_offset 80 pushq %rcx .Ltmp9: .cfi_def_cfa_offset 88 subq $392, %rsp # imm = 0x188 movaps %xmm31, 368(%rsp) # 16-byte Spill movaps %xmm30, 352(%rsp) # 16-byte Spill movaps %xmm29, 336(%rsp) # 16-byte Spill movaps %xmm28, 320(%rsp) # 16-byte Spill movaps %xmm27, 304(%rsp) # 16-byte Spill movaps %xmm26, 288(%rsp) # 16-byte Spill movaps %xmm25, 272(%rsp) # 16-byte Spill movaps %xmm24, 256(%rsp) # 16-byte Spill movaps %xmm23, 240(%rsp) # 16-byte Spill movaps %xmm22, 224(%rsp) # 16-byte Spill movaps %xmm21, 208(%rsp) # 16-byte Spill movaps %xmm20, 192(%rsp) # 16-byte Spill movaps %xmm19, 176(%rsp) # 16-byte Spill movaps %xmm18, 160(%rsp) # 16-byte Spill movaps %xmm17, 144(%rsp) # 16-byte Spill movaps %xmm16, 128(%rsp) # 16-byte Spill movaps %xmm15, 112(%rsp) # 16-byte Spill movaps %xmm14, 96(%rsp) # 16-byte Spill movaps %xmm13, 80(%rsp) # 16-byte Spill movaps %xmm12, 64(%rsp) # 16-byte Spill movaps %xmm11, 48(%rsp) # 16-byte Spill movaps %xmm10, 32(%rsp) # 16-byte Spill movaps %xmm9, 16(%rsp) # 16-byte Spill movaps %xmm8, (%rsp) # 16-byte Spill movaps %xmm7, -16(%rsp) # 16-byte Spill movaps %xmm6, -32(%rsp) # 16-byte Spill movaps %xmm5, -48(%rsp) # 16-byte Spill movaps %xmm4, -64(%rsp) # 16-byte Spill movaps 
%xmm3, -80(%rsp) # 16-byte Spill movaps %xmm2, -96(%rsp) # 16-byte Spill movaps %xmm1, -112(%rsp) # 16-byte Spill movaps %xmm0, -128(%rsp) # 16-byte Spill .Ltmp10: .cfi_def_cfa_offset 480 .Ltmp11: .cfi_offset %rcx, -88 .Ltmp12: .cfi_offset %rdx, -80 .Ltmp13: .cfi_offset %rsi, -72 .Ltmp14: .cfi_offset %rdi, -64 .Ltmp15: .cfi_offset %r8, -56 .Ltmp16: .cfi_offset %r9, -48 .Ltmp17: .cfi_offset %r10, -40 .Ltmp18: .cfi_offset %r11, -32 .Ltmp19: .cfi_offset %xmm0, -608 .Ltmp20: .cfi_offset %xmm1, -592 .Ltmp21: .cfi_offset %xmm2, -576 .Ltmp22: .cfi_offset %xmm3, -560 .Ltmp23: .cfi_offset %xmm4, -544 .Ltmp24: .cfi_offset %xmm5, -528 .Ltmp25: .cfi_offset %xmm6, -512 .Ltmp26: .cfi_offset %xmm7, -496 .Ltmp27: .cfi_offset %xmm8, -480 .Ltmp28: .cfi_offset %xmm9, -464 .Ltmp29: .cfi_offset %xmm10, -448 .Ltmp30: .cfi_offset %xmm11, -432 .Ltmp31: .cfi_offset %xmm12, -416 .Ltmp32: .cfi_offset %xmm13, -400 .Ltmp33: .cfi_offset %xmm14, -384 .Ltmp34: .cfi_offset %xmm15, -368 .Ltmp35: .cfi_offset %rax, -24 .Ltmp36: .cfi_offset %rsp, -16 .Ltmp37: .cfi_offset %xmm16, -352 .Ltmp38: .cfi_offset %xmm17, -336 .Ltmp39: .cfi_offset %xmm18, -320 .Ltmp40: .cfi_offset %xmm19, -304 .Ltmp41: .cfi_offset %xmm20, -288 .Ltmp42: .cfi_offset %xmm21, -272 .Ltmp43: .cfi_offset %xmm22, -256 .Ltmp44: .cfi_offset %xmm23, -240 .Ltmp45: .cfi_offset %xmm24, -224 .Ltmp46: .cfi_offset %xmm25, -208 .Ltmp47: .cfi_offset %xmm26, -192 .Ltmp48: .cfi_offset %xmm27, -176 .Ltmp49: .cfi_offset %xmm28, -160 .Ltmp50: .cfi_offset %xmm29, -144 .Ltmp51: .cfi_offset %xmm30, -128 .Ltmp52: .cfi_offset %xmm31, -112 movaps -128(%rsp), %xmm0 # 16-byte Reload movaps -112(%rsp), %xmm1 # 16-byte Reload movaps -96(%rsp), %xmm2 # 16-byte Reload movaps -80(%rsp), %xmm3 # 16-byte Reload movaps -64(%rsp), %xmm4 # 16-byte Reload movaps -48(%rsp), %xmm5 # 16-byte Reload movaps -32(%rsp), %xmm6 # 16-byte Reload movaps -16(%rsp), %xmm7 # 16-byte Reload movaps (%rsp), %xmm8 # 16-byte Reload movaps 16(%rsp), %xmm9 # 16-byte Reload movaps 32(%rsp), 
%xmm10 # 16-byte Reload movaps 48(%rsp), %xmm11 # 16-byte Reload movaps 64(%rsp), %xmm12 # 16-byte Reload movaps 80(%rsp), %xmm13 # 16-byte Reload movaps 96(%rsp), %xmm14 # 16-byte Reload movaps 112(%rsp), %xmm15 # 16-byte Reload movaps 128(%rsp), %xmm16 # 16-byte Reload movaps 144(%rsp), %xmm17 # 16-byte Reload movaps 160(%rsp), %xmm18 # 16-byte Reload movaps 176(%rsp), %xmm19 # 16-byte Reload movaps 192(%rsp), %xmm20 # 16-byte Reload movaps 208(%rsp), %xmm21 # 16-byte Reload movaps 224(%rsp), %xmm22 # 16-byte Reload movaps 240(%rsp), %xmm23 # 16-byte Reload movaps 256(%rsp), %xmm24 # 16-byte Reload movaps 272(%rsp), %xmm25 # 16-byte Reload movaps 288(%rsp), %xmm26 # 16-byte Reload movaps 304(%rsp), %xmm27 # 16-byte Reload movaps 320(%rsp), %xmm28 # 16-byte Reload movaps 336(%rsp), %xmm29 # 16-byte Reload movaps 352(%rsp), %xmm30 # 16-byte Reload movaps 368(%rsp), %xmm31 # 16-byte Reload addq $392, %rsp # imm = 0x188 popq %rcx popq %rdx popq %rsi popq %rdi popq %r8 popq %r9 popq %r10 popq %r11 popq %rax popq %rsp jmp subroutine1 # TAILCALL .Lfunc_end0: .size ih, .Lfunc_end0-ih .cfi_endproc .ident "clang version 3.9.0 (http://llvm.org/git/clang.git 1f64ddbc4c5d1036b68ec896765a7535537ded85) (http://llvm.org/git/llvm.git 43b517fe4e0a181b1cf20f36fd9eb92f7b32946c)" .section ".note.GNU-stack","",@progbits [hjl@gnu-6 interrupt-8]$
Since the interrupt handler is entered with only 8-byte stack alignment, the 16-byte-aligned movaps spills and reloads above are not safe; movups should be used instead.
I think there is a bigger problem here than using movups and/or aligning the stack. clang should be giving an error for this test, because we have no good way to efficiently save and restore the non-GPR state. The interrupt handler is required to save and restore all the register state that it uses, and according to the ABI, the call to subroutine1() may clobber arbitrary XMM, YMM, or ZMM state. The only way to reliably save and restore that state is to use xsave/xrstor, which would be very inefficient and is probably not what we want. There are two alternative ways to write this, as devised by gcc and described in the interrupt attribute spec (https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ed3cc7b66af4758f7849ed6f65f4365be8223be):
(1) Add the no_caller_saved_registers attribute to subroutine1.
(2) Compile the program with -mgeneral-regs-only.
Code review here: https://reviews.llvm.org/D97764