26413 – movaps is generated in interrupt handler

LLVM Bugzilla is read-only and represents the historical archive of all LLVM issues filed before November 26, 2021. Use GitHub to submit LLVM bugs.

Bug 26413 - movaps is generated in interrupt handler

Summary: movaps is generated in interrupt handler

Status:	RESOLVED FIXED

Alias:	None

Product:	libraries
Classification:	Unclassified
Component:	Backend: X86 (show other bugs)
Version:	trunk
Hardware:	PC Windows NT

Importance:	P normal
Assignee:	Melanie Blower

URL:
Keywords:

Depends on:
Blocks:

Reported:	2016-02-01 07:56 PST by H.J. Lu
Modified:	2021-03-03 07:19 PST (History)
CC List:	7 users (show)

See Also:	26411 41844
Fixed By Commit(s):	cc3d25be0116fc3cc41a08f8bf9eef73d6dc5d62

Attachments
Add an attachment (proposed patch, testcase, etc.)

Note You need to log in before you can comment on or make changes to this bug.

Description H.J. Lu 2016-02-01 07:56:57 PST

[hjl@gnu-6 interrupt-8]$ cat x.i 
/* External routine called by the handler; only its declaration is visible in
   this test case. */
extern void subroutine1(void);

/* Minimal reproducer: a function carrying the interrupt attribute whose body
   is a single call to subroutine1(). The frame parameter is not referenced. */
__attribute__ ((interrupt))
void ih(void * frame)
{
  subroutine1();
}
[hjl@gnu-6 interrupt-8]$ make x.s
/export/build/gnu/llvm-clang-bootstrap/stage1/build-x86_64-linux/bin/clang -O2 -Wall -march=nehalem -S -o x.s x.i
[hjl@gnu-6 interrupt-8]$ cat x.s
	.text
	.file	"x.i"
	.globl	ih
	.p2align	4, 0x90
	.type	ih,@function
# ih -- compiler-generated code for the interrupt handler in x.i (AT&T syntax).
# Sequence: push ten general-purpose registers, reserve 392 bytes, spill
# xmm0-xmm31 with movaps, reload them, release the 392 bytes, pop the
# general-purpose registers in reverse order, then jump to subroutine1.
ih:                                     # @ih
	.cfi_startproc
# BB#0:
	# Save general-purpose registers; each push is followed by a CFA offset
	# update that grows by 8.
	pushq	%rsp
.Ltmp0:
	.cfi_def_cfa_offset 16
	pushq	%rax
.Ltmp1:
	.cfi_def_cfa_offset 24
	pushq	%r11
.Ltmp2:
	.cfi_def_cfa_offset 32
	pushq	%r10
.Ltmp3:
	.cfi_def_cfa_offset 40
	pushq	%r9
.Ltmp4:
	.cfi_def_cfa_offset 48
	pushq	%r8
.Ltmp5:
	.cfi_def_cfa_offset 56
	pushq	%rdi
.Ltmp6:
	.cfi_def_cfa_offset 64
	pushq	%rsi
.Ltmp7:
	.cfi_def_cfa_offset 72
	pushq	%rdx
.Ltmp8:
	.cfi_def_cfa_offset 80
	pushq	%rcx
.Ltmp9:
	.cfi_def_cfa_offset 88
	# Reserve 392 bytes, then spill xmm0-xmm31 in 16-byte slots.
	# movaps is the aligned-move form; it is only appropriate when the
	# memory operand is known to be 16-byte aligned, which is the subject of
	# this listing.
	# NOTE(review): 32 registers x 16 bytes = 512 bytes, but only 392 are
	# reserved; the slots for xmm0-xmm7 below use negative offsets from %rsp.
	subq	$392, %rsp              # imm = 0x188
	movaps	%xmm31, 368(%rsp)       # 16-byte Spill
	movaps	%xmm30, 352(%rsp)       # 16-byte Spill
	movaps	%xmm29, 336(%rsp)       # 16-byte Spill
	movaps	%xmm28, 320(%rsp)       # 16-byte Spill
	movaps	%xmm27, 304(%rsp)       # 16-byte Spill
	movaps	%xmm26, 288(%rsp)       # 16-byte Spill
	movaps	%xmm25, 272(%rsp)       # 16-byte Spill
	movaps	%xmm24, 256(%rsp)       # 16-byte Spill
	movaps	%xmm23, 240(%rsp)       # 16-byte Spill
	movaps	%xmm22, 224(%rsp)       # 16-byte Spill
	movaps	%xmm21, 208(%rsp)       # 16-byte Spill
	movaps	%xmm20, 192(%rsp)       # 16-byte Spill
	movaps	%xmm19, 176(%rsp)       # 16-byte Spill
	movaps	%xmm18, 160(%rsp)       # 16-byte Spill
	movaps	%xmm17, 144(%rsp)       # 16-byte Spill
	movaps	%xmm16, 128(%rsp)       # 16-byte Spill
	movaps	%xmm15, 112(%rsp)       # 16-byte Spill
	movaps	%xmm14, 96(%rsp)        # 16-byte Spill
	movaps	%xmm13, 80(%rsp)        # 16-byte Spill
	movaps	%xmm12, 64(%rsp)        # 16-byte Spill
	movaps	%xmm11, 48(%rsp)        # 16-byte Spill
	movaps	%xmm10, 32(%rsp)        # 16-byte Spill
	movaps	%xmm9, 16(%rsp)         # 16-byte Spill
	movaps	%xmm8, (%rsp)           # 16-byte Spill
	# From here on the spill slots lie below the current %rsp.
	movaps	%xmm7, -16(%rsp)        # 16-byte Spill
	movaps	%xmm6, -32(%rsp)        # 16-byte Spill
	movaps	%xmm5, -48(%rsp)        # 16-byte Spill
	movaps	%xmm4, -64(%rsp)        # 16-byte Spill
	movaps	%xmm3, -80(%rsp)        # 16-byte Spill
	movaps	%xmm2, -96(%rsp)        # 16-byte Spill
	movaps	%xmm1, -112(%rsp)       # 16-byte Spill
	movaps	%xmm0, -128(%rsp)       # 16-byte Spill
.Ltmp10:
	# CFA offset after the stack adjustment: 88 + 392 = 480.
	.cfi_def_cfa_offset 480
	# Unwind records for the saved general-purpose registers.
.Ltmp11:
	.cfi_offset %rcx, -88
.Ltmp12:
	.cfi_offset %rdx, -80
.Ltmp13:
	.cfi_offset %rsi, -72
.Ltmp14:
	.cfi_offset %rdi, -64
.Ltmp15:
	.cfi_offset %r8, -56
.Ltmp16:
	.cfi_offset %r9, -48
.Ltmp17:
	.cfi_offset %r10, -40
.Ltmp18:
	.cfi_offset %r11, -32
	# Unwind records for the spilled XMM registers (xmm0-xmm15).
.Ltmp19:
	.cfi_offset %xmm0, -608
.Ltmp20:
	.cfi_offset %xmm1, -592
.Ltmp21:
	.cfi_offset %xmm2, -576
.Ltmp22:
	.cfi_offset %xmm3, -560
.Ltmp23:
	.cfi_offset %xmm4, -544
.Ltmp24:
	.cfi_offset %xmm5, -528
.Ltmp25:
	.cfi_offset %xmm6, -512
.Ltmp26:
	.cfi_offset %xmm7, -496
.Ltmp27:
	.cfi_offset %xmm8, -480
.Ltmp28:
	.cfi_offset %xmm9, -464
.Ltmp29:
	.cfi_offset %xmm10, -448
.Ltmp30:
	.cfi_offset %xmm11, -432
.Ltmp31:
	.cfi_offset %xmm12, -416
.Ltmp32:
	.cfi_offset %xmm13, -400
.Ltmp33:
	.cfi_offset %xmm14, -384
.Ltmp34:
	.cfi_offset %xmm15, -368
.Ltmp35:
	.cfi_offset %rax, -24
.Ltmp36:
	.cfi_offset %rsp, -16
	# Unwind records for xmm16-xmm31.
.Ltmp37:
	.cfi_offset %xmm16, -352
.Ltmp38:
	.cfi_offset %xmm17, -336
.Ltmp39:
	.cfi_offset %xmm18, -320
.Ltmp40:
	.cfi_offset %xmm19, -304
.Ltmp41:
	.cfi_offset %xmm20, -288
.Ltmp42:
	.cfi_offset %xmm21, -272
.Ltmp43:
	.cfi_offset %xmm22, -256
.Ltmp44:
	.cfi_offset %xmm23, -240
.Ltmp45:
	.cfi_offset %xmm24, -224
.Ltmp46:
	.cfi_offset %xmm25, -208
.Ltmp47:
	.cfi_offset %xmm26, -192
.Ltmp48:
	.cfi_offset %xmm27, -176
.Ltmp49:
	.cfi_offset %xmm28, -160
.Ltmp50:
	.cfi_offset %xmm29, -144
.Ltmp51:
	.cfi_offset %xmm30, -128
.Ltmp52:
	.cfi_offset %xmm31, -112
	# Reload every XMM register from the slot it was spilled to; no other
	# instruction sits between the spills and these reloads.
	movaps	-128(%rsp), %xmm0       # 16-byte Reload
	movaps	-112(%rsp), %xmm1       # 16-byte Reload
	movaps	-96(%rsp), %xmm2        # 16-byte Reload
	movaps	-80(%rsp), %xmm3        # 16-byte Reload
	movaps	-64(%rsp), %xmm4        # 16-byte Reload
	movaps	-48(%rsp), %xmm5        # 16-byte Reload
	movaps	-32(%rsp), %xmm6        # 16-byte Reload
	movaps	-16(%rsp), %xmm7        # 16-byte Reload
	movaps	(%rsp), %xmm8           # 16-byte Reload
	movaps	16(%rsp), %xmm9         # 16-byte Reload
	movaps	32(%rsp), %xmm10        # 16-byte Reload
	movaps	48(%rsp), %xmm11        # 16-byte Reload
	movaps	64(%rsp), %xmm12        # 16-byte Reload
	movaps	80(%rsp), %xmm13        # 16-byte Reload
	movaps	96(%rsp), %xmm14        # 16-byte Reload
	movaps	112(%rsp), %xmm15       # 16-byte Reload
	movaps	128(%rsp), %xmm16       # 16-byte Reload
	movaps	144(%rsp), %xmm17       # 16-byte Reload
	movaps	160(%rsp), %xmm18       # 16-byte Reload
	movaps	176(%rsp), %xmm19       # 16-byte Reload
	movaps	192(%rsp), %xmm20       # 16-byte Reload
	movaps	208(%rsp), %xmm21       # 16-byte Reload
	movaps	224(%rsp), %xmm22       # 16-byte Reload
	movaps	240(%rsp), %xmm23       # 16-byte Reload
	movaps	256(%rsp), %xmm24       # 16-byte Reload
	movaps	272(%rsp), %xmm25       # 16-byte Reload
	movaps	288(%rsp), %xmm26       # 16-byte Reload
	movaps	304(%rsp), %xmm27       # 16-byte Reload
	movaps	320(%rsp), %xmm28       # 16-byte Reload
	movaps	336(%rsp), %xmm29       # 16-byte Reload
	movaps	352(%rsp), %xmm30       # 16-byte Reload
	movaps	368(%rsp), %xmm31       # 16-byte Reload
	# Release the 392-byte area and restore the general-purpose registers in
	# the reverse of the push order.
	addq	$392, %rsp              # imm = 0x188
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	popq	%r8
	popq	%r9
	popq	%r10
	popq	%r11
	popq	%rax
	popq	%rsp
	# Control reaches subroutine1 through a jump, after all saved registers
	# have already been restored above.
	jmp	subroutine1             # TAILCALL
.Lfunc_end0:
	.size	ih, .Lfunc_end0-ih
	.cfi_endproc


	.ident	"clang version 3.9.0 (http://llvm.org/git/clang.git 1f64ddbc4c5d1036b68ec896765a7535537ded85) (http://llvm.org/git/llvm.git 43b517fe4e0a181b1cf20f36fd9eb92f7b32946c)"
	.section	".note.GNU-stack","",@progbits
[hjl@gnu-6 interrupt-8]$ 

Since an interrupt handler is called with only 8-byte stack alignment,
movups should be used instead of movaps.

Comment 1 David Kreitzer 2017-02-17 08:40:06 PST

I think there is a bigger problem here than using movups and/or aligning the stack.

clang should be giving an error for this test, because we have no good way to efficiently save & restore the non-GPR state.

The interrupt handler is required to save & restore all the register state that it uses. And according to the ABI, the call to subroutine1() may clobber arbitrary XMM, YMM, or ZMM state. The only way to reliably save & restore that state is to use xsave/xrstor, which would be very inefficient and is probably not what we want.

There are two alternative ways to write this, as devised by GCC and described in the interrupt attribute spec: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ed3cc7b66af4758f7849ed6f65f4365be8223be.

(1) Add the no_caller_saved_registers attribute to subroutine1.

(2) Compile the program with -mgeneral-regs-only.

Comment 2 Melanie Blower 2021-03-02 05:22:06 PST

The code review is here: https://reviews.llvm.org/D97764