diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2006-07-20 21:05:23 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2006-07-20 21:05:23 +0000 |
commit | 3c1bb48f3e96b9286e5087f766cc4528496f46df (patch) | |
tree | 7b65804e6d43d1c46c245b791f9613994047c6c7 /gnu | |
parent | a1f3f924b2978f47d07ab694ed46bb3cf5cc7950 (diff) |
Introduce a new compiler warning, -Wstack-larger-than-N, to report
functions which are too greedy in stack variables.
This is intended to be used for kernel compiles, where this warning will
be enabled for a reasonable size (after a few weeks grace period so that
people can upgrade their compiler).
Please note that this warning relies upon md code, and as such is only
available on platforms OpenBSD runs on; also, the stack size being warned
on is only the local variables size, regardless of the ABI stack usage
requirements and the callee-saved registers; which means a function may
be warning-clean yet need more stack space than meets the eye; the
actual size being checked on may change to include these extras in the
future.
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/egcs/gcc/config/alpha/alpha.c | 3 | ||||
-rw-r--r-- | gnu/egcs/gcc/config/m68k/m68k.c | 2 | ||||
-rw-r--r-- | gnu/egcs/gcc/config/m88k/m88k.c | 3 | ||||
-rw-r--r-- | gnu/egcs/gcc/config/sparc/sparc.c | 4 | ||||
-rw-r--r-- | gnu/egcs/gcc/config/vax/vax.h | 9 | ||||
-rw-r--r-- | gnu/egcs/gcc/flags.h | 6 | ||||
-rw-r--r-- | gnu/egcs/gcc/toplev.c | 17 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/arm/arm.c | 31 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/i386/i386.c | 514 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/mips/mips.c | 4 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/pa/pa.c | 3 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/rs6000/rs6000.c | 36 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/config/sparc/sparc.c | 157 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/flags.h | 6 | ||||
-rw-r--r-- | gnu/usr.bin/gcc/gcc/toplev.c | 12 |
15 files changed, 575 insertions, 232 deletions
diff --git a/gnu/egcs/gcc/config/alpha/alpha.c b/gnu/egcs/gcc/config/alpha/alpha.c index 2d62693d76b..861d58d644f 100644 --- a/gnu/egcs/gcc/config/alpha/alpha.c +++ b/gnu/egcs/gcc/config/alpha/alpha.c @@ -3460,6 +3460,9 @@ alpha_expand_prologue () + ALPHA_ROUND (frame_size + current_function_pretend_args_size)); + if (warn_stack_larger_than && frame_size > stack_larger_than_size) + warning ("stack usage is %d bytes", frame_size); + if (TARGET_OPEN_VMS) reg_offset = 8; else diff --git a/gnu/egcs/gcc/config/m68k/m68k.c b/gnu/egcs/gcc/config/m68k/m68k.c index e751326c85b..72d305c073a 100644 --- a/gnu/egcs/gcc/config/m68k/m68k.c +++ b/gnu/egcs/gcc/config/m68k/m68k.c @@ -151,6 +151,8 @@ output_function_prologue (stream, size) int fsize = (size + 3) & -4; int cfa_offset = INCOMING_FRAME_SP_OFFSET, cfa_store_offset = cfa_offset; + if (warn_stack_larger_than && fsize > stack_larger_than_size) + warning ("stack usage is %d bytes", fsize); if (frame_pointer_needed) { diff --git a/gnu/egcs/gcc/config/m88k/m88k.c b/gnu/egcs/gcc/config/m88k/m88k.c index 6c46ff1d544..8c9eaed2678 100644 --- a/gnu/egcs/gcc/config/m88k/m88k.c +++ b/gnu/egcs/gcc/config/m88k/m88k.c @@ -1920,6 +1920,9 @@ m88k_expand_prologue () { m88k_layout_frame (); + if (warn_stack_larger_than && m88k_stack_size > stack_larger_than_size) + warning ("stack usage is %d bytes", m88k_stack_size); + if (m88k_stack_size) emit_add (stack_pointer_rtx, stack_pointer_rtx, -m88k_stack_size); diff --git a/gnu/egcs/gcc/config/sparc/sparc.c b/gnu/egcs/gcc/config/sparc/sparc.c index 9d8fade6c62..35800d7aee3 100644 --- a/gnu/egcs/gcc/config/sparc/sparc.c +++ b/gnu/egcs/gcc/config/sparc/sparc.c @@ -3116,6 +3116,10 @@ output_function_prologue (file, size, leaf_function) int size; int leaf_function; { + if (warn_stack_larger_than && + SPARC_STACK_ALIGN (size) > stack_larger_than_size) + warning ("stack usage is %d bytes", SPARC_STACK_ALIGN (size)); + /* Need to use actual_fsize, since we are also allocating space for our callee (and our own register save area). */ actual_fsize = compute_frame_size (size, leaf_function); diff --git a/gnu/egcs/gcc/config/vax/vax.h b/gnu/egcs/gcc/config/vax/vax.h index 89f6b8a639d..6492ea6d331 100644 --- a/gnu/egcs/gcc/config/vax/vax.h +++ b/gnu/egcs/gcc/config/vax/vax.h @@ -466,8 +466,13 @@ gen_rtx (PLUS, Pmode, frame, GEN_INT (12)) mask |= 1 << regno; \ fprintf (FILE, "\t.word 0x%x\n", mask); \ MAYBE_VMS_FUNCTION_PROLOGUE(FILE) \ - if ((size) >= 64) fprintf (FILE, "\tmovab %d(sp),sp\n", -size);\ - else if (size) fprintf (FILE, "\tsubl2 $%d,sp\n", (size)); } + if (warn_stack_larger_than && size > stack_larger_than_size) \ + warning ("stack usage is %d bytes", size); \ + if ((size) >= 64) \ + fprintf (FILE, "\tmovab %d(sp),sp\n", -size); \ + else if (size) \ + fprintf (FILE, "\tsubl2 $%d,sp\n", (size)); \ +} /* vms.h redefines this. */ #define MAYBE_VMS_FUNCTION_PROLOGUE(FILE) diff --git a/gnu/egcs/gcc/flags.h b/gnu/egcs/gcc/flags.h index bc1c5690f3d..a63a4e5ab14 100644 --- a/gnu/egcs/gcc/flags.h +++ b/gnu/egcs/gcc/flags.h @@ -132,6 +132,12 @@ extern unsigned id_clash_len; extern int warn_larger_than; extern unsigned larger_than_size; +/* Nonzero means warn about any function whose stack usage is larger + than N bytes. The value N is in `stack_larger_than_size'. */ + +extern int warn_stack_larger_than; +extern unsigned stack_larger_than_size; + /* Warn if a function returns an aggregate, since there are often incompatible calling conventions for doing this. */ diff --git a/gnu/egcs/gcc/toplev.c b/gnu/egcs/gcc/toplev.c index 7443a766999..f8f616fb300 100644 --- a/gnu/egcs/gcc/toplev.c +++ b/gnu/egcs/gcc/toplev.c @@ -1257,6 +1257,12 @@ unsigned id_clash_len; int warn_larger_than; unsigned larger_than_size; +/* Nonzero means warn about any function whose stack usage is larger + than N bytes. The value N is in `stack_larger_than_size'. */ + +int warn_stack_larger_than; +unsigned stack_larger_than_size; + /* Nonzero means warn if inline function is too large. */ int warn_inline; @@ -4576,6 +4582,7 @@ display_help () printf (" -Wid-clash-<num> Warn if 2 identifiers have the same first <num> chars\n"); printf (" -Wlarger-than-<number> Warn if an object is larger than <number> bytes\n"); + printf (" -Wstack-larger-than-<number> Warn if a function stack usage is larger than <number> bytes\n"); printf (" -p Enable function profiling\n"); #if defined (BLOCK_PROFILER) || defined (FUNCTION_BLOCK_PROFILER) printf (" -a Enable block profiling \n"); @@ -5219,6 +5226,16 @@ main (argc, argv) warn_larger_than = 1; } } + else if (!strncmp (p, "stack-larger-than-", 18)) + { + const int stack_larger_than_val + = read_integral_parameter (p + 18, p - 2, -1); + if (stack_larger_than_val != -1) + { + stack_larger_than_size = stack_larger_than_val; + warn_stack_larger_than = 1; + } + } else error ("Invalid option `%s'", argv[i]); } diff --git a/gnu/usr.bin/gcc/gcc/config/arm/arm.c b/gnu/usr.bin/gcc/gcc/config/arm/arm.c index 7422f1d03af..21df8460846 100644 --- a/gnu/usr.bin/gcc/gcc/config/arm/arm.c +++ b/gnu/usr.bin/gcc/gcc/config/arm/arm.c @@ -275,7 +275,10 @@ int arm_ld_sched = 0; int arm_is_strong = 0; /* Nonzero if this chip is an XScale. */ -int arm_is_xscale = 0; +int arm_arch_xscale = 0; + +/* Nonzero if tuning for XScale */ +int arm_tune_xscale = 0; /* Nonzero if this chip is an ARM6 or an ARM7. */ int arm_is_6_or_7 = 0; @@ -684,13 +687,14 @@ arm_override_options () arm_arch4 = (insn_flags & FL_ARCH4) != 0; arm_arch5 = (insn_flags & FL_ARCH5) != 0; arm_arch5e = (insn_flags & FL_ARCH5E) != 0; - arm_is_xscale = (insn_flags & FL_XSCALE) != 0; + arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; arm_is_strong = (tune_flags & FL_STRONG) != 0; thumb_code = (TARGET_ARM == 0); arm_is_6_or_7 = (((tune_flags & (FL_MODE26 | FL_MODE32)) && !(tune_flags & FL_ARCH4))) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; /* Default value for floating point code... if no co-processor bus, then schedule for emulated floating point. Otherwise, @@ -762,7 +766,7 @@ arm_override_options () if (optimize_size || (tune_flags & FL_LDSCHED)) arm_constant_limit = 1; - if (arm_is_xscale) + if (arm_arch_xscale) arm_constant_limit = 2; /* If optimizing for size, bump the number of instructions that we @@ -2947,7 +2951,7 @@ arm_adjust_cost (insn, link, dep, cost) /* Some true dependencies can have a higher cost depending on precisely how certain input operands are used. */ - if (arm_is_xscale + if (arm_tune_xscale && REG_NOTE_KIND (link) == 0 && recog_memoized (insn) < 0 && recog_memoized (dep) < 0) @@ -3778,6 +3782,12 @@ adjacent_mem_locations (a, b) else reg1 = REGNO (XEXP (b, 0)); + /* Don't accept any offset that will require multiple instructions to handle, + since this would cause the arith_adjacentmem pattern to output an overlong + sequence. */ + if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1)) + return 0; + return (reg0 == reg1) && ((val1 - val0) == 4 || (val0 - val1) == 4); } return 0; @@ -4388,7 +4398,7 @@ arm_gen_load_multiple (base_regno, count, from, up, write_back, unchanging_p, As a compromise, we use ldr for counts of 1 or 2 regs, and ldm for counts of 3 or 4 regs. */ - if (arm_is_xscale && count <= 2 && ! optimize_size) + if (arm_tune_xscale && count <= 2 && ! optimize_size) { rtx seq; @@ -4455,7 +4465,7 @@ arm_gen_store_multiple (base_regno, count, to, up, write_back, unchanging_p, /* See arm_gen_load_multiple for discussion of the pros/cons of ldm/stm usage for XScale. */ - if (arm_is_xscale && count <= 2 && ! optimize_size) + if (arm_tune_xscale && count <= 2 && ! optimize_size) { rtx seq; @@ -8303,6 +8313,7 @@ arm_get_frame_size () void arm_expand_prologue () { + int size; int reg; rtx amount; rtx insn; @@ -8534,8 +8545,12 @@ arm_expand_prologue () } } - amount = GEN_INT (-(arm_get_frame_size () - + current_function_outgoing_args_size)); + size = arm_get_frame_size (); + + if (warn_stack_larger_than && size > stack_larger_than_size) + warning ("stack usage is %d bytes", size); + + amount = GEN_INT (-(size + current_function_outgoing_args_size)); if (amount != const0_rtx) { diff --git a/gnu/usr.bin/gcc/gcc/config/i386/i386.c b/gnu/usr.bin/gcc/gcc/config/i386/i386.c index ea60c32515b..b991ef7c543 100644 --- a/gnu/usr.bin/gcc/gcc/config/i386/i386.c +++ b/gnu/usr.bin/gcc/gcc/config/i386/i386.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IA-32. Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003 Free Software Foundation, Inc. + 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of GNU CC. @@ -647,6 +647,8 @@ struct ix86_frame HOST_WIDE_INT frame_pointer_offset; HOST_WIDE_INT hard_frame_pointer_offset; HOST_WIDE_INT stack_pointer_offset; + + HOST_WIDE_INT local_size; }; /* Used to enable/disable debugging features. */ @@ -720,6 +722,10 @@ static rtx maybe_get_pool_constant PARAMS ((rtx)); static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, rtx *, rtx *)); +static bool ix86_fixed_condition_code_regs PARAMS ((unsigned int *, + unsigned int *)); +static enum machine_mode ix86_cc_modes_compatible PARAMS ((enum machine_mode, + enum machine_mode)); static rtx get_thread_pointer PARAMS ((void)); static void get_pc_thunk_name PARAMS ((char [32], unsigned int)); static rtx gen_push PARAMS ((rtx)); @@ -910,6 +916,11 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs +#undef TARGET_CC_MODES_COMPATIBLE +#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible + struct gcc_target targetm = TARGET_INITIALIZER; /* The svr4 ABI for the i386 says that records and unions are returned @@ -966,9 +977,10 @@ override_options () { PTA_SSE = 1, PTA_SSE2 = 2, - PTA_MMX = 4, - PTA_PREFETCH_SSE = 8, - PTA_3DNOW = 16, + PTA_SSE3 = 4, + PTA_MMX = 8, + PTA_PREFETCH_SSE = 16, + PTA_3DNOW = 32, PTA_3DNOW_A = 64 } flags; } @@ -986,8 +998,12 @@ override_options () {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, - {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | - PTA_MMX | PTA_PREFETCH_SSE}, + {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 + | PTA_MMX | PTA_PREFETCH_SSE}, + {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_MMX | PTA_PREFETCH_SSE}, + {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_MMX | PTA_PREFETCH_SSE}, {"k6", PROCESSOR_K6, PTA_MMX}, {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, @@ -995,6 +1011,7 @@ override_options () | PTA_3DNOW_A}, {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A}, + {"x86-64", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW @@ -1040,7 +1057,7 @@ override_options () if (!ix86_cpu_string) ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; if (!ix86_arch_string) - ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; + ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; if (ix86_cmodel_string != 0) { @@ -1104,6 +1121,9 @@ override_options () if (processor_alias_table[i].flags & PTA_SSE2 && !(target_flags_explicit & MASK_SSE2)) target_flags |= MASK_SSE2; + if (processor_alias_table[i].flags & PTA_SSE3 + && !(target_flags_explicit & MASK_SSE3)) + target_flags |= MASK_SSE3; if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) x86_prefetch_sse = true; break; @@ -1257,6 +1277,14 @@ override_options () if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) target_flags &= ~MASK_NO_FANCY_MATH_387; + /* Turn on SSE2 builtins for -msse3. */ + if (TARGET_SSE3) + target_flags |= MASK_SSE2; + + /* Turn on SSE builtins for -msse2. */ + if (TARGET_SSE2) + target_flags |= MASK_SSE; + if (TARGET_64BIT) { if (TARGET_ALIGN_DOUBLE) @@ -1888,6 +1916,8 @@ classify_argument (mode, type, classes, bit_offset) mode_alignment = 128; else if (mode == XCmode) mode_alignment = 256; + if (COMPLEX_MODE_P (mode)) + mode_alignment /= 2; /* Misaligned fields are always returned in memory. */ if (bit_offset % mode_alignment) return 0; @@ -2069,7 +2099,8 @@ construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regn default: abort (); } - if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS) + if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS + && mode != BLKmode) return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); if (n == 2 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) @@ -2081,7 +2112,8 @@ construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regn return gen_rtx_REG (mode, intreg[0]); if (n == 4 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS - && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS) + && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS + && mode != BLKmode) return gen_rtx_REG (TCmode, FIRST_STACK_REG); /* Otherwise figure out the entries of the PARALLEL. */ @@ -3917,11 +3949,6 @@ aligned_operand (op, mode) if (! ix86_decompose_address (op, &parts)) abort (); - if (parts.base && GET_CODE (parts.base) == SUBREG) - parts.base = SUBREG_REG (parts.base); - if (parts.index && GET_CODE (parts.index) == SUBREG) - parts.index = SUBREG_REG (parts.index); - /* Look for some component that isn't known to be aligned. */ if (parts.index) { @@ -4338,6 +4365,10 @@ ix86_asm_file_end (file) output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); output_asm_insn ("ret", xops); } + +#ifdef SUBTARGET_FILE_END + SUBTARGET_FILE_END (file); +#endif } /* Emit code for the SET_GOT patterns. */ @@ -4512,6 +4543,7 @@ ix86_compute_frame_layout (frame) int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; HOST_WIDE_INT size = get_frame_size (); + frame->local_size = size; frame->nregs = ix86_nsaved_regs (); total_size = size; @@ -4560,8 +4592,12 @@ ix86_compute_frame_layout (frame) offset += size; /* Add outgoing arguments area. Can be skipped if we eliminated - all the function calls as dead code. */ - if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf) + all the function calls as dead code. + Skipping is however impossible when function calls alloca. Alloca + expander assumes that last current_function_outgoing_args_size + of stack frame are unused. */ + if (ACCUMULATE_OUTGOING_ARGS + && (!current_function_is_leaf || current_function_calls_alloca)) { offset += current_function_outgoing_args_size; frame->outgoing_arguments_size = current_function_outgoing_args_size; @@ -4671,6 +4707,9 @@ ix86_expand_prologue () } ix86_compute_frame_layout (&frame); + if (warn_stack_larger_than && frame.local_size > stack_larger_than_size) + warning ("stack usage is %d bytes", frame.local_size); + /* Note: AT&T enter does NOT have reversed args. Enter is probably slower on all targets. Also sdb doesn't like it. */ @@ -4988,7 +5027,7 @@ ix86_decompose_address (addr, out) rtx scale_rtx = NULL_RTX; int retval = 1; - if (REG_P (addr) || GET_CODE (addr) == SUBREG) + if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) base = addr; else if (GET_CODE (addr) == PLUS) { @@ -5115,11 +5154,6 @@ ix86_address_cost (x) if (!ix86_decompose_address (x, &parts)) abort (); - if (parts.base && GET_CODE (parts.base) == SUBREG) - parts.base = SUBREG_REG (parts.base); - if (parts.index && GET_CODE (parts.index) == SUBREG) - parts.index = SUBREG_REG (parts.index); - /* More complex memory references are better. */ if (parts.disp && parts.disp != const0_rtx) cost--; @@ -5492,15 +5526,9 @@ legitimate_address_p (mode, addr, strict) if (base) { - rtx reg; reason_rtx = base; - if (GET_CODE (base) == SUBREG) - reg = SUBREG_REG (base); - else - reg = base; - - if (GET_CODE (reg) != REG) + if (GET_CODE (base) != REG) { reason = "base is not a register"; goto report_error; @@ -5512,8 +5540,8 @@ legitimate_address_p (mode, addr, strict) goto report_error; } - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) + if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) + || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) { reason = "base is not valid"; goto report_error; @@ -5528,15 +5556,9 @@ legitimate_address_p (mode, addr, strict) if (index) { - rtx reg; reason_rtx = index; - if (GET_CODE (index) == SUBREG) - reg = SUBREG_REG (index); - else - reg = index; - - if (GET_CODE (reg) != REG) + if (GET_CODE (index) != REG) { reason = "index is not a register"; goto report_error; @@ -5548,8 +5570,8 @@ legitimate_address_p (mode, addr, strict) goto report_error; } - if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) - || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) + if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index)) + || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) { reason = "index is not valid"; goto report_error; @@ -8372,6 +8394,68 @@ ix86_cc_mode (code, op0, op1) } } +/* Return the fixed registers used for condition codes. */ + +static bool +ix86_fixed_condition_code_regs (p1, p2) + unsigned int *p1; + unsigned int *p2; +{ + *p1 = FLAGS_REG; + *p2 = FPSR_REG; + return true; +} + +/* If two condition code modes are compatible, return a condition code + mode which is compatible with both. Otherwise, return + VOIDmode. */ + +static enum machine_mode +ix86_cc_modes_compatible (m1, m2) + enum machine_mode m1; + enum machine_mode m2; +{ + if (m1 == m2) + return m1; + + if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) + return VOIDmode; + + if ((m1 == CCGCmode && m2 == CCGOCmode) + || (m1 == CCGOCmode && m2 == CCGCmode)) + return CCGCmode; + + switch (m1) + { + default: + abort (); + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCZmode: + switch (m2) + { + default: + return VOIDmode; + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCZmode: + return CCmode; + } + + case CCFPmode: + case CCFPUmode: + /* These are only compatible with themselves, which we already + checked above. */ + return VOIDmode; + } +} + /* Return true if we should use an FCOMI instruction for this fp comparison. */ int @@ -12114,25 +12198,20 @@ struct builtin_description const unsigned int flag; }; -/* Used for builtins that are enabled both by -msse and -msse2. */ -#define MASK_SSE1 (MASK_SSE | MASK_SSE2) -#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) -#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) - static const struct builtin_description bdesc_comi[] = { - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, + { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, @@ -12150,51 +12229,51 @@ static const struct builtin_description bdesc_comi[] = static const struct builtin_description bdesc_2arg[] = { /* SSE */ - { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, - { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, - { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, - - { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, - - { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, + { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, + { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, + { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, + + { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, + { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, /* MMX */ { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, @@ -12217,15 +12296,15 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, @@ -12234,10 +12313,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -12251,9 +12330,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12274,7 +12353,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, /* SSE2 */ @@ -12404,26 +12483,34 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } + { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, + + /* SSE3 MMX */ + { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = { - { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, - { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, - { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, - { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, @@ -12445,14 +12532,19 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, - { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 } + { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, + + /* SSE3 */ + { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, + { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } }; void @@ -12543,6 +12635,13 @@ ix86_init_mmx_sse_builtins () = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); + tree void_ftype_unsigned_unsigned + = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree void_ftype_pcvoid_unsigned_unsigned + = build_function_type_list (void_type_node, const_ptr_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); tree unsigned_ftype_void = build_function_type (unsigned_type_node, void_list_node); tree di_ftype_void @@ -12861,52 +12960,52 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); - def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); - def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); - def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); - def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); - def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); - def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - - def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); - def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); - - def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); - def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); - - def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); - def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); - - def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); - def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); - def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); - def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); - def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); - def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); - - def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); + def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); + def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); + def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); + def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + + def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); + def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); + def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); + + def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); + def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); + def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); + def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); + + def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + + def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); + def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); + + def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); /* Original 3DNow! */ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); @@ -12938,7 +13037,7 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); - def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); /* SSE2 */ def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); @@ -12989,15 +13088,15 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); - def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); - def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); - def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); + def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); @@ -13021,7 +13120,7 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); - def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); + def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); @@ -13048,6 +13147,26 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); + + /* Prescott New Instructions. */ + def_builtin (MASK_SSE3, "__builtin_ia32_monitor", + void_ftype_pcvoid_unsigned_unsigned, + IX86_BUILTIN_MONITOR); + def_builtin (MASK_SSE3, "__builtin_ia32_mwait", + void_ftype_unsigned_unsigned, + IX86_BUILTIN_MWAIT); + def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSHDUP); + def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSLDUP); + def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", + v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + def_builtin (MASK_SSE3, "__builtin_ia32_loadddup", + v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP); + def_builtin (MASK_SSE3, "__builtin_ia32_movddup", + v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -13856,6 +13975,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STORED: return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist); + case IX86_BUILTIN_MONITOR: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + emit_insn (gen_monitor (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + emit_insn (gen_mwait (op0, op1)); + return 0; + + case IX86_BUILTIN_LOADDDUP: + return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1); + + case IX86_BUILTIN_LDDQU: + return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, + 1); + default: break; } diff --git a/gnu/usr.bin/gcc/gcc/config/mips/mips.c b/gnu/usr.bin/gcc/gcc/config/mips/mips.c index 44024f2e813..3ea546f7e51 100644 --- a/gnu/usr.bin/gcc/gcc/config/mips/mips.c +++ b/gnu/usr.bin/gcc/gcc/config/mips/mips.c @@ -7501,6 +7501,10 @@ mips_expand_prologue () tsize = compute_frame_size (get_frame_size ()); + if (warn_stack_larger_than + && cfun->machine->frame.var_size > stack_larger_than_size) + warning ("stack usage is %d bytes", cfun->machine->frame.var_size); + /* If this function is a varargs function, store any registers that would normally hold arguments ($4 - $7) on the stack. */ if (store_args_on_stack diff --git a/gnu/usr.bin/gcc/gcc/config/pa/pa.c b/gnu/usr.bin/gcc/gcc/config/pa/pa.c index 63b014cf178..90e4eb37a9d 100644 --- a/gnu/usr.bin/gcc/gcc/config/pa/pa.c +++ b/gnu/usr.bin/gcc/gcc/config/pa/pa.c @@ -3252,6 +3252,9 @@ hppa_expand_prologue () fr_saved = 0; save_fregs = 0; + if (warn_stack_larger_than && size > stack_larger_than_size) + warning ("stack usage is %d bytes", size); + /* Allocate space for frame pointer + filler. If any frame is allocated we need to add this in because of STARTING_FRAME_OFFSET. diff --git a/gnu/usr.bin/gcc/gcc/config/rs6000/rs6000.c b/gnu/usr.bin/gcc/gcc/config/rs6000/rs6000.c index 13f4ed3dab1..0ba733942fa 100644 --- a/gnu/usr.bin/gcc/gcc/config/rs6000/rs6000.c +++ b/gnu/usr.bin/gcc/gcc/config/rs6000/rs6000.c @@ -9190,16 +9190,15 @@ rs6000_stack_info () else info_ptr->spe_gp_size = 0; - if (TARGET_ALTIVEC_ABI && TARGET_ALTIVEC_VRSAVE) - { - info_ptr->vrsave_mask = compute_vrsave_mask (); - info_ptr->vrsave_size = info_ptr->vrsave_mask ? 4 : 0; - } + if (TARGET_ALTIVEC_ABI) + info_ptr->vrsave_mask = compute_vrsave_mask (); else - { - info_ptr->vrsave_mask = 0; - info_ptr->vrsave_size = 0; - } + info_ptr->vrsave_mask = 0; + + if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask) + info_ptr->vrsave_size = 4; + else + info_ptr->vrsave_size = 0; /* Calculate the offsets. */ switch (abi) @@ -10280,6 +10279,9 @@ rs6000_emit_prologue () int saving_FPRs_inline; int using_store_multiple; HOST_WIDE_INT sp_offset = 0; + + if (warn_stack_larger_than && info->vars_size > stack_larger_than_size) + warning ("stack usage is %d bytes", info->vars_size); if (TARGET_SPE_ABI) { @@ -10351,7 +10353,8 @@ rs6000_emit_prologue () used in this function, and do the corresponding magic in the epilogue. */ - if (TARGET_ALTIVEC && info->vrsave_mask != 0) + if (TARGET_ALTIVEC && TARGET_ALTIVEC_VRSAVE + && info->vrsave_mask != 0) { rtx reg, mem, vrsave; int offset; @@ -10792,7 +10795,8 @@ rs6000_emit_epilogue (sibcall) } /* Restore VRSAVE if needed. */ - if (TARGET_ALTIVEC_ABI && info->vrsave_mask != 0) + if (TARGET_ALTIVEC_ABI && TARGET_ALTIVEC_VRSAVE + && info->vrsave_mask != 0) { rtx addr, mem, reg; @@ -13138,14 +13142,8 @@ rs6000_elf_section_type_flags (decl, name, reloc) const char *name; int reloc; { - unsigned int flags - = default_section_type_flags_1 (decl, name, reloc, - flag_pic || DEFAULT_ABI == ABI_AIX); - - if (TARGET_RELOCATABLE) - flags |= SECTION_WRITE; - - return flags; + return default_section_type_flags_1 (decl, name, reloc, + flag_pic || DEFAULT_ABI == ABI_AIX); } /* Record an element in the table of global constructors. SYMBOL is diff --git a/gnu/usr.bin/gcc/gcc/config/sparc/sparc.c b/gnu/usr.bin/gcc/gcc/config/sparc/sparc.c index 79022b4b85d..eafc24a3c0e 100644 --- a/gnu/usr.bin/gcc/gcc/config/sparc/sparc.c +++ b/gnu/usr.bin/gcc/gcc/config/sparc/sparc.c @@ -1,8 +1,8 @@ /* Subroutines for insn-output.c for Sun SPARC. Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by Michael Tiemann (tiemann@cygnus.com) - 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, + 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, at Cygnus Support. This file is part of GNU CC. @@ -1357,6 +1357,34 @@ input_operand (op, mode) return 0; } +/* Return 1 if OP is valid for the lhs of a compare insn. */ + +int +compare_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == ZERO_EXTRACT) + return (register_operand (XEXP (op, 0), mode) + && small_int_or_double (XEXP (op, 1), mode) + && small_int_or_double (XEXP (op, 2), mode) + /* This matches cmp_zero_extract. */ + && ((mode == SImode + && ((GET_CODE (XEXP (op, 2)) == CONST_INT + && INTVAL (XEXP (op, 2)) > 19) + || (GET_CODE (XEXP (op, 2)) == CONST_DOUBLE + && CONST_DOUBLE_LOW (XEXP (op, 2)) > 19))) + /* This matches cmp_zero_extract_sp64. */ + || (mode == DImode + && TARGET_ARCH64 + && ((GET_CODE (XEXP (op, 2)) == CONST_INT + && INTVAL (XEXP (op, 2)) > 51) + || (GET_CODE (XEXP (op, 2)) == CONST_DOUBLE + && CONST_DOUBLE_LOW (XEXP (op, 2)) > 51))))); + else + return register_operand (op, mode); +} + /* We know it can't be done in one insn when we get here, the movsi expander guarentees this. */ @@ -3845,6 +3873,10 @@ sparc_output_function_prologue (file, size) FILE *file; HOST_WIDE_INT size; { + if (warn_stack_larger_than && + SPARC_STACK_ALIGN (size) > stack_larger_than_size) + warning ("stack usage is %d bytes", SPARC_STACK_ALIGN (size)); + if (TARGET_FLAT) sparc_flat_function_prologue (file, size); else @@ -5414,18 +5446,80 @@ sparc_va_arg (valist, type) return addr_rtx; } +/* Return the string to output an unconditional branch to LABEL, which is + the operand number of the label. + + DEST is the destination insn (i.e. the label), INSN is the source. */ + +const char * +output_ubranch (dest, label, insn) + rtx dest; + int label; + rtx insn; +{ + static char string[64]; + bool noop = false; + char *p; + + /* TurboSPARC is reported to have problems with + with + foo: b,a foo + i.e. an empty loop with the annul bit set. The workaround is to use + foo: b foo; nop + instead. */ + + if (! TARGET_V9 && flag_delayed_branch + && (INSN_ADDRESSES (INSN_UID (dest)) + == INSN_ADDRESSES (INSN_UID (insn)))) + { + strcpy (string, "b\t"); + noop = true; + } + else + { + bool v9_form = false; + + if (TARGET_V9 && INSN_ADDRESSES_SET_P ()) + { + int delta = (INSN_ADDRESSES (INSN_UID (dest)) + - INSN_ADDRESSES (INSN_UID (insn))); + /* Leave some instructions for "slop". */ + if (delta >= -260000 && delta < 260000) + v9_form = true; + } + + if (v9_form) + strcpy (string, "ba%*,pt\t%%xcc, "); + else + strcpy (string, "b%*\t"); + } + + p = strchr (string, '\0'); + *p++ = '%'; + *p++ = 'l'; + *p++ = '0' + label; + *p++ = '%'; + if (noop) + *p++ = '#'; + else + *p++ = '('; + *p = '\0'; + + return string; +} + /* Return the string to output a conditional branch to LABEL, which is the operand number of the label. OP is the conditional expression. XEXP (OP, 0) is assumed to be a condition code register (integer or floating point) and its mode specifies what kind of comparison we made. + DEST is the destination insn (i.e. the label), INSN is the source. + REVERSED is nonzero if we should reverse the sense of the comparison. ANNUL is nonzero if we should generate an annulling branch. - NOOP is nonzero if we have to follow this branch by a noop. - - INSN, if set, is the insn. */ + NOOP is nonzero if we have to follow this branch by a noop. */ char * output_cbranch (op, dest, label, reversed, annul, noop, insn) @@ -5463,7 +5557,7 @@ output_cbranch (op, dest, label, reversed, annul, noop, insn) nop ba .LC29 */ - far = get_attr_length (insn) >= 3; + far = TARGET_V9 && (get_attr_length (insn) >= 3); if (reversed ^ far) { /* Reversal of FP compares takes care -- an ordered compare @@ -5593,9 +5687,7 @@ output_cbranch (op, dest, label, reversed, annul, noop, insn) spaces -= 2; } - if (! TARGET_V9) - labelno = ""; - else + if (TARGET_V9) { rtx note; int v8 = 0; @@ -5645,6 +5737,9 @@ output_cbranch (op, dest, label, reversed, annul, noop, insn) spaces -= 3; } } + else + labelno = ""; + if (spaces > 0) *p++ = '\t'; else @@ -5852,6 +5947,8 @@ sparc_emit_floatunsdi (operands) operand number of the reg. OP is the conditional expression. The mode of REG says what kind of comparison we made. + DEST is the destination insn (i.e. the label), INSN is the source. + REVERSED is nonzero if we should reverse the sense of the comparison. ANNUL is nonzero if we should generate an annulling branch. @@ -6767,7 +6864,7 @@ void sparc_initialize_trampoline (tramp, fnaddr, cxt) rtx tramp, fnaddr, cxt; { - /* SPARC 32 bit trampoline: + /* SPARC 32-bit trampoline: sethi %hi(fn), %g1 sethi %hi(static), %g2 @@ -6777,10 +6874,6 @@ sparc_initialize_trampoline (tramp, fnaddr, cxt) SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii */ -#ifdef TRANSFER_FROM_TRAMPOLINE - emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), - LCT_NORMAL, VOIDmode, 1, tramp, Pmode); -#endif emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), @@ -6819,9 +6912,17 @@ sparc_initialize_trampoline (tramp, fnaddr, cxt) && sparc_cpu != PROCESSOR_ULTRASPARC3) emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, plus_constant (tramp, 8))))); + + /* Call __enable_execute_stack after writing onto the stack to make sure + the stack address is accessible. */ +#ifdef TRANSFER_FROM_TRAMPOLINE + emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, tramp, Pmode); +#endif + } -/* The 64 bit version is simpler because it makes more sense to load the +/* The 64-bit version is simpler because it makes more sense to load the values as "immediate" data out of the trampoline. It's also easier since we can read the PC without clobbering a register. */ @@ -6829,12 +6930,8 @@ void sparc64_initialize_trampoline (tramp, fnaddr, cxt) rtx tramp, fnaddr, cxt; { -#ifdef TRANSFER_FROM_TRAMPOLINE - emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), - LCT_NORMAL, VOIDmode, 1, tramp, Pmode); -#endif + /* SPARC 64-bit trampoline: - /* rd %pc, %g1 ldx [%g1+24], %g5 jmp %g5 @@ -6857,6 +6954,13 @@ sparc64_initialize_trampoline (tramp, fnaddr, cxt) if (sparc_cpu != PROCESSOR_ULTRASPARC && sparc_cpu != PROCESSOR_ULTRASPARC3) emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8))))); + + /* Call __enable_execute_stack after writing onto the stack to make sure + the stack address is accessible. */ +#ifdef TRANSFER_FROM_TRAMPOLINE + emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, tramp, Pmode); +#endif } /* Subroutines to support a flat (single) register window calling @@ -8554,10 +8658,17 @@ sparc_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function) if (!SPARC_SIMM13_P (delta)) { rtx scratch = gen_rtx_REG (Pmode, 1); - if (TARGET_ARCH64) - sparc_emit_set_const64 (scratch, delta_rtx); + + if (input_operand (delta_rtx, GET_MODE (scratch))) + emit_insn (gen_rtx_SET (VOIDmode, scratch, delta_rtx)); else - sparc_emit_set_const32 (scratch, delta_rtx); + { + if (TARGET_ARCH64) + sparc_emit_set_const64 (scratch, delta_rtx); + else + sparc_emit_set_const32 (scratch, delta_rtx); + } + delta_rtx = scratch; } diff --git a/gnu/usr.bin/gcc/gcc/flags.h b/gnu/usr.bin/gcc/gcc/flags.h index 7aa7c73b4ef..1a61fc6d1ce 100644 --- a/gnu/usr.bin/gcc/gcc/flags.h +++ b/gnu/usr.bin/gcc/gcc/flags.h @@ -162,6 +162,12 @@ extern int warn_cast_align; extern int warn_larger_than; extern HOST_WIDE_INT larger_than_size; +/* Nonzero means warn about any function whose stack usage is larger + than N bytes. The value N is in `stack_larger_than_size'. */ + +extern int warn_stack_larger_than; +extern HOST_WIDE_INT stack_larger_than_size; + /* Warn if a function returns an aggregate, since there are often incompatible calling conventions for doing this. */ diff --git a/gnu/usr.bin/gcc/gcc/toplev.c b/gnu/usr.bin/gcc/gcc/toplev.c index 8148dbc215f..8105a718d9f 100644 --- a/gnu/usr.bin/gcc/gcc/toplev.c +++ b/gnu/usr.bin/gcc/gcc/toplev.c @@ -1494,6 +1494,12 @@ int warn_cast_align; int warn_larger_than; HOST_WIDE_INT larger_than_size; +/* Nonzero means warn about any function whose stack usage is larger + than N bytes. The value N is in `stack_larger_than_size'. */ + +int warn_stack_larger_than; +HOST_WIDE_INT stack_larger_than_size; + /* Nonzero means warn if inline function is too large. */ int warn_inline; @@ -4158,6 +4164,12 @@ decode_W_option (arg) warn_larger_than = larger_than_size != -1; } + else if ((option_value = skip_leading_substring (arg, "stack-larger-than-"))) + { + stack_larger_than_size = read_integral_parameter (option_value, arg - 2, -1); + + warn_stack_larger_than = stack_larger_than_size != -1; + } else if (!strcmp (arg, "unused")) { set_Wunused (1); |