diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index a769c71f54..b4f3f0efa8 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -475,6 +475,10 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, bool infzero, bool have_snan, float_status *status) { + FloatClass cls[3] = { a_cls, b_cls, c_cls }; + Float3NaNPropRule rule = status->float_3nan_prop_rule; + int which; + /* * We guarantee not to require the target to tell us how to * pick a NaN if we're always returning the default NaN. @@ -500,145 +504,56 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, } } + if (rule == float_3nan_prop_none) { #if defined(TARGET_ARM) - - /* This looks different from the ARM ARM pseudocode, because the ARM ARM - * puts the operands to a fused mac operation (a*b)+c in the order c,a,b. - */ - if (is_snan(c_cls)) { - return 2; - } else if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_qnan(c_cls)) { - return 2; - } else if (is_qnan(a_cls)) { - return 0; - } else { - return 1; - } + /* + * This looks different from the ARM ARM pseudocode, because the ARM ARM + * puts the operands to a fused mac operation (a*b)+c in the order c,a,b + */ + rule = float_3nan_prop_s_cab; #elif defined(TARGET_MIPS) - if (snan_bit_is_one(status)) { - /* Prefer sNaN over qNaN, in the a, b, c order. */ - if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_snan(c_cls)) { - return 2; - } else if (is_qnan(a_cls)) { - return 0; - } else if (is_qnan(b_cls)) { - return 1; + if (snan_bit_is_one(status)) { + rule = float_3nan_prop_s_abc; } else { - return 2; + rule = float_3nan_prop_s_cab; } - } else { - /* Prefer sNaN over qNaN, in the c, a, b order. */ - if (is_snan(c_cls)) { - return 2; - } else if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_qnan(c_cls)) { - return 2; - } else if (is_qnan(a_cls)) { - return 0; - } else { - return 1; - } - } #elif defined(TARGET_LOONGARCH64) - /* Prefer sNaN over qNaN, in the c, a, b order. */ - if (is_snan(c_cls)) { - return 2; - } else if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_qnan(c_cls)) { - return 2; - } else if (is_qnan(a_cls)) { - return 0; - } else { - return 1; - } + rule = float_3nan_prop_s_cab; #elif defined(TARGET_PPC) - /* If fRA is a NaN return it; otherwise if fRB is a NaN return it; - * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB - */ - if (is_nan(a_cls)) { - return 0; - } else if (is_nan(c_cls)) { - return 2; - } else { - return 1; - } + /* + * If fRA is a NaN return it; otherwise if fRB is a NaN return it; + * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB + */ + rule = float_3nan_prop_acb; #elif defined(TARGET_S390X) - if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_snan(c_cls)) { - return 2; - } else if (is_qnan(a_cls)) { - return 0; - } else if (is_qnan(b_cls)) { - return 1; - } else { - return 2; - } + rule = float_3nan_prop_s_abc; #elif defined(TARGET_SPARC) - /* Prefer SNaN over QNaN, order C, B, A. */ - if (is_snan(c_cls)) { - return 2; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_snan(a_cls)) { - return 0; - } else if (is_qnan(c_cls)) { - return 2; - } else if (is_qnan(b_cls)) { - return 1; - } else { - return 0; - } + rule = float_3nan_prop_s_cba; #elif defined(TARGET_XTENSA) - /* - * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns - * an input NaN if we have one (ie c). - */ - if (status->use_first_nan) { - if (is_nan(a_cls)) { - return 0; - } else if (is_nan(b_cls)) { - return 1; + if (status->use_first_nan) { + rule = float_3nan_prop_abc; } else { - return 2; + rule = float_3nan_prop_cba; } - } else { - if (is_nan(c_cls)) { - return 2; - } else if (is_nan(b_cls)) { - return 1; - } else { - return 0; - } - } #else - /* A default implementation: prefer a to b to c. - * This is unlikely to actually match any real implementation. - */ - if (is_nan(a_cls)) { - return 0; - } else if (is_nan(b_cls)) { - return 1; - } else { - return 2; - } + rule = float_3nan_prop_abc; #endif + } + + assert(rule != float_3nan_prop_none); + if (have_snan && (rule & R_3NAN_SNAN_MASK)) { + /* We have at least one SNaN input and should prefer it */ + do { + which = rule & R_3NAN_1ST_MASK; + rule >>= R_3NAN_1ST_LENGTH; + } while (!is_snan(cls[which])); + } else { + do { + which = rule & R_3NAN_1ST_MASK; + rule >>= R_3NAN_1ST_LENGTH; + } while (!is_nan(cls[which])); + } + return which; } /*---------------------------------------------------------------------------- diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index 0bf44dc608..cf06b4e16b 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -81,6 +81,12 @@ static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule, status->float_2nan_prop_rule = rule; } +static inline void set_float_3nan_prop_rule(Float3NaNPropRule rule, + float_status *status) +{ + status->float_3nan_prop_rule = rule; +} + static inline void set_float_infzeronan_rule(FloatInfZeroNaNRule rule, float_status *status) { @@ -143,6 +149,11 @@ static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status) return status->float_2nan_prop_rule; } +static inline Float3NaNPropRule get_float_3nan_prop_rule(float_status *status) +{ + return status->float_3nan_prop_rule; +} + static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status) { return status->float_infzeronan_rule; diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 47bb22c4e2..d9f0797eda 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -80,6 +80,8 @@ this code that are retained. #ifndef SOFTFLOAT_TYPES_H #define SOFTFLOAT_TYPES_H +#include "hw/registerfields.h" + /* * Software IEC/IEEE floating-point types. */ @@ -207,6 +209,58 @@ typedef enum __attribute__((__packed__)) { float_2nan_prop_x87, } Float2NaNPropRule; +/* + * 3-input NaN propagation rule, for fused multiply-add. Individual + * architectures have different rules for which input NaN is + * propagated to the output when there is more than one NaN on the + * input. + * + * If default_nan_mode is enabled then it is valid not to set a NaN + * propagation rule, because the softfloat code guarantees not to try + * to pick a NaN to propagate in default NaN mode. When not in + * default-NaN mode, it is an error for the target not to set the rule + * in float_status if it uses a muladd, and we will assert if we need + * to handle an input NaN and no rule was selected. + * + * The naming scheme for Float3NaNPropRule values is: + * float_3nan_prop_s_abc: + * = "Prefer SNaN over QNaN, then operand A over B over C" + * float_3nan_prop_abc: + * = "Prefer A over B over C regardless of SNaN vs QNAN" + * + * For QEMU, the multiply-add operation is A * B + C. + */ + +/* + * We set the Float3NaNPropRule enum values up so we can select the + * right value in pickNaNMulAdd in a data driven way. + */ +FIELD(3NAN, 1ST, 0, 2) /* which operand is most preferred ? */ +FIELD(3NAN, 2ND, 2, 2) /* which operand is next most preferred ? */ +FIELD(3NAN, 3RD, 4, 2) /* which operand is least preferred ? */ +FIELD(3NAN, SNAN, 6, 1) /* do we prefer SNaN over QNaN ? */ + +#define PROPRULE(X, Y, Z) \ + ((X << R_3NAN_1ST_SHIFT) | (Y << R_3NAN_2ND_SHIFT) | (Z << R_3NAN_3RD_SHIFT)) + +typedef enum __attribute__((__packed__)) { + float_3nan_prop_none = 0, /* No propagation rule specified */ + float_3nan_prop_abc = PROPRULE(0, 1, 2), + float_3nan_prop_acb = PROPRULE(0, 2, 1), + float_3nan_prop_bac = PROPRULE(1, 0, 2), + float_3nan_prop_bca = PROPRULE(1, 2, 0), + float_3nan_prop_cab = PROPRULE(2, 0, 1), + float_3nan_prop_cba = PROPRULE(2, 1, 0), + float_3nan_prop_s_abc = float_3nan_prop_abc | R_3NAN_SNAN_MASK, + float_3nan_prop_s_acb = float_3nan_prop_acb | R_3NAN_SNAN_MASK, + float_3nan_prop_s_bac = float_3nan_prop_bac | R_3NAN_SNAN_MASK, + float_3nan_prop_s_bca = float_3nan_prop_bca | R_3NAN_SNAN_MASK, + float_3nan_prop_s_cab = float_3nan_prop_cab | R_3NAN_SNAN_MASK, + float_3nan_prop_s_cba = float_3nan_prop_cba | R_3NAN_SNAN_MASK, +} Float3NaNPropRule; + +#undef PROPRULE + /* * Rule for result of fused multiply-add 0 * Inf + NaN. * This must be a NaN, but implementations differ on whether this @@ -241,6 +295,7 @@ typedef struct float_status { FloatRoundMode float_rounding_mode; FloatX80RoundPrec floatx80_rounding_precision; Float2NaNPropRule float_2nan_prop_rule; + Float3NaNPropRule float_3nan_prop_rule; FloatInfZeroNaNRule float_infzeronan_rule; bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */