fix sh/fdpic 64-bit division code in libgcc

I originally missed the udiv_qrnnd part of the fdpic patch when
forward-porting it because longlong.h moved to a non-obvious location.
however, the original patch was broken anyway; it misordered
instructions at branch delay slots and clobbered in-use registers.

my version of the code is more efficient because it uses knowledge
that __udiv_qrnnd_16 is a pure-PIC function that makes no calls, uses
no static data, and does not clobber r12. this means we only need to
load the instruction address, not the got address, from the function
descriptor to make the call.

the same can be achieved even more efficiently by declaring
__udiv_qrnnd_16 with object type instead of function type and using
the existing non-fdpic asm unmodified. I did not do this because I was
afraid it would be considered a hack unacceptable for upstream.
This commit is contained in:
Rich Felker 2015-09-24 02:44:28 +00:00
parent 67122dc449
commit f872b28199
1 changed files with 41 additions and 0 deletions

View File

@ -1951,3 +1951,44 @@ diff -urp ../baseline/gcc-5.2.0/libitm/config/sh/sjlj.S gcc-5.2.0/libitm/config/
#endif #endif
.size _ITM_beginTransaction, . - _ITM_beginTransaction .size _ITM_beginTransaction, . - _ITM_beginTransaction
diff -urp ../baseline/gcc-5.2.0/include/longlong.h gcc-5.2.0/include/longlong.h
--- ../baseline/gcc-5.2.0/include/longlong.h 2014-10-28 20:22:40.000000000 +0000
+++ gcc-5.2.0/include/longlong.h 2015-09-24 02:40:55.451988407 +0000
@@ -1102,6 +1102,29 @@ extern UDItype __umulsidi3 (USItype, USI
/* This is the same algorithm as __udiv_qrnnd_c. */
#define UDIV_NEEDS_NORMALIZATION 1
+#ifdef __FDPIC__
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
+ __attribute__ ((visibility ("hidden"))); \
+ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
+ __asm__ ( \
+ "mov%M4 %4,r5\n" \
+" swap.w %3,r4\n" \
+" swap.w r5,r6\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" shll16 r6\n" \
+" swap.w r4,r4\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" swap.w r1,%0\n" \
+" or r1,%0" \
+ : "=r" (q), "=&z" (r) \
+ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
+ } while (0)
+#else
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
@@ -1121,6 +1144,7 @@ extern UDItype __umulsidi3 (USItype, USI
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
} while (0)
+#endif
#define UDIV_TIME 80