fix sh/fdpic 64-bit division code in libgcc
I originally missed the udiv_qrnnd part of the FDPIC patch when forward-porting it, because longlong.h had moved to a non-obvious location. However, the original patch was broken anyway: it misordered instructions at branch delay slots and clobbered in-use registers. My version of the code is more efficient because it relies on the knowledge that __udiv_qrnnd_16 is a pure-PIC function that makes no calls, uses no static data, and does not clobber r12. This means we only need to load the instruction address, not the GOT address, from the function descriptor to make the call. The same could be achieved even more efficiently by declaring __udiv_qrnnd_16 with object type instead of function type and using the existing non-FDPIC asm unmodified. I did not do this because I was afraid it would be considered a hack unacceptable for upstream.
This commit is contained in:
parent
67122dc449
commit
f872b28199
|
@ -1951,3 +1951,44 @@ diff -urp ../baseline/gcc-5.2.0/libitm/config/sh/sjlj.S gcc-5.2.0/libitm/config/
|
||||||
#endif
|
#endif
|
||||||
.size _ITM_beginTransaction, . - _ITM_beginTransaction
|
.size _ITM_beginTransaction, . - _ITM_beginTransaction
|
||||||
|
|
||||||
|
diff -urp ../baseline/gcc-5.2.0/include/longlong.h gcc-5.2.0/include/longlong.h
|
||||||
|
--- ../baseline/gcc-5.2.0/include/longlong.h 2014-10-28 20:22:40.000000000 +0000
|
||||||
|
+++ gcc-5.2.0/include/longlong.h 2015-09-24 02:40:55.451988407 +0000
|
||||||
|
@@ -1102,6 +1102,29 @@ extern UDItype __umulsidi3 (USItype, USI
|
||||||
|
/* This is the same algorithm as __udiv_qrnnd_c. */
|
||||||
|
#define UDIV_NEEDS_NORMALIZATION 1
|
||||||
|
|
||||||
|
+#ifdef __FDPIC__
|
||||||
|
+#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||||
|
+ do { \
|
||||||
|
+ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
||||||
|
+ __attribute__ ((visibility ("hidden"))); \
|
||||||
|
+ /* FDPIC variant: operand %5 is the address of the function descriptor of __udiv_qrnnd_16, so the entry address is loaded from it into r2 before each jsr. Register roles: */ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
|
||||||
|
+ __asm__ ( \
|
||||||
|
+ "mov%M4 %4,r5\n" /* r5 = d (operand %4 may be a register or memory) */ \
|
||||||
|
+" swap.w %3,r4\n" /* r4 = n0 with its 16-bit halves swapped */ \
|
||||||
|
+" swap.w r5,r6\n" /* r6 = d with its 16-bit halves swapped */ \
|
||||||
|
+" mov.l @%5,r2\n" /* r2 = first word at %5, used as the call target */ \
|
||||||
|
+" jsr @r2\n" /* first call */ \
|
||||||
|
+" shll16 r6\n" /* delay slot of the first jsr: r6 <<= 16 */ \
|
||||||
|
+" swap.w r4,r4\n" /* swap the halves of r4 again for the second call */ \
|
||||||
|
+" mov.l @%5,r2\n" /* reload the call target; r2 is listed as clobbered, so it is not assumed to survive the call */ \
|
||||||
|
+" jsr @r2\n" /* second call */ \
|
||||||
|
+" swap.w r1,%0\n" /* delay slot of the second jsr: %0 = r1 from the first call, halves swapped */ \
|
||||||
|
+" or r1,%0" /* merge r1 from the second call into q */ \
|
||||||
|
+ : "=r" (q), "=&z" (r) \
|
||||||
|
+ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
||||||
|
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
||||||
|
+ } while (0)
|
||||||
|
+#else
|
||||||
|
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||||
|
do { \
|
||||||
|
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
|
||||||
|
@@ -1121,6 +1144,7 @@ extern UDItype __umulsidi3 (USItype, USI
|
||||||
|
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
|
||||||
|
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
|
||||||
|
} while (0)
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#define UDIV_TIME 80
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue