From f872b281996523b2d9c979b914cb9d23c9b1d1f8 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 24 Sep 2015 02:44:28 +0000 Subject: [PATCH] fix sh/fdpic 64-bit division code in libgcc I originally missed the udiv_qrnnd part of the fdpic patch when forward-porting it because longlong.h moved to a non-obvious location. However, the original patch was broken anyway; it misordered instructions at branch delay slots and clobbered in-use registers. My version of the code is more efficient because it uses knowledge that __udiv_qrnnd_16 is a pure-PIC function that makes no calls, uses no static data, and does not clobber r12. This means we only need to load the instruction address, not the GOT address, from the function descriptor to make the call. The same can be achieved even more efficiently by declaring __udiv_qrnnd_16 with object type instead of function type and using the existing non-fdpic asm unmodified. I did not do this because I was afraid it would be considered a hack unacceptable for upstream. --- patches/gcc-5.2.0/0007-fdpic.diff | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/patches/gcc-5.2.0/0007-fdpic.diff b/patches/gcc-5.2.0/0007-fdpic.diff index 47e0461..3118b1b 100644 --- a/patches/gcc-5.2.0/0007-fdpic.diff +++ b/patches/gcc-5.2.0/0007-fdpic.diff @@ -1951,3 +1951,44 @@ diff -urp ../baseline/gcc-5.2.0/libitm/config/sh/sjlj.S gcc-5.2.0/libitm/config/ #endif .size _ITM_beginTransaction, . - _ITM_beginTransaction +diff -urp ../baseline/gcc-5.2.0/include/longlong.h gcc-5.2.0/include/longlong.h +--- ../baseline/gcc-5.2.0/include/longlong.h 2014-10-28 20:22:40.000000000 +0000 ++++ gcc-5.2.0/include/longlong.h 2015-09-24 02:40:55.451988407 +0000 +@@ -1102,6 +1102,29 @@ extern UDItype __umulsidi3 (USItype, USI + /* This is the same algorithm as __udiv_qrnnd_c. 
*/ + #define UDIV_NEEDS_NORMALIZATION 1 + ++#ifdef __FDPIC__ ++#define udiv_qrnnd(q, r, n1, n0, d) \ ++ do { \ ++ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ ++ __attribute__ ((visibility ("hidden"))); \ ++ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ ++ __asm__ ( \ ++ "mov%M4 %4,r5\n" \ ++" swap.w %3,r4\n" \ ++" swap.w r5,r6\n" \ ++" mov.l @%5,r2\n" \ ++" jsr @r2\n" \ ++" shll16 r6\n" \ ++" swap.w r4,r4\n" \ ++" mov.l @%5,r2\n" \ ++" jsr @r2\n" \ ++" swap.w r1,%0\n" \ ++" or r1,%0" \ ++ : "=r" (q), "=&z" (r) \ ++ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ ++ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ ++ } while (0) ++#else + #define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ +@@ -1121,6 +1144,7 @@ extern UDItype __umulsidi3 (USItype, USI + : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ + : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ + } while (0) ++#endif + + #define UDIV_TIME 80 +