 403f290c06
			
		
	
	
		403f290c06
		
	
	
	
	
		
			
			Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
       7487.087786      task-clock (msec)         #    0.998 CPUs utilized            ( +-  0.12% )
    31,574,905,303      cycles                    #    4.217 GHz                      ( +-  0.12% )
    57,097,908,812      instructions              #    1.81  insns per cycle          ( +-  0.08% )
    10,255,415,367      branches                  # 1369.747 M/sec                    ( +-  0.08% )
       173,278,962      branch-misses             #    1.69% of all branches          ( +-  0.18% )
       7.504481349 seconds time elapsed                                          ( +-  0.14% )
- After:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
       7462.441328      task-clock (msec)         #    0.998 CPUs utilized            ( +-  0.07% )
    31,478,476,520      cycles                    #    4.218 GHz                      ( +-  0.07% )
    57,017,330,084      instructions              #    1.81  insns per cycle          ( +-  0.05% )
    10,251,929,667      branches                  # 1373.804 M/sec                    ( +-  0.05% )
       173,023,787      branch-misses             #    1.69% of all branches          ( +-  0.11% )
       7.474970463 seconds time elapsed                                          ( +-  0.07% )
2. SPEC06int:
                                              SPEC06int (test set)
                                           [Y axis: Speedup over master]
  1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
       |                                                                                                  |
   1.1 +-+.................................+++.............................+  tlb-lock-v2 (m+++x)       +-+
       |                                +++ |                   +++        tlb-lock-v3 (spinl|ck)         |
       |                    +++          |  |     +++    +++     |                           |            |
  1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
       |      ###         ++#| #         |# |# ***### +++### +++#+#     |     +++     |     #|#    ###    |
     1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
       |    *+* #    #++# ***  #   #### ***  # * *++# ****+# *| * # ****|#   |# #    #|#    #+#    # #    |
  0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
       |    * * #    #  # *|*  #   #  # *|*  # * *  # *++* # *  * # *  * # * |* #  ++# #    # #  *** #    |
       |    * * #  ++#  # *+*  #   #  # *|*  # * *  # *  * # *  * # *  * # *++* # **** #  ++# #  * * #    |
   0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
       |    * * #  ***  # * *  #  |#  # *+*  # * *  # *  * # *  * # *  * # *  * # *++* #   |# #  * * #    |
  0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
       |    * * #  *+*  # * *  # *|*  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # * |* #  * * #    |
       |    * * #  * *  # * *  # *+*  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # * |* #  * * #    |
   0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
       |    * * #  * *  # * *  # * *  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # *  * #  * * #    |
  0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
 400.perlbench 401.bzip2 403.gcc 429.mcf 445.gobmk 456.hmmer 458.sjeng 462.libquantum 464.h264ref 471.omnetpp 473.astar 483.xalancbmk geomean
  png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
  a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181016153840.25877-1-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
		
	
			
		
			
				
	
	
		
			211 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			211 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 *  Software MMU support
 *
 * Generate inline load/store functions for one MMU mode and data
 * size.
 *
 * Generate a store function as well as signed and unsigned loads.
 *
 * Not used directly but included from cpu_ldst.h.
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 | |
| 
 | |
| #if !defined(SOFTMMU_CODE_ACCESS)
 | |
| #include "trace-root.h"
 | |
| #endif
 | |
| 
 | |
| #include "trace/mem.h"
 | |
| 
 | |
| #if DATA_SIZE == 8
 | |
| #define SUFFIX q
 | |
| #define USUFFIX q
 | |
| #define DATA_TYPE uint64_t
 | |
| #define SHIFT 3
 | |
| #elif DATA_SIZE == 4
 | |
| #define SUFFIX l
 | |
| #define USUFFIX l
 | |
| #define DATA_TYPE uint32_t
 | |
| #define SHIFT 2
 | |
| #elif DATA_SIZE == 2
 | |
| #define SUFFIX w
 | |
| #define USUFFIX uw
 | |
| #define DATA_TYPE uint16_t
 | |
| #define DATA_STYPE int16_t
 | |
| #define SHIFT 1
 | |
| #elif DATA_SIZE == 1
 | |
| #define SUFFIX b
 | |
| #define USUFFIX ub
 | |
| #define DATA_TYPE uint8_t
 | |
| #define DATA_STYPE int8_t
 | |
| #define SHIFT 0
 | |
| #else
 | |
| #error unsupported data size
 | |
| #endif
 | |
| 
 | |
| #if DATA_SIZE == 8
 | |
| #define RES_TYPE uint64_t
 | |
| #else
 | |
| #define RES_TYPE uint32_t
 | |
| #endif
 | |
| 
 | |
| #ifdef SOFTMMU_CODE_ACCESS
 | |
| #define ADDR_READ addr_code
 | |
| #define MMUSUFFIX _cmmu
 | |
| #define URETSUFFIX SUFFIX
 | |
| #define SRETSUFFIX SUFFIX
 | |
| #else
 | |
| #define ADDR_READ addr_read
 | |
| #define MMUSUFFIX _mmu
 | |
| #define URETSUFFIX USUFFIX
 | |
| #define SRETSUFFIX glue(s, SUFFIX)
 | |
| #endif
 | |
| 
 | |
| /* generic load/store macros */
 | |
| 
 | |
| static inline RES_TYPE
 | |
| glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 | |
|                                                   target_ulong ptr,
 | |
|                                                   uintptr_t retaddr)
 | |
| {
 | |
|     CPUTLBEntry *entry;
 | |
|     RES_TYPE res;
 | |
|     target_ulong addr;
 | |
|     int mmu_idx;
 | |
|     TCGMemOpIdx oi;
 | |
| 
 | |
| #if !defined(SOFTMMU_CODE_ACCESS)
 | |
|     trace_guest_mem_before_exec(
 | |
|         ENV_GET_CPU(env), ptr,
 | |
|         trace_mem_build_info(SHIFT, false, MO_TE, false));
 | |
| #endif
 | |
| 
 | |
|     addr = ptr;
 | |
|     mmu_idx = CPU_MMU_INDEX;
 | |
|     entry = tlb_entry(env, mmu_idx, addr);
 | |
|     if (unlikely(entry->ADDR_READ !=
 | |
|                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
 | |
|         oi = make_memop_idx(SHIFT, mmu_idx);
 | |
|         res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
 | |
|                                                             oi, retaddr);
 | |
|     } else {
 | |
|         uintptr_t hostaddr = addr + entry->addend;
 | |
|         res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr);
 | |
|     }
 | |
|     return res;
 | |
| }
 | |
| 
 | |
| static inline RES_TYPE
 | |
| glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 | |
| {
 | |
|     return glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(env, ptr, 0);
 | |
| }
 | |
| 
 | |
| #if DATA_SIZE <= 2
 | |
| static inline int
 | |
| glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 | |
|                                                   target_ulong ptr,
 | |
|                                                   uintptr_t retaddr)
 | |
| {
 | |
|     CPUTLBEntry *entry;
 | |
|     int res;
 | |
|     target_ulong addr;
 | |
|     int mmu_idx;
 | |
|     TCGMemOpIdx oi;
 | |
| 
 | |
| #if !defined(SOFTMMU_CODE_ACCESS)
 | |
|     trace_guest_mem_before_exec(
 | |
|         ENV_GET_CPU(env), ptr,
 | |
|         trace_mem_build_info(SHIFT, true, MO_TE, false));
 | |
| #endif
 | |
| 
 | |
|     addr = ptr;
 | |
|     mmu_idx = CPU_MMU_INDEX;
 | |
|     entry = tlb_entry(env, mmu_idx, addr);
 | |
|     if (unlikely(entry->ADDR_READ !=
 | |
|                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
 | |
|         oi = make_memop_idx(SHIFT, mmu_idx);
 | |
|         res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
 | |
|                                MMUSUFFIX)(env, addr, oi, retaddr);
 | |
|     } else {
 | |
|         uintptr_t hostaddr = addr + entry->addend;
 | |
|         res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr);
 | |
|     }
 | |
|     return res;
 | |
| }
 | |
| 
 | |
| static inline int
 | |
| glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 | |
| {
 | |
|     return glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(env, ptr, 0);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #ifndef SOFTMMU_CODE_ACCESS
 | |
| 
 | |
| /* generic store macro */
 | |
| 
 | |
| static inline void
 | |
| glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 | |
|                                                  target_ulong ptr,
 | |
|                                                  RES_TYPE v, uintptr_t retaddr)
 | |
| {
 | |
|     CPUTLBEntry *entry;
 | |
|     target_ulong addr;
 | |
|     int mmu_idx;
 | |
|     TCGMemOpIdx oi;
 | |
| 
 | |
| #if !defined(SOFTMMU_CODE_ACCESS)
 | |
|     trace_guest_mem_before_exec(
 | |
|         ENV_GET_CPU(env), ptr,
 | |
|         trace_mem_build_info(SHIFT, false, MO_TE, true));
 | |
| #endif
 | |
| 
 | |
|     addr = ptr;
 | |
|     mmu_idx = CPU_MMU_INDEX;
 | |
|     entry = tlb_entry(env, mmu_idx, addr);
 | |
|     if (unlikely(tlb_addr_write(entry) !=
 | |
|                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
 | |
|         oi = make_memop_idx(SHIFT, mmu_idx);
 | |
|         glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
 | |
|                                                      retaddr);
 | |
|     } else {
 | |
|         uintptr_t hostaddr = addr + entry->addend;
 | |
|         glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
 | |
|                                       RES_TYPE v)
 | |
| {
 | |
|     glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(env, ptr, v, 0);
 | |
| }
 | |
| 
 | |
| #endif /* !SOFTMMU_CODE_ACCESS */
 | |
| 
 | |
/*
 * Remove every macro configured by this template so that it can be
 * included again with different settings.  DATA_SIZE is removed as well,
 * so it has to be defined anew before each inclusion.
 */

/* Size-dependent macros. */
#undef DATA_SIZE
#undef SHIFT
#undef SUFFIX
#undef USUFFIX
#undef DATA_TYPE
#undef DATA_STYPE
#undef RES_TYPE

/* Access-kind macros. */
#undef ADDR_READ
#undef MMUSUFFIX
#undef URETSUFFIX
#undef SRETSUFFIX
 |