From 4e5ab265983a101efdb5ba67052bd0a59800092d Mon Sep 17 00:00:00 2001
From: Alwin Berger
Date: Thu, 16 Feb 2023 15:38:40 +0100
Subject: [PATCH] add a branchless benchmark

---
 .../Demo/CORTEX_M3_MPS2_QEMU_GCC/Makefile     |   6 +
 .../CORTEX_M3_MPS2_QEMU_GCC/arbitrary_loads.c |   6 +
 FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main.c  |   6 +
 .../main_micro_branchless.c                   | 247 ++++++++++++++++++
 4 files changed, 265 insertions(+)
 create mode 100644 FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main_micro_branchless.c

diff --git a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/Makefile b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/Makefile
index e8aa5ff4..7e60d068 100644
--- a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/Makefile
+++ b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/Makefile
@@ -93,6 +93,11 @@ ifeq ($(WATERS_DEMO), 1)
   SOURCE_FILES += main_waters.c
 
   CFLAGS := -DmainCREATE_WATERS_DEMO=1
+else
+ifeq ($(BRANCHLESS_DEMO), 1)
+  SOURCE_FILES += main_micro_branchless.c
+
+  CFLAGS := -DmainCREATE_BRANCHLESS_DEMO=1
 else
   SOURCE_FILES += main_blinky.c
 
@@ -105,6 +110,7 @@ endif
 endif
 endif
 endif
+endif
 
 DEFINES := -DQEMU_SOC_MPS2 -DHEAP3
 
diff --git a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/arbitrary_loads.c b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/arbitrary_loads.c
index 5b64d7c5..b71c6a53 100644
--- a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/arbitrary_loads.c
+++ b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/arbitrary_loads.c
@@ -27,4 +27,10 @@ static unsigned int rng_seed = 2345745;
 
 // Challanges =======
 #define CHANCE_1_IN_POWOF2(X,Y) (RNG_FROM(X)<(M>>Y)) // assume the type of x has more than y bits
+
+// Branchless polynomials: comparisons act as 0/1 factors instead of if/else. Arguments are evaluated more than once.
+#define U_ABS_DIFF(X,Y) ((__uint32_t)(((__uint32_t)(X)<(__uint32_t)(Y))*((__uint32_t)(Y)-(__uint32_t)(X))+((__uint32_t)(X)>=(__uint32_t)(Y))*((__uint32_t)(X)-(__uint32_t)(Y))))
+#define CHECKED_SQUARE(X, OFF) ((U_ABS_DIFF(X,OFF)<=0x0000FFFF)*(U_ABS_DIFF(X,OFF)*U_ABS_DIFF(X,OFF)))
+#define HILL(X, OFF, H, W) ((U_ABS_DIFF(X,OFF)<=0x0000FFFF)*((H)>=CHECKED_SQUARE((X)/((__uint32_t)(W)), ((OFF)/(__uint32_t)(W))))*((H)-CHECKED_SQUARE((X)/((__uint32_t)(W)), ((OFF)/(__uint32_t)(W)))))
+
 #endif
\ No newline at end of file
diff --git a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main.c b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main.c
index d9b0bf9c..bf0b9a80 100644
--- a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main.c
+++ b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main.c
@@ -91,7 +91,13 @@ int main()
         {
             main_waters();
         }
+    #elif ( mainCREATE_BRANCHLESS_DEMO == 1 )
+        {
+            extern void main_branchless( void ); /* defined in main_micro_branchless.c; avoids an implicit declaration */
+            main_branchless();
+        }
     #else
+        {
             #error "Invalid Selection...\nPlease Select a Demo application from the main command"
         }
 
diff --git a/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main_micro_branchless.c b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main_micro_branchless.c
new file mode 100644
index 00000000..9f2fd1fe
--- /dev/null
+++ b/FreeRTOS/Demo/CORTEX_M3_MPS2_QEMU_GCC/main_micro_branchless.c
@@ -0,0 +1,247 @@
+/*
+ * FreeRTOS V202111.00
+ * Copyright (C) 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * https://www.FreeRTOS.org
+ * https://github.com/FreeRTOS
+ *
+ */
+
+#include <FreeRTOS.h> /* NOTE(review): the five header names were lost in extraction; restored from the APIs used in this file - confirm. */
+#include <task.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+/*
+Branchless micro-benchmark
+prvTask31 reads 4 bytes of fuzz input into INP on every cycle, derives a
+workload from two HILL() terms and hands it to WCET_CLAMP.
+prvTask78 derives a second workload from INP with one HILL() term, prints
+it and then calls trigger_Qemu_break() through WCET_END.
+prvTask400 hands a constant workload to WCET_CLAMP.
+prvTask31 notifies prvTask78, and prvTask78 notifies prvTask400, via xTaskNotify.
+HILL(), U_ABS_DIFF() and CHECKED_SQUARE() come from arbitrary_loads.c; they
+use comparisons as 0/1 factors instead of if/else, so the workload depends
+on the input without taking an input-dependent branch.
+PRINT_TIME and CLAMP are not defined here (presumably in arbitrary_loads.c).
+*/
+// workload macros; the .c file is included directly as source
+
+#include "arbitrary_loads.c"
+
+__attribute__((noinline)) static void trigger_Qemu_break( void )
+{
+    puts("Trigger");
+    while (1) {
+    }
+}
+// #define DEBUG_WCET(A) {A}
+
+// #define WCET_END(A)
+#define WCET_END(A) {A}
+
+#ifdef DEBUG_WCET
+  #define WCET_CLAMP(X, LB, UB, LABEL) PRINT_TIME(UB,LABEL)
+#else
+  #define WCET_CLAMP(X, LB, UB, LABEL) PRINT_TIME(CLAMP(X,LB,UB),LABEL)
+  #define DEBUG_WCET(A)
+#endif
+
+// Begin Input Stuff
+volatile unsigned char FUZZ_INPUT[4096] = {0xa,0xb,0xc,0xd,0xe,0xf};
+volatile uint32_t FUZZ_LENGTH = 4096;
+volatile uint32_t FUZZ_POINTER = 0;
+volatile uint32_t INP = 0;
+// Read the next byte of input; the index wraps at the end of FUZZ_INPUT so the read stays in bounds without a branch
+static unsigned char fuzz_char_next(void) {
+  FUZZ_POINTER++;
+  return FUZZ_INPUT[(FUZZ_POINTER-1) % sizeof FUZZ_INPUT];
+}
+static uint16_t fuzz_short_next(void) {
+  uint16_t value;
+  unsigned char *field = (unsigned char *) &value; /* byte-wise fill; avoids casting a char buffer to uint16_t* */
+  field[0]=fuzz_char_next();
+  field[1]=fuzz_char_next();
+  return value;
+}
+static uint32_t fuzz_long_next(void) {
+  uint32_t value;
+  unsigned char *field = (unsigned char *) &value; /* byte-wise fill; avoids casting a char buffer to uint32_t* */
+  field[0]=fuzz_char_next();
+  field[1]=fuzz_char_next();
+  field[2]=fuzz_char_next();
+  field[3]=fuzz_char_next();
+  return value;
+}
+// End Input Stuff
+static void prvTask31( void * pvParameters );
+static void prvTask78( void * pvParameters );
+static void prvTask90( void * pvParameters );
+static void prvTask397( void * pvParameters );
+static void prvTask400( void * pvParameters );
+static void prvTask416( void * pvParameters );
+static void prvTask579( void * pvParameters );
+static void prvTask1009( void * pvParameters );
+static void prvTask1107( void * pvParameters );
+static void prvTask1129( void * pvParameters );
+
+// Priorities using rate-monotonic scheduling
+// ties are decided favoring short wcets
+// Chain1: 579 -> 1009 -> 1129 -> 416
+//         10ms   10ms    10ms    10ms
+// Chain2: 31 -> 78 -> 400
+//         100ms 10ms  2ms
+// Chain3: 397 -> 90 -> 1107
+//         spor   2ms   50ms
+// cross-chain effect ideas:
+
+// RM + sort by chains
+#define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 3 )
+#define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 2 )
+#define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 1 )
+
+// RM with pref for short
+// #define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 1 )
+// #define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 5 )
+// #define mainTASK_90_PRIO ( tskIDLE_PRIORITY + 8 )
+// #define mainTASK_397_PRIO ( tskIDLE_PRIORITY + 10 )
+// #define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 9 )
+// #define mainTASK_416_PRIO ( tskIDLE_PRIORITY + 4 )
+// #define mainTASK_579_PRIO ( tskIDLE_PRIORITY + 7 )
+// #define mainTASK_1009_PRIO ( tskIDLE_PRIORITY + 6 )
+// #define mainTASK_1107_PRIO ( tskIDLE_PRIORITY + 2 )
+// #define mainTASK_1129_PRIO ( tskIDLE_PRIORITY + 3 )
+
+// Same Prio
+// #define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 1 )
+// #define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 7 )
+// #define mainTASK_90_PRIO ( tskIDLE_PRIORITY + 8 )
+// #define mainTASK_397_PRIO ( tskIDLE_PRIORITY + 10 )
+// #define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 9 )
+// #define mainTASK_416_PRIO ( tskIDLE_PRIORITY + 7 )
+// #define mainTASK_579_PRIO ( tskIDLE_PRIORITY + 7 )
+// #define mainTASK_1009_PRIO ( tskIDLE_PRIORITY + 7 )
+// #define mainTASK_1107_PRIO ( tskIDLE_PRIORITY + 2 )
+// #define mainTASK_1129_PRIO ( tskIDLE_PRIORITY + 7 )
+
+#define TASK_31_MESSAGE "01"
+#define TASK_78_MESSAGE "05"
+#define TASK_400_MESSAGE "09"
+
+// Handles for direct messages
+static TaskHandle_t xTask31 = NULL;
+static TaskHandle_t xTask78 = NULL;
+static TaskHandle_t xTask400 = NULL;
+
+void main_branchless( void )
+{
+    // puts("Main function");
+    /* Create the three chain tasks (31 -> 78 -> 400). The handles are kept
+     * because the tasks address each other with xTaskNotify(). */
+    xTaskCreate( prvTask31,                /* The function that implements the task. */
+                 "31",                     /* The text name assigned to the task - for debug only as it is not used by the kernel. */
+                 configMINIMAL_STACK_SIZE, /* The size of the stack to allocate to the task. */
+                 NULL,                     /* The parameter passed to the task - not used in this case. */
+                 mainTASK_31_PRIO,         /* The priority assigned to the task. */
+                 &xTask31 );               /* Receives the handle of the created task. */
+
+    xTaskCreate( prvTask78,                /* The function that implements the task. */
+                 "78",                     /* The text name assigned to the task - for debug only as it is not used by the kernel. */
+                 configMINIMAL_STACK_SIZE, /* The size of the stack to allocate to the task. */
+                 NULL,                     /* The parameter passed to the task - not used in this case. */
+                 mainTASK_78_PRIO,         /* The priority assigned to the task. */
+                 &xTask78 );               /* Receives the handle; prvTask31 notifies this task through it. */
+
+    xTaskCreate( prvTask400,
+                 "400",
+                 configMINIMAL_STACK_SIZE,
+                 NULL,
+                 mainTASK_400_PRIO,
+                 &xTask400 );              /* Receives the handle; prvTask78 notifies this task through it. */
+
+
+    /* Start the tasks and timer running. */
+    // puts("Start scheduler");
+    vTaskStartScheduler();
+
+    /* If all is well, the scheduler will now be running, and the following
+     * line will never be reached. If the following line does execute, then
+     * there was insufficient FreeRTOS heap memory available for the Idle and/or
+     * timer tasks to be created. See the memory management section on the
+     * FreeRTOS web site for more details on the FreeRTOS heap
+     * http://www.freertos.org/a00111.html. */
+    for( ; ; )
+    {
+    }
+}
+
+
+// Chain2: 31 -> 78 -> 400 | reads 4 input bytes into INP, runs a HILL()-shaped workload, notifies task 78
+static void prvTask31( void * pvParameters ) {
+    TickType_t xLastWakeTime = xTaskGetTickCount();
+    const TickType_t xFrequency = 100 / portTICK_PERIOD_MS;
+    ( void ) pvParameters; /* unused */
+    for( ;; ){
+        // Actions --------------------------------------
+        INP = fuzz_long_next();
+        uint32_t torun = 100000+HILL(INP,500000000,100000,100)+HILL(INP,1500000000,50000,10);
+        WCET_CLAMP(torun, 0, 500000, TASK_31_MESSAGE);
+        xTaskNotify(xTask78, 1, eSetValueWithOverwrite);
+        // ---------------------------------------------
+        vTaskDelayUntil( &xLastWakeTime, xFrequency );}// Wait for the next cycle.
+}
+
+// Chain2: 31 -> 78 -> 400 | workload shrinks where task 31's first hill grows; WCET_END then stops the run
+static void prvTask78( void * pvParameters ) {
+    TickType_t xLastWakeTime = xTaskGetTickCount();
+    const TickType_t xFrequency = 100 / portTICK_PERIOD_MS;
+    for( ;; ){
+        // Actions --------------------------------------
+        uint32_t torun = 100000-HILL(INP,500000000,100000,100);
+        printf("%lu\n", (unsigned long) torun); /* uint32_t is not int on every target; cast to match the format */
+        WCET_CLAMP(torun, 0, 200000, TASK_78_MESSAGE);
+        WCET_END({trigger_Qemu_break();})
+        xTaskNotify(xTask400, 1, eSetValueWithOverwrite);
+        // ---------------------------------------------
+        vTaskDelayUntil( &xLastWakeTime, xFrequency );}
+}
+
+// Chain2: 31 -> 78 -> 400 | constant workload, independent of the input
+static void prvTask400( void * pvParameters ) {
+    TickType_t xLastWakeTime = xTaskGetTickCount();
+    const TickType_t xFrequency = 100 / portTICK_PERIOD_MS;
+    for( ;; ){
+        // Actions --------------------------------------
+        WCET_CLAMP(1, 0, 1765, TASK_400_MESSAGE);
+        // ---------------------------------------------
+        vTaskDelayUntil( &xLastWakeTime, xFrequency );}
+}
+
+void vWatersIdleFunction( void ) {
+    for (int i = 0; i<1000; i++) { /* the counter was previously read uninitialized */
+        puts("0 ");
+    }
+}
+
+void isr_starter( void ) /* Empty stub: does nothing and is not called in this file. NOTE(review): presumably referenced by other demo code - confirm. */
+{
+}
+
+/*-----------------------------------------------------------*/