add a branchless benchmark

This commit is contained in:
Alwin Berger 2023-02-16 15:38:40 +01:00
parent 397956b731
commit 4e5ab26598
4 changed files with 264 additions and 0 deletions

View File

@ -93,6 +93,11 @@ ifeq ($(WATERS_DEMO), 1)
SOURCE_FILES += main_waters.c
CFLAGS := -DmainCREATE_WATERS_DEMO=1
else
ifeq ($(BRANCHLESS_DEMO), 1)
SOURCE_FILES += main_micro_branchless.c
CFLAGS := -DmainCREATE_BRANCHLESS_DEMO=1
else
SOURCE_FILES += main_blinky.c
@ -105,6 +110,7 @@ endif
endif
endif
endif
endif
DEFINES := -DQEMU_SOC_MPS2 -DHEAP3

View File

@ -27,4 +27,10 @@ static unsigned int rng_seed = 2345745;
// Challenges =======
// True when RNG_FROM(X) is below M shifted right by Y bits.
// The shift count is parenthesised so that expression arguments (e.g. `a & b`)
// are applied as a whole. Assumes the type of X has more than Y bits.
#define CHANCE_1_IN_POWOF2(X,Y) (RNG_FROM(X)<(M>>(Y)))
// Branchless polynomials
// All three macros are written without if/?: so the value is selected by
// multiplying with 0/1 comparison results. Every argument is parenthesised and
// each expansion is wrapped as a whole, so expression arguments and surrounding
// operators behave as expected. Arguments are evaluated more than once: do not
// pass expressions with side effects.

// Absolute difference |X - Y|, computed on the operands converted to 32-bit unsigned.
#define U_ABS_DIFF(X,Y) ((__uint32_t)((((__uint32_t)(X))<((__uint32_t)(Y)))*(((__uint32_t)(Y))-((__uint32_t)(X)))+(((__uint32_t)(X))>=((__uint32_t)(Y)))*(((__uint32_t)(X))-((__uint32_t)(Y)))))
// (X - OFF)^2, or 0 when |X - OFF| exceeds 0xFFFF (the largest value whose square fits in 32 bits).
#define CHECKED_SQUARE(X, OFF) ((U_ABS_DIFF(X,OFF)<=0x0000FFFF)*(U_ABS_DIFF(X,OFF)*U_ABS_DIFF(X,OFF)))
// Inverted parabola of height H centred at OFF, with X and OFF both divided by W:
// H - (X/W - OFF/W)^2 while that is non-negative and |X - OFF| <= 0xFFFF, otherwise 0.
#define HILL(X, OFF, H, W) ((U_ABS_DIFF(X,OFF)<=0x0000FFFF)*((H)>=CHECKED_SQUARE((X)/((__uint32_t)(W)), ((OFF)/(__uint32_t)(W))))*((H)-CHECKED_SQUARE((X)/((__uint32_t)(W)), ((OFF)/(__uint32_t)(W)))))
#endif

View File

@ -91,7 +91,12 @@ int main()
{
main_waters();
}
#elif ( mainCREATE_BRANCHLESS_DEMO == 1 )
{
main_branchless();
}
#else
{
#error "Invalid Selection...\nPlease Select a Demo application from the main command"
}

View File

@ -0,0 +1,247 @@
/*
* FreeRTOS V202111.00
* Copyright (C) 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* https://www.FreeRTOS.org
* https://github.com/FreeRTOS
*
*/
#include <FreeRTOS.h>
#include <stdint.h>
#include <task.h>
#include <queue.h>
#include <stdio.h>
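/*
Branchless micro benchmark
prvTask31 reads 4 bytes of fuzz input into INP and derives its load value from
two HILL() terms; prvTask78 derives a load value from INP, prints it and then
calls trigger_Qemu_break(); prvTask400 passes a fixed load value to WCET_CLAMP.

NOTE(review): the description below appears to be carried over from the TMR demo;
none of the tasks it names are defined in this file.

TMR Demo with retry
prvSamplerTask will read 4 Bytes of Input into a buffer, unlocks xMutexInput
prvReplicateA and prvReplicateB wait on xMutexInput to average the Inputs and
sum up all numbers up to the Input.
ReplicateA will fail if mod 11 = 0, but only once
ReplicateB will fail if mod 12 = 0
ReplicateC also exists and will never fail, does not run by default
Each Replicate outputs to its own queue
prvVoterTask will wait on ReplicateA&B
If they disagree ReplicateC will be started by mutex.
If all the Replicates disagree now the sampler will be engaged once more
*/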
// include tacle benches
#include "arbitrary_loads.c"
// Prints "Trigger" and then spins forever; this function never returns.
// Kept out-of-line via noinline.
// NOTE(review): presumably the symbol is used as a breakpoint location by the
// emulator harness - confirm.
__attribute__((noinline)) static void trigger_Qemu_break( void )
{
    puts("Trigger");
    for( ;; )
    {
        /* spin */
    }
}
// WCET instrumentation switches.
// Enable the next line to make WCET_CLAMP pass its upper bound UB to PRINT_TIME
// and to keep DEBUG_WCET(...) blocks in the build:
// #define DEBUG_WCET(A) {A}
// Alternative, empty definition of WCET_END (replaces the active one below):
// #define WCET_END(A)
#define WCET_END(A) {A}
#ifndef DEBUG_WCET
    // Normal mode: PRINT_TIME receives CLAMP(X,LB,UB); DEBUG_WCET(...) expands to nothing.
    #define WCET_CLAMP(X, LB, UB, LABEL) PRINT_TIME(CLAMP(X,LB,UB),LABEL)
    #define DEBUG_WCET(A)
#else
    // Debug mode: X and LB are ignored and PRINT_TIME always receives UB.
    #define WCET_CLAMP(X, LB, UB, LABEL) PRINT_TIME(UB,LABEL)
#endif
// Begin Input Stuff
// Input buffer consumed byte by byte through fuzz_char_next(). Only the first six
// bytes have explicit initial values; the rest of the array starts as zero.
// NOTE(review): presumably overwritten by an external harness before the run - confirm.
volatile unsigned char FUZZ_INPUT[4096] = {0xa,0xb,0xc,0xd,0xe,0xf};
// Declared length of the input, initialised to the size of FUZZ_INPUT.
volatile uint32_t FUZZ_LENGTH = 4096;
// Index of the next byte of FUZZ_INPUT to hand out; advanced by fuzz_char_next().
volatile uint32_t FUZZ_POINTER = 0;
// Latest 32-bit input word: written by prvTask31, read by prvTask31 and prvTask78.
volatile uint32_t INP = 0;
// Read the Byte of Input, if the Input is exausted trigger the breakpoint instead
static unsigned char fuzz_char_next(void) {
FUZZ_POINTER++;
return FUZZ_INPUT[FUZZ_POINTER-1];
}
// Read the next two input bytes and return them as a uint16_t in native byte
// order (first byte at the lowest address).
// A union is used for the reinterpretation: dereferencing a uint16_t pointer
// aimed at an unsigned char array violates the aliasing rules and may be misaligned.
static uint16_t fuzz_short_next(void) {
    union {
        unsigned char bytes[sizeof(uint16_t)];
        uint16_t value;
    } raw;
    raw.bytes[0] = fuzz_char_next();
    raw.bytes[1] = fuzz_char_next();
    return raw.value;
}
// Read the next four input bytes and return them as a uint32_t in native byte
// order (first byte at the lowest address).
// A union is used for the reinterpretation: dereferencing a uint32_t pointer
// aimed at an unsigned char array violates the aliasing rules and may be misaligned.
static uint32_t fuzz_long_next(void) {
    union {
        unsigned char bytes[sizeof(uint32_t)];
        uint32_t value;
    } raw;
    raw.bytes[0] = fuzz_char_next();
    raw.bytes[1] = fuzz_char_next();
    raw.bytes[2] = fuzz_char_next();
    raw.bytes[3] = fuzz_char_next();
    return raw.value;
}
// End Input Stuff
// Forward declarations of the task entry points; each takes the opaque
// parameter pointer handed to xTaskCreate.
// NOTE(review): only prvTask31, prvTask78 and prvTask400 are defined in the
// visible part of this file; the other prototypes look like leftovers - confirm
// before removing them.
static void prvTask31( void * pvParameters );
static void prvTask78( void * pvParameters );
static void prvTask90( void * pvParameters );
static void prvTask397( void * pvParameters );
static void prvTask400( void * pvParameters );
static void prvTask416( void * pvParameters );
static void prvTask579( void * pvParameters );
static void prvTask1009( void * pvParameters );
static void prvTask1107( void * pvParameters );
static void prvTask1129( void * pvParameters );
// Priorities using rate-monotonic scheduling
// ties are decided favoring short wcets
// Chain1: 579 -> 1009 -> 1129 -> 416
// 10ms 10ms 10ms 10ms
// Chain2: 31 -> 78 -> 400
// 100ms 10ms 2ms
// Chain3: 397 -> 90 -> 1107
// spor 2ms 50ms
// cross-chain effect ideas:
// RM + sort by chains
// Active priority assignment for the three tasks created in main_branchless(),
// expressed as offsets above tskIDLE_PRIORITY: task 31 gets the largest offset,
// task 400 the smallest.
#define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 3 )
#define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 2 )
#define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 1 )
// RM with pref for short
// #define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 1 )
// #define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 5 )
// #define mainTASK_90_PRIO ( tskIDLE_PRIORITY + 8 )
// #define mainTASK_397_PRIO ( tskIDLE_PRIORITY + 10 )
// #define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 9 )
// #define mainTASK_416_PRIO ( tskIDLE_PRIORITY + 4 )
// #define mainTASK_579_PRIO ( tskIDLE_PRIORITY + 7 )
// #define mainTASK_1009_PRIO ( tskIDLE_PRIORITY + 6 )
// #define mainTASK_1107_PRIO ( tskIDLE_PRIORITY + 2 )
// #define mainTASK_1129_PRIO ( tskIDLE_PRIORITY + 3 )
// Same Prio
// #define mainTASK_31_PRIO ( tskIDLE_PRIORITY + 1 )
// #define mainTASK_78_PRIO ( tskIDLE_PRIORITY + 7 )
// #define mainTASK_90_PRIO ( tskIDLE_PRIORITY + 8 )
// #define mainTASK_397_PRIO ( tskIDLE_PRIORITY + 10 )
// #define mainTASK_400_PRIO ( tskIDLE_PRIORITY + 9 )
// #define mainTASK_416_PRIO ( tskIDLE_PRIORITY + 7 )
// #define mainTASK_579_PRIO ( tskIDLE_PRIORITY + 7 )
// #define mainTASK_1009_PRIO ( tskIDLE_PRIORITY + 7 )
// #define mainTASK_1107_PRIO ( tskIDLE_PRIORITY + 2 )
// #define mainTASK_1129_PRIO ( tskIDLE_PRIORITY + 7 )
// Per-task label strings, passed as the LABEL argument of WCET_CLAMP.
#define TASK_31_MESSAGE "01"
#define TASK_78_MESSAGE "05"
#define TASK_400_MESSAGE "09"
// Handles for direct messages
// Filled in by xTaskCreate in main_branchless(); xTask78 and xTask400 are the
// targets of the xTaskNotify calls in prvTask31 and prvTask78 respectively.
static TaskHandle_t xTask31 = NULL;
static TaskHandle_t xTask78 = NULL;
static TaskHandle_t xTask400 = NULL;
/*
 * Entry point of the branchless demo.
 * Creates the three tasks (31, 78 and 400), each with configMINIMAL_STACK_SIZE,
 * no task parameter and its mainTASK_*_PRIO priority, stores their handles in
 * xTask31 / xTask78 / xTask400, and then starts the scheduler.
 */
void main_branchless( void )
{
    /* Arguments: entry function, debug name, stack size, task parameter,
     * priority, address that receives the task handle. */
    xTaskCreate( prvTask31, "31", configMINIMAL_STACK_SIZE, NULL, mainTASK_31_PRIO, &xTask31 );
    xTaskCreate( prvTask78, "78", configMINIMAL_STACK_SIZE, NULL, mainTASK_78_PRIO, &xTask78 );
    xTaskCreate( prvTask400, "400", configMINIMAL_STACK_SIZE, NULL, mainTASK_400_PRIO, &xTask400 );

    /* Hand control to the scheduler. */
    vTaskStartScheduler();

    /* Not reached while the scheduler is running. Getting here means there was
     * insufficient FreeRTOS heap memory available for the Idle and/or timer
     * tasks to be created; see http://www.freertos.org/a00111.html. */
    while( 1 )
    {
    }
}
// Chain2: 31 -> 78 -> 400
// Head of the chain. Every cycle it reads a fresh 32-bit input word into INP,
// computes a load value of 100000 plus two HILL() terms (centred at 500000000
// and 1500000000), passes it to WCET_CLAMP with bounds 0..500000, and notifies
// task 78. pvParameters is unused.
static void prvTask31( void * pvParameters ) {
    ( void ) pvParameters; // not used by this task
    TickType_t xLastWakeTime = xTaskGetTickCount();
    const TickType_t xFrequency = 100 / portTICK_PERIOD_MS;
    for( ;; ){
        // Actions --------------------------------------
        INP = fuzz_long_next();
        uint32_t torun = 100000+HILL(INP,500000000,100000,100)+HILL(INP,1500000000,50000,10);
        WCET_CLAMP(torun, 0, 500000, TASK_31_MESSAGE)
        xTaskNotify(xTask78, 1, eSetValueWithOverwrite);
        // ---------------------------------------------
        vTaskDelayUntil( &xLastWakeTime, xFrequency ); // Wait for the next cycle.
    }
}
// Chain2: 31 -> 78 -> 400
// Middle of the chain. Computes a load value of 100000 minus the HILL() term
// centred at 500000000 from the current INP, prints it, passes it to WCET_CLAMP
// with bounds 0..200000, then runs the WCET_END block, which calls
// trigger_Qemu_break(). pvParameters is unused.
static void prvTask78( void * pvParameters ) {
    ( void ) pvParameters; // not used by this task
    TickType_t xLastWakeTime = xTaskGetTickCount();
    const TickType_t xFrequency = 100 / portTICK_PERIOD_MS;
    for( ;; ){
        // Actions --------------------------------------
        uint32_t torun = 100000-HILL(INP,500000000,100000,100);
        // torun is unsigned: print it with a matching conversion instead of %d.
        printf("%lu\n",(unsigned long)torun);
        WCET_CLAMP(torun, 0, 200000, TASK_78_MESSAGE);
        WCET_END({trigger_Qemu_break();})
        xTaskNotify(xTask400, 1, eSetValueWithOverwrite);
        // ---------------------------------------------
        vTaskDelayUntil( &xLastWakeTime, xFrequency );
    }
}
// Chain2: 31 -> 78 -> 400
// Tail of the chain. Every cycle it passes the constant value 1 with bounds
// 0..1765 to WCET_CLAMP and then sleeps until its next release.
// pvParameters is unused.
static void prvTask400( void * pvParameters ) {
    const TickType_t xPeriodTicks = 100 / portTICK_PERIOD_MS;
    TickType_t xPreviousWake = xTaskGetTickCount();

    while( 1 )
    {
        // Actions --------------------------------------
        WCET_CLAMP(1, 0, 1765, TASK_400_MESSAGE)
        // ---------------------------------------------
        vTaskDelayUntil( &xPreviousWake, xPeriodTicks );
    }
}
// Prints the line "0 " one thousand times via puts().
// The loop counter is initialised explicitly; reading it uninitialised is
// undefined behaviour and made the number of iterations unpredictable.
void vWatersIdleFunction() {
    for (int i = 0; i < 1000; i++) {
        puts("0 ");
    }
}
// Intentionally empty in this demo.
// NOTE(review): presumably referenced from startup or interrupt code outside
// this file - confirm before removing.
void isr_starter( void )
{
}
/*-----------------------------------------------------------*/