/*************************************************************************
ALGLIB 3.16.0 (source code generated 2019-12-19)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif

//
// if AE_OS==AE_LINUX (will be redefined to AE_POSIX in ap.h),
// set _GNU_SOURCE flag BEFORE any #includes to get affinity
// management functions
//
#if (AE_OS==AE_LINUX) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif

#include "stdafx.h"
#include "ap.h"
#include <limits>
#include <locale.h>
#include <ctype.h>

#if defined(AE_CPU)
#if (AE_CPU==AE_INTEL)

#if AE_COMPILER==AE_MSVC
#include <intrin.h>
#endif

#endif
#endif

// disable some irrelevant warnings
#if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
#pragma warning(disable:4100)
#pragma warning(disable:4127)
#pragma warning(disable:4611)
#pragma warning(disable:4702)
#pragma warning(disable:4996)
#endif

/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE
// MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS
// SHARED BETWEEN C++ AND PURE C LIBRARIES
//
/////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{
/*
 * OS-specific includes
 */
#ifdef AE_USE_CPP
}
#endif
#if AE_OS==AE_WINDOWS
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0501
#endif
#include <windows.h>
#include <process.h>
#elif AE_OS==AE_POSIX
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
#endif
/* Debugging helpers for Windows */
#ifdef AE_DEBUG4WINDOWS
#include <windows.h>
#include <stdio.h>
#endif
#ifdef AE_USE_CPP
namespace alglib_impl
{
#endif

/*
 * local definitions
 */
#define x_nb 16
#define AE_DATA_ALIGN 64
#define AE_PTR_ALIGN sizeof(void*)
#define DYN_BOTTOM ((void*)1)
#define DYN_FRAME  ((void*)2)
#define AE_LITTLE_ENDIAN 1
#define AE_BIG_ENDIAN    2
#define AE_MIXED_ENDIAN  3
#define AE_SER_ENTRY_LENGTH 11
#define AE_SER_ENTRIES_PER_ROW 5

#define AE_SM_DEFAULT 0
#define AE_SM_ALLOC 1
#define AE_SM_READY2S 2
#define AE_SM_TO_STRING 10
#define AE_SM_TO_CPPSTRING 11
#define AE_SM_TO_STREAM 12
#define AE_SM_FROM_STRING 20
#define AE_SM_FROM_STREAM 22

#define AE_LOCK_CYCLES 512
#define AE_LOCK_TESTS_BEFORE_YIELD 16
#define AE_CRITICAL_ASSERT(x) if( !(x) ) abort()

/* IDs for set_dbg_value */
#define _ALGLIB_USE_ALLOC_COUNTER             0
#define _ALGLIB_USE_DBG_COUNTERS              1
#define _ALGLIB_USE_VENDOR_KERNELS          100
#define _ALGLIB_VENDOR_MEMSTAT              101

#define _ALGLIB_DEBUG_WORKSTEALING          200
#define _ALGLIB_WSDBG_NCORES                201
#define _ALGLIB_WSDBG_PUSHROOT_OK           202
#define _ALGLIB_WSDBG_PUSHROOT_FAILED       203

#define _ALGLIB_SET_GLOBAL_THREADING       1001
#define _ALGLIB_SET_NWORKERS               1002

/* IDs for get_dbg_value */
#define _ALGLIB_GET_ALLOC_COUNTER             0
#define _ALGLIB_GET_CUMULATIVE_ALLOC_SIZE     1
#define _ALGLIB_GET_CUMULATIVE_ALLOC_COUNT    2

#define _ALGLIB_GET_CORES_COUNT            1000
#define _ALGLIB_GET_GLOBAL_THREADING       1001
#define _ALGLIB_GET_NWORKERS               1002

/*************************************************************************
Lock.

This is internal structure which implements lock functionality.
*************************************************************************/
typedef struct
{
#if AE_OS==AE_WINDOWS
    volatile ae_int_t * volatile p_lock;
    char buf[sizeof(ae_int_t)+AE_LOCK_ALIGNMENT];
#elif AE_OS==AE_POSIX
    pthread_mutex_t mutex;
#else
    ae_bool is_locked;
#endif
} _lock;

/*
 * Error tracking facilities; these fields are modified every time ae_set_error_flag()
 * is called with non-zero cond. Thread unsafe access, but it does not matter actually.
 */
static const char * sef_file  = "";
static int          sef_line  = 0;
static const char * sef_xdesc = "";

/*
 * Global flags, split into several char-sized variables in order
 * to avoid problem with non-atomic reads/writes (single-byte ops
 * are atomic on all modern architectures);
 *
 * Following variables are included:
 * * threading-related settings
 */
unsigned char _alglib_global_threading_flags = _ALGLIB_FLG_THREADING_SERIAL>>_ALGLIB_FLG_THREADING_SHIFT;

/*
 * DESCRIPTION: recommended number of active workers:
 * * positive value >=1 is used to specify exact number of active workers
 * * 0 means that ALL available cores are used
 * * negative value means that all cores EXCEPT for cores_to_use will be used
 *   (say, -1 means that all cores except for one will be used). At least one
 *   core will be used in this case, even if you assign -9999999 to this field.
 *
 * Default value = 0 (fully parallel execution) when AE_NWORKERS is not defined
 *               = 0 for manually defined number of cores (AE_NWORKERS is defined)
 * PROTECTION:   not needed; runtime modification is possible, but we do not need exact
 *               synchronization.
 */
#if defined(AE_NWORKERS) && (AE_NWORKERS<=0)
#error AE_NWORKERS must be positive number or not defined at all.
#endif
#if defined(AE_NWORKERS)
ae_int_t _alglib_cores_to_use = 0;
#else
ae_int_t _alglib_cores_to_use = 0;
#endif

/*
 * Debug counters
 */
ae_int_t _alloc_counter = 0;
ae_int_t _alloc_counter_total = 0;
ae_bool  _use_alloc_counter = ae_false;

ae_int_t _dbg_alloc_total = 0;
ae_bool  _use_dbg_counters = ae_false;

ae_bool  _use_vendor_kernels = ae_true;

ae_bool  debug_workstealing = ae_false; /* debug workstealing environment? False by default */
ae_int_t dbgws_pushroot_ok = 0;
ae_int_t dbgws_pushroot_failed = 0;

#ifdef AE_SMP_DEBUGCOUNTERS
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_acquisitions = 0;
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_spinwaits = 0;
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_yields = 0;
#endif

/*
 * Allocation debugging
 */
ae_bool  _force_malloc_failure = ae_false;
ae_int_t _malloc_failure_after = 0;

/*
 * Trace-related declarations:
 * alglib_trace_type    -   trace output type
 * alglib_trace_file    -   file descriptor (to be used by ALGLIB code which
 *                          sends messages to trace log)
 * alglib_fclose_trace  -   whether we have to call fclose() when disabling or
 *                          changing trace output
 * alglib_trace_tags    -   string buffer used to store tags + two additional
 *                          characters (leading and trailing commas) + null
 *                          terminator
 */
#define ALGLIB_TRACE_NONE 0
#define ALGLIB_TRACE_FILE 1
#define ALGLIB_TRACE_TAGS_LEN 2048
#define ALGLIB_TRACE_BUFFER_LEN (ALGLIB_TRACE_TAGS_LEN+2+1)
static ae_int_t  alglib_trace_type = ALGLIB_TRACE_NONE;
FILE            *alglib_trace_file = NULL;
static ae_bool   alglib_fclose_trace = ae_false;
static char      alglib_trace_tags[ALGLIB_TRACE_BUFFER_LEN];

/*
 * Fields for memory allocation over static array
 */
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
#if AE_THREADING!=AE_SERIAL_UNSAFE
#error Basic static malloc is thread-unsafe; define AE_THREADING=AE_SERIAL_UNSAFE to prove that you know it
#endif
static ae_int_t       sm_page_size = 0;
static ae_int_t       sm_page_cnt  = 0;
static ae_int_t      *sm_page_tbl  = NULL;
static unsigned char *sm_mem       = NULL;
#endif

/*
 * These declarations are used to ensure that
 * sizeof(ae_bool)=1, sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*).
 * They will lead to a syntax error otherwise (array size will be negative).
 *
 * You can remove them, if you want - they are not used anywhere.
 */
static char _ae_bool_must_be_8_bits_wide   [1-2*((int)(sizeof(ae_bool))-1)*((int)(sizeof(ae_bool))-1)];
static char _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)];
static char _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)];
static char _ae_uint64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_uint64_t))-8)*((int)(sizeof(ae_uint64_t))-8)];
static char _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))];

/*
 * This variable is used to prevent some tricky optimizations which may degrade multithreaded performance.
 * It is touched once in the ae_init_pool() function from smp.c in order to prevent optimizations.
 */
static volatile ae_int_t ae_never_change_it = 1;

/*************************************************************************
This function should never be called. It is here to prevent spurious
compiler warnings about unused variables (in fact: used).
*************************************************************************/
void ae_never_call_it()
{
    ae_touch_ptr((void*)_ae_bool_must_be_8_bits_wide);
    ae_touch_ptr((void*)_ae_int32_t_must_be_32_bits_wide);
    ae_touch_ptr((void*)_ae_int64_t_must_be_64_bits_wide);
    ae_touch_ptr((void*)_ae_uint64_t_must_be_64_bits_wide);
    ae_touch_ptr((void*)_ae_int_t_must_be_pointer_sized);
}
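
/*************************************************************************
Illustration (sketch only, not used anywhere): the negative-array-size trick
employed by the size-check declarations above. Whenever a size assumption is
violated the array extent becomes negative and compilation fails; a
hypothetical stand-alone example:

    // fails to compile on any platform where sizeof(int)!=4:
    static char int_must_be_32_bits[1-2*((int)sizeof(int)-4)*((int)sizeof(int)-4)];

ae_never_call_it() above merely "touches" these arrays so that compilers do
not warn about unused file-scope variables.
*************************************************************************/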

void ae_set_dbg_flag(ae_int64_t flag_id, ae_int64_t flag_val)
{
    if( flag_id==_ALGLIB_USE_ALLOC_COUNTER )
    {
        _use_alloc_counter = flag_val!=0;
        return;
    }
    if( flag_id==_ALGLIB_USE_DBG_COUNTERS )
    {
        _use_dbg_counters = flag_val!=0;
        return;
    }
    if( flag_id==_ALGLIB_USE_VENDOR_KERNELS )
    {
        _use_vendor_kernels = flag_val!=0;
        return;
    }
    if( flag_id==_ALGLIB_DEBUG_WORKSTEALING )
    {
        debug_workstealing = flag_val!=0;
        return;
    }
    if( flag_id==_ALGLIB_SET_GLOBAL_THREADING )
    {
        ae_set_global_threading((ae_uint64_t)flag_val);
        return;
    }
    if( flag_id==_ALGLIB_SET_NWORKERS )
    {
        _alglib_cores_to_use = (ae_int_t)flag_val;
        return;
    }
}

ae_int64_t ae_get_dbg_value(ae_int64_t id)
{
    if( id==_ALGLIB_GET_ALLOC_COUNTER )
        return _alloc_counter;
    if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_SIZE )
        return _dbg_alloc_total;
    if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_COUNT )
        return _alloc_counter_total;

    if( id==_ALGLIB_VENDOR_MEMSTAT )
    {
#if defined(AE_MKL)
        return ae_mkl_memstat();
#else
        return 0;
#endif
    }

    /* workstealing counters */
    if( id==_ALGLIB_WSDBG_NCORES )
#if defined(AE_SMP)
        return ae_cores_count();
#else
        return 0;
#endif
    if( id==_ALGLIB_WSDBG_PUSHROOT_OK )
        return dbgws_pushroot_ok;
    if( id==_ALGLIB_WSDBG_PUSHROOT_FAILED )
        return dbgws_pushroot_failed;

    if( id==_ALGLIB_GET_CORES_COUNT )
#if defined(AE_SMP)
        return ae_cores_count();
#else
        return 0;
#endif
    if( id==_ALGLIB_GET_GLOBAL_THREADING )
        return (ae_int64_t)ae_get_global_threading();
    if( id==_ALGLIB_GET_NWORKERS )
        return (ae_int64_t)_alglib_cores_to_use;

    /* unknown value */
    return 0;
}

/************************************************************************
This function sets default (global) threading model:
* serial execution
* multithreading, if cores_to_use allows it
************************************************************************/
void ae_set_global_threading(ae_uint64_t flg_value)
{
    flg_value = flg_value&_ALGLIB_FLG_THREADING_MASK;
    AE_CRITICAL_ASSERT(flg_value==_ALGLIB_FLG_THREADING_SERIAL || flg_value==_ALGLIB_FLG_THREADING_PARALLEL);
    _alglib_global_threading_flags = (unsigned char)(flg_value>>_ALGLIB_FLG_THREADING_SHIFT);
}

/************************************************************************
This function gets default (global) threading model:
* serial execution
* multithreading, if cores_to_use allows it
************************************************************************/
ae_uint64_t ae_get_global_threading()
{
    return ((ae_uint64_t)_alglib_global_threading_flags)<<_ALGLIB_FLG_THREADING_SHIFT;
}
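
/*************************************************************************
Usage sketch (illustrative only; assumes the _ALGLIB_FLG_THREADING_* masks
declared in ap.h): the global threading mode is stored as a single byte and
is expanded back to a 64-bit flag word on read, so set/get round-trip
exactly, and the same settings can be driven through the generic debug-flag
interface:

    ae_set_global_threading(_ALGLIB_FLG_THREADING_PARALLEL);
    ae_uint64_t flg = ae_get_global_threading();
    // flg==_ALGLIB_FLG_THREADING_PARALLEL

    ae_set_dbg_flag(_ALGLIB_SET_NWORKERS, 2);               // request two workers
    ae_int64_t n = ae_get_dbg_value(_ALGLIB_GET_NWORKERS);  // returns 2
*************************************************************************/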

void ae_set_error_flag(ae_bool *p_flag, ae_bool cond, const char *filename, int lineno, const char *xdesc)
{
    if( cond )
    {
        *p_flag = ae_true;
        sef_file = filename;
        sef_line = lineno;
        sef_xdesc= xdesc;
#ifdef ALGLIB_ABORT_ON_ERROR_FLAG
        printf("[ALGLIB] aborting on ae_set_error_flag(cond=true)\n");
        printf("[ALGLIB] %s:%d\n", filename, lineno);
        printf("[ALGLIB] %s\n", xdesc);
        fflush(stdout);
        if( alglib_trace_file!=NULL ) fflush(alglib_trace_file);
        abort();
#endif
    }
}

/************************************************************************
This function returns file name for the last call of ae_set_error_flag()
with non-zero cond parameter.
************************************************************************/
const char * ae_get_last_error_file()
{
    return sef_file;
}

/************************************************************************
This function returns line number for the last call of ae_set_error_flag()
with non-zero cond parameter.
************************************************************************/
int ae_get_last_error_line()
{
    return sef_line;
}

/************************************************************************
This function returns extra description for the last call of ae_set_error_flag()
with non-zero cond parameter.
************************************************************************/
const char * ae_get_last_error_xdesc()
{
    return sef_xdesc;
}
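
/*************************************************************************
Usage sketch (illustrative only, in the style of test code; v and expected
are hypothetical values being compared): ae_set_error_flag() raises a
caller-owned flag and records the location of the failed check, which can
then be queried through the getters above:

    ae_bool err = ae_false;
    ae_set_error_flag(&err, v!=expected, __FILE__, __LINE__, "v!=expected");
    if( err )
        printf("failed at %s:%d (%s)\n",
            ae_get_last_error_file(),
            ae_get_last_error_line(),
            ae_get_last_error_xdesc());
*************************************************************************/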

ae_int_t ae_misalignment(const void *ptr, size_t alignment)
{
    union _u
    {
        const void *ptr;
        ae_int_t iptr;
    } u;
    u.ptr = ptr;
    return (ae_int_t)(u.iptr%alignment);
}

void* ae_align(void *ptr, size_t alignment)
{
    char *result = (char*)ptr;
    if( (result-(char*)0)%alignment!=0 )
        result += alignment - (result-(char*)0)%alignment;
    return result;
}
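
/*************************************************************************
Worked example (illustrative only): ae_misalignment() returns the remainder
of the address modulo the alignment, and ae_align() rounds the pointer UP to
the next aligned address, so the two compose as follows:

    char buf[128];
    void *p   = buf+3;                      // deliberately misaligned pointer
    ae_int_t r = ae_misalignment(p, 16);    // offset of p within a 16-byte block
    void *q   = ae_align(p, 16);            // q>=p, q is 16-byte aligned,
                                            // and (char*)q-(char*)p == (16-r)%16
*************************************************************************/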

/************************************************************************
This function maps nworkers number (which can be positive, zero or
negative with 0 meaning "all cores", -1 meaning "all cores -1" and so on)
to "effective", strictly positive workers count.

This function is intended to be used by debugging/testing code which
tests different number of worker threads. It is NOT aligned in any way
with ALGLIB multithreading framework (i.e. it can return non-zero worker
count even for single-threaded GPLed ALGLIB).
************************************************************************/
ae_int_t ae_get_effective_workers(ae_int_t nworkers)
{
    ae_int_t ncores;

    /* determine cores count */
#if defined(AE_NWORKERS)
    ncores = AE_NWORKERS;
#elif AE_OS==AE_WINDOWS
    SYSTEM_INFO sysInfo;
    GetSystemInfo(&sysInfo);
    ncores = (ae_int_t)(sysInfo.dwNumberOfProcessors);
#elif AE_OS==AE_POSIX
    {
        long r = sysconf(_SC_NPROCESSORS_ONLN);
        ncores = r<=0 ? 1 : r;
    }
#else
    ncores = 1;
#endif
    AE_CRITICAL_ASSERT(ncores>=1);

    /* map nworkers to its effective value */
    if( nworkers>=1 )
        return nworkers>ncores ? ncores : nworkers;
    return ncores+nworkers>=1 ? ncores+nworkers : 1;
}
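
/*************************************************************************
Worked example (illustrative only): on a hypothetical 8-core machine the
mapping implemented above gives

    ae_get_effective_workers( 3)       ->  3   // exact request, within ncores
    ae_get_effective_workers(99)       ->  8   // more than available -> ncores
    ae_get_effective_workers( 0)       ->  8   // 0 means "all cores"
    ae_get_effective_workers(-1)       ->  7   // all cores except one
    ae_get_effective_workers(-9999999) ->  1   // never drops below one worker
*************************************************************************/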

/*************************************************************************
This function belongs to the family of "optional atomics", i.e. atomic
functions which either perform atomic changes - or do nothing at all, if
current compiler settings do not allow us to generate atomic code.

All "optional atomics" are synchronized, i.e. either all of them work - or
none of them work.

This particular function performs atomic addition on pointer-sized value,
which must be pointer-size aligned.

NOTE: this function is not intended to be extremely high performance one,
      so use it only when necessary.
*************************************************************************/
void ae_optional_atomic_add_i(ae_int_t *p, ae_int_t v)
{
    AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
#if AE_OS==AE_WINDOWS
    for(;;)
    {
        /* perform conversion between ae_int_t* and void**
           without compiler warnings about indirection levels */
        union _u
        {
            PVOID volatile * volatile ptr;
            volatile ae_int_t * volatile iptr;
        } u;
        u.iptr = p;

        /* atomic read for initial value */
        PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);

        /* increment cached value and store */
        if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)+v), v0)==v0 )
            break;
    }
#elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC_MINOR__>=470)
    __atomic_add_fetch(p, v, __ATOMIC_RELAXED);
#else
#endif
}

/*************************************************************************
This function belongs to the family of "optional atomics", i.e. atomic
functions which either perform atomic changes - or do nothing at all, if
current compiler settings do not allow us to generate atomic code.

All "optional atomics" are synchronized, i.e. either all of them work - or
none of them work.

This particular function performs atomic subtraction on pointer-sized
value, which must be pointer-size aligned.

NOTE: this function is not intended to be extremely high performance one,
      so use it only when necessary.
*************************************************************************/
void ae_optional_atomic_sub_i(ae_int_t *p, ae_int_t v)
{
    AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
#if AE_OS==AE_WINDOWS
    for(;;)
    {
        /* perform conversion between ae_int_t* and void**
           without compiler warnings about indirection levels */
        union _u
        {
            PVOID volatile * volatile ptr;
            volatile ae_int_t * volatile iptr;
        } u;
        u.iptr = p;

        /* atomic read for initial value, convert it to 1-byte pointer */
        PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);

        /* decrement cached value and store */
        if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)-v), v0)==v0 )
            break;
    }
#elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC_MINOR__>=470)
    __atomic_sub_fetch(p, v, __ATOMIC_RELAXED);
#else
#endif
}
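
/*************************************************************************
Usage sketch (illustrative only): the optional atomics above are used for
debug counters which may be touched from several worker threads at once; a
matched add/sub pair keeps such a counter consistent without a lock
(live_objects and the two helpers below are hypothetical):

    static ae_int_t live_objects = 0;   // hypothetical counter

    void on_create() { ae_optional_atomic_add_i(&live_objects, 1); }
    void on_delete() { ae_optional_atomic_sub_i(&live_objects, 1); }

On platforms where neither branch above compiles to atomic code both calls
silently do nothing, which is why they must not be used for logic that
requires exact counts.
*************************************************************************/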

/*************************************************************************
This function cleans up automatically managed memory before caller terminates
ALGLIB execution by ae_break() or by simply stopping calling callback.

For state!=NULL it calls thread_exception_handler() and then ae_state_clear().
For state==NULL it does nothing.
*************************************************************************/
void ae_clean_up_before_breaking(ae_state *state)
{
    if( state!=NULL )
    {
        if( state->thread_exception_handler!=NULL )
            state->thread_exception_handler(state);
        ae_state_clear(state);
    }
}

/*************************************************************************
This function abnormally aborts program, using one of several ways:

* for state!=NULL and state->break_jump being initialized with call to
  ae_state_set_break_jump() - it performs longjmp() to return site.
* otherwise, abort() is called

In all cases, for state!=NULL function sets state->last_error and
state->error_msg fields. It also clears state with ae_state_clear().

If state is not NULL and state->thread_exception_handler is set, it is
called prior to handling error and clearing state.
*************************************************************************/
void ae_break(ae_state *state, ae_error_type error_type, const char *msg)
{
    if( state!=NULL )
    {
        if( alglib_trace_type!=ALGLIB_TRACE_NONE )
            ae_trace("---!!! CRITICAL ERROR !!!--- exception with message '%s' was generated\n", msg!=NULL ? msg : "");
        ae_clean_up_before_breaking(state);
        state->last_error = error_type;
        state->error_msg = msg;
        if( state->break_jump!=NULL )
            longjmp(*(state->break_jump), 1);
        else
            abort();
    }
    else
        abort();
}
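
/*************************************************************************
Usage sketch (illustrative only): a caller that wants ae_break() to unwind
back to it, instead of aborting the process, installs a jump buffer first:

    ae_state st;
    jmp_buf jb;
    ae_state_init(&st);
    if( !setjmp(jb) )
    {
        ae_state_set_break_jump(&st, &jb);
        ae_assert(2+2==5, "arithmetic is broken", &st);   // failing check triggers ae_break()
    }
    else
    {
        // control arrives here after longjmp();
        // st.last_error and st.error_msg describe the failure
    }
*************************************************************************/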

#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
void set_memory_pool(void *ptr, size_t size)
{
    /*
     * Integrity checks
     */
    AE_CRITICAL_ASSERT(sm_page_size==0);
    AE_CRITICAL_ASSERT(sm_page_cnt==0);
    AE_CRITICAL_ASSERT(sm_page_tbl==NULL);
    AE_CRITICAL_ASSERT(sm_mem==NULL);
    AE_CRITICAL_ASSERT(size>0);

    /*
     * Align pointer
     */
    size -= ae_misalignment(ptr, sizeof(ae_int_t));
    ptr   = ae_align(ptr, sizeof(ae_int_t));

    /*
     * Calculate page size and page count, prepare pointers to page table and memory
     */
    sm_page_size = 256;
    AE_CRITICAL_ASSERT(size>=(sm_page_size+sizeof(ae_int_t))+sm_page_size); /* we expect to have memory for at least one page + table entry + alignment */
    sm_page_cnt = (size-sm_page_size)/(sm_page_size+sizeof(ae_int_t));
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    sm_page_tbl = (ae_int_t*)ptr;
    sm_mem      = (unsigned char*)ae_align(sm_page_tbl+sm_page_cnt, sm_page_size);

    /*
     * Mark all pages as free
     */
    memset(sm_page_tbl, 0, sm_page_cnt*sizeof(ae_int_t));
}

void* ae_static_malloc(size_t size, size_t alignment)
{
    int rq_pages, i, j, cur_len;

    AE_CRITICAL_ASSERT(size>=0);
    AE_CRITICAL_ASSERT(sm_page_size>0);
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
    AE_CRITICAL_ASSERT(sm_mem!=NULL);

    if( size==0 )
        return NULL;
    if( _force_malloc_failure )
        return NULL;

    /* check that page alignment and requested alignment match each other */
    AE_CRITICAL_ASSERT(alignment<=sm_page_size);
    AE_CRITICAL_ASSERT((sm_page_size%alignment)==0);

    /* search long enough sequence of pages */
    rq_pages = size/sm_page_size;
    if( size%sm_page_size )
        rq_pages++;
    cur_len = 0;
    for(i=0; i<sm_page_cnt;)
    {
        /* determine length of the sequence of free pages */
        if( sm_page_tbl[i]==0 )
            cur_len++;
        else
        {
            AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
            cur_len=0;
            i += sm_page_tbl[i];
            continue;
        }

        /* found it? */
        if( cur_len>=rq_pages )
        {
            /* update counters (if flag is set) */
            if( _use_alloc_counter )
            {
                ae_optional_atomic_add_i(&_alloc_counter, 1);
                ae_optional_atomic_add_i(&_alloc_counter_total, 1);
            }
            if( _use_dbg_counters )
                ae_optional_atomic_add_i(&_dbg_alloc_total, size);

            /* mark pages and return */
            for(j=0; j<rq_pages; j++)
                sm_page_tbl[i-j] = -1;
            sm_page_tbl[i-(rq_pages-1)] = rq_pages;
            return sm_mem+(i-(rq_pages-1))*sm_page_size;
        }

        /* next element */
        i++;
    }
    return NULL;
}

void ae_static_free(void *block)
{
    ae_int_t page_idx, page_cnt, i;
    if( block==NULL )
        return;
    page_idx = (unsigned char*)block-sm_mem;
    AE_CRITICAL_ASSERT(page_idx>=0);
    AE_CRITICAL_ASSERT((page_idx%sm_page_size)==0);
    page_idx = page_idx/sm_page_size;
    AE_CRITICAL_ASSERT(page_idx<sm_page_cnt);
    page_cnt = sm_page_tbl[page_idx];
    AE_CRITICAL_ASSERT(page_cnt>=1);
    for(i=0; i<page_cnt; i++)
        sm_page_tbl[page_idx+i] = 0;

    /* update counters (if flag is set) */
    if( _use_alloc_counter )
        ae_optional_atomic_sub_i(&_alloc_counter, 1);
}

void memory_pool_stats(ae_int_t *bytes_used, ae_int_t *bytes_free)
{
    int i;

    AE_CRITICAL_ASSERT(sm_page_size>0);
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
    AE_CRITICAL_ASSERT(sm_mem!=NULL);

    /* scan page table */
    *bytes_used = 0;
    *bytes_free = 0;
    for(i=0; i<sm_page_cnt;)
    {
        if( sm_page_tbl[i]==0 )
        {
            (*bytes_free)++;
            i++;
        }
        else
        {
            AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
            *bytes_used += sm_page_tbl[i];
            i += sm_page_tbl[i];
        }
    }
    *bytes_used *= sm_page_size;
    *bytes_free *= sm_page_size;
}
#endif
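
/*************************************************************************
Usage sketch (illustrative only; applies only to builds compiled with
AE_MALLOC==AE_BASIC_STATIC_MALLOC): the caller donates one static buffer,
the pool manager above splits it into a page table plus 256-byte pages, and
subsequent allocations are served from those pages. pool_buffer below is a
hypothetical arena owned by the caller:

    static unsigned char pool_buffer[1024*1024];   // hypothetical 1 MB arena
    ae_int_t used, free_bytes;

    set_memory_pool(pool_buffer, sizeof(pool_buffer));
    // ... run ALGLIB code ...
    memory_pool_stats(&used, &free_bytes);         // both reported in bytes
*************************************************************************/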

void* aligned_malloc(size_t size, size_t alignment)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    return ae_static_malloc(size, alignment);
#else
    char *result = NULL;

    if( size==0 )
        return NULL;
    if( _force_malloc_failure )
        return NULL;
    if( _malloc_failure_after>0 && _alloc_counter_total>=_malloc_failure_after )
        return NULL;

    /* allocate */
    if( alignment<=1 )
    {
        /* no alignment, just call alloc */
        void *block;
        void **p;
        block = malloc(sizeof(void*)+size);
        if( block==NULL )
            return NULL;
        p = (void**)block;
        *p = block;
        result = (char*)((char*)block+sizeof(void*));
    }
    else
    {
        /* align */
        void *block;
        block = malloc(alignment-1+sizeof(void*)+size);
        if( block==NULL )
            return NULL;
        result = (char*)block+sizeof(void*);
        /*if( (result-(char*)0)%alignment!=0 )
            result += alignment - (result-(char*)0)%alignment;*/
        result = (char*)ae_align(result, alignment);
        *((void**)(result-sizeof(void*))) = block;
    }

    /* update counters (if flag is set) */
    if( _use_alloc_counter )
    {
        ae_optional_atomic_add_i(&_alloc_counter, 1);
        ae_optional_atomic_add_i(&_alloc_counter_total, 1);
    }
    if( _use_dbg_counters )
        ae_optional_atomic_add_i(&_dbg_alloc_total, (ae_int64_t)size);

    /* return */
    return (void*)result;
#endif
}

void* aligned_extract_ptr(void *block)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    return NULL;
#else
    if( block==NULL )
        return NULL;
    return *((void**)((char*)block-sizeof(void*)));
#endif
}

void aligned_free(void *block)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    ae_static_free(block);
#else
    void *p;
    if( block==NULL )
        return;
    p = aligned_extract_ptr(block);
    free(p);
    if( _use_alloc_counter )
        ae_optional_atomic_sub_i(&_alloc_counter, 1);
#endif
}
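
/*************************************************************************
Layout note with a small sketch (illustrative only, for the default malloc
branch): aligned_malloc() over-allocates by sizeof(void*) (plus alignment-1
when alignment>1) and hides the pointer returned by malloc() immediately
before the block handed to the caller, which is what aligned_extract_ptr()
and aligned_free() rely on:

    double *a = (double*)aligned_malloc(100*sizeof(double), 64);
    // a is 64-byte aligned; ((void**)a)[-1] holds the raw malloc() pointer
    aligned_free(a);    // frees the raw pointer (and decrements _alloc_counter
                        // when the allocation counter is enabled)
*************************************************************************/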

void* eternal_malloc(size_t size)
{
    if( size==0 )
        return NULL;
    if( _force_malloc_failure )
        return NULL;
    return malloc(size);
}

/************************************************************************
Allocate memory with automatic alignment.

Returns NULL when zero size is specified.

Error handling:
* if state is NULL, returns NULL on allocation error
* if state is not NULL, calls ae_break() on allocation error
************************************************************************/
void* ae_malloc(size_t size, ae_state *state)
{
    void *result;
    if( size==0 )
        return NULL;
    result = aligned_malloc(size,AE_DATA_ALIGN);
    if( result==NULL && state!=NULL)
        ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
    return result;
}

void ae_free(void *p)
{
    if( p!=NULL )
        aligned_free(p);
}

/************************************************************************
Sets pointers to the matrix rows.

* dst must be correctly initialized matrix
* dst->data.ptr points to the beginning of memory block allocated for
  row pointers.
* dst->ptr - undefined (initialized during algorithm processing)
* storage parameter points to the beginning of actual storage
************************************************************************/
void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage)
{
    char *p_base;
    void **pp_ptr;
    ae_int_t i;
    if( dst->rows>0 && dst->cols>0 )
    {
        p_base = (char*)storage;
        pp_ptr = (void**)dst->data.ptr;
        dst->ptr.pp_void = pp_ptr;
        for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype))
            pp_ptr[i] = p_base;
    }
    else
        dst->ptr.pp_void = NULL;
}

/************************************************************************
Returns size of datatype.
Zero for dynamic types like strings or multiple precision types.
************************************************************************/
ae_int_t ae_sizeof(ae_datatype datatype)
{
    switch(datatype)
    {
        case DT_BOOL:       return (ae_int_t)sizeof(ae_bool);
        case DT_INT:        return (ae_int_t)sizeof(ae_int_t);
        case DT_REAL:       return (ae_int_t)sizeof(double);
        case DT_COMPLEX:    return 2*(ae_int_t)sizeof(double);
        default:            return 0;
    }
}

/************************************************************************
Checks that n bytes pointed to by ptr are zero.

This function is used in the constructors to check that instance fields
on entry are correctly initialized by zeros.
************************************************************************/
ae_bool ae_check_zeros(const void *ptr, ae_int_t n)
{
    ae_int_t nu, nr, i;
    unsigned long long c = 0x0;

    /*
     * determine leading and trailing lengths
     */
    nu = n/sizeof(unsigned long long);
    nr = n%sizeof(unsigned long long);

    /*
     * handle leading nu long long elements
     */
    if( nu>0 )
    {
        const unsigned long long *p_ull;
        p_ull = (const unsigned long long *)ptr;
        for(i=0; i<nu; i++)
            c |= p_ull[i];
    }

    /*
     * handle trailing nr char elements
     */
    if( nr>0 )
    {
        const unsigned char *p_uc;
        p_uc = ((const unsigned char *)ptr)+nu*sizeof(unsigned long long);
        for(i=0; i<nr; i++)
            c |= p_uc[i];
    }

    /*
     * done
     */
    return c==0x0;
}

/************************************************************************
This dummy function is used to prevent compiler messages about unused
locals in automatically generated code.

It does nothing - just accepts a pointer, "touches" it - and that is all.
It performs several tricky operations without side effects which confuse
compiler so it does not complain about unused locals in THIS function.
************************************************************************/
void ae_touch_ptr(void *p)
{
    void * volatile fake_variable0 = p;
    void * volatile fake_variable1 = fake_variable0;
    fake_variable0 = fake_variable1;
}

/************************************************************************
This function initializes ALGLIB environment state.

NOTES:
* stacks contain no frames, so ae_frame_make() must be called before
  attaching dynamic blocks. Without it ae_frame_leave() will cycle
  forever (which is intended behavior).
************************************************************************/
void ae_state_init(ae_state *state)
{
    ae_int32_t *vp;

    /*
     * Set flags
     */
    state->flags = 0x0;

    /*
     * p_next points to itself because:
     * * correct program should be able to detect end of the list
     *   by looking at the ptr field.
     * * NULL p_next may be used to distinguish automatic blocks
     *   (in the list) from non-automatic (not in the list)
     */
    state->last_block.p_next = &(state->last_block);
    state->last_block.deallocator = NULL;
    state->last_block.ptr = DYN_BOTTOM;
    state->p_top_block = &(state->last_block);
    state->break_jump = NULL;
    state->error_msg = "";

    /*
     * determine endianness and initialize precomputed IEEE special quantities.
     */
    state->endianness = ae_get_endianness();
    if( state->endianness==AE_LITTLE_ENDIAN )
    {
        vp = (ae_int32_t*)(&state->v_nan);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0x7FF80000;
        vp = (ae_int32_t*)(&state->v_posinf);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0x7FF00000;
        vp = (ae_int32_t*)(&state->v_neginf);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0xFFF00000;
    }
    else if( state->endianness==AE_BIG_ENDIAN )
    {
        vp = (ae_int32_t*)(&state->v_nan);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0x7FF80000;
        vp = (ae_int32_t*)(&state->v_posinf);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0x7FF00000;
        vp = (ae_int32_t*)(&state->v_neginf);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0xFFF00000;
    }
    else
        abort();

    /*
     * set threading information
     */
    state->worker_thread = NULL;
    state->parent_task = NULL;
    state->thread_exception_handler = NULL;
}

/************************************************************************
This function clears ALGLIB environment state.
All dynamic data controlled by state are freed.
************************************************************************/
void ae_state_clear(ae_state *state)
{
    while( state->p_top_block->ptr!=DYN_BOTTOM )
        ae_frame_leave(state);
}

/************************************************************************
This function sets jump buffer for error handling.

buf may be NULL.
************************************************************************/
void ae_state_set_break_jump(ae_state *state, jmp_buf *buf)
{
    state->break_jump = buf;
}

/************************************************************************
This function sets flags member of the ae_state structure

buf may be NULL.
************************************************************************/
void ae_state_set_flags(ae_state *state, ae_uint64_t flags)
{
    state->flags = flags;
}

/************************************************************************
This function makes new stack frame.

This function takes two parameters: environment state and pointer to the
dynamic block which will be used as indicator of the frame beginning.
This dynamic block must be initialized by caller and mustn't be changed/
deallocated/reused till ae_frame_leave() is called. It may be global or
local variable (local is even better).
************************************************************************/
void ae_frame_make(ae_state *state, ae_frame *tmp)
{
    tmp->db_marker.p_next = state->p_top_block;
    tmp->db_marker.deallocator = NULL;
    tmp->db_marker.ptr = DYN_FRAME;
    state->p_top_block = &tmp->db_marker;
}

/************************************************************************
This function leaves current stack frame and deallocates all automatic
dynamic blocks which were attached to this frame.
************************************************************************/
void ae_frame_leave(ae_state *state)
{
    while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM)
    {
        if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL)
            ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr);
        state->p_top_block = state->p_top_block->p_next;
    }
    state->p_top_block = state->p_top_block->p_next;
}
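
/*************************************************************************
Usage sketch (illustrative only): how the frame machinery above is meant to
be driven by caller code - every automatic object registered between
ae_frame_make() and ae_frame_leave() is deallocated when the frame is left:

    ae_state st;
    ae_frame fr;
    ae_vector tmp;

    ae_state_init(&st);
    ae_frame_make(&st, &fr);
    memset(&tmp, 0, sizeof(tmp));
    ae_vector_init(&tmp, 100, DT_REAL, &st, ae_true);   // attached to the frame
    // ... work with tmp ...
    ae_frame_leave(&st);                                // frees tmp's storage
    ae_state_clear(&st);
*************************************************************************/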

/************************************************************************
This function attaches block to the dynamic block list

block               block
state               ALGLIB environment state

This function does NOT generate exceptions.

NOTES:
* never call it for special blocks which mark frame boundaries!
************************************************************************/
void ae_db_attach(ae_dyn_block *block, ae_state *state)
{
    block->p_next = state->p_top_block;
    state->p_top_block = block;
}

/************************************************************************
This function initializes dynamic block:

block               destination block, MUST be zero-filled on entry
size                size (in bytes), >=0.
state               ALGLIB environment state, non-NULL
make_automatic      if true, vector is added to the dynamic block list

block is assumed to be uninitialized, its fields are ignored. You may
call this function with zero size in order to register block in the
dynamic list.

Error handling: calls ae_break() on allocation error. Block is left in
valid state (empty, but valid).

NOTES:
* never call it for blocks which are already in the list; use ae_db_realloc
  for already allocated blocks.

NOTE: no memory allocation is performed for initialization with size=0
************************************************************************/
void ae_db_init(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(block,sizeof(*block)));

    /*
     * NOTE: these strange dances around block->ptr are necessary
     *       in order to correctly handle possible exceptions during
     *       memory allocation.
     */
    ae_assert(size>=0, "ae_db_init(): negative size", state);
    block->ptr = NULL;
    block->valgrind_hint = NULL;
    ae_touch_ptr(block->ptr);
    ae_touch_ptr(block->valgrind_hint);
    if( make_automatic )
        ae_db_attach(block, state);
    else
        block->p_next = NULL;
    if( size!=0 )
    {
        block->ptr = ae_malloc((size_t)size, state);
        block->valgrind_hint = aligned_extract_ptr(block->ptr);
    }
    block->deallocator = ae_free;
}

/************************************************************************
This function realloc's dynamic block:

block               destination block (initialized)
size                new size (in bytes)
state               ALGLIB environment state

block is assumed to be initialized.

This function:
* deletes old contents
* preserves automatic state

Error handling: calls ae_break() on allocation error. Block is left in
valid state - empty, but valid.

NOTES:
* never call it for special blocks which mark frame boundaries!
************************************************************************/
void ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);

    /*
     * NOTE: these strange dances around block->ptr are necessary
     *       in order to correctly handle possible exceptions during
     *       memory allocation.
     */
    ae_assert(size>=0, "ae_db_realloc(): negative size", state);
    if( block->ptr!=NULL )
    {
        ((ae_deallocator)block->deallocator)(block->ptr);
        block->ptr = NULL;
        block->valgrind_hint = NULL;
    }
    block->ptr = ae_malloc((size_t)size, state);
    block->valgrind_hint = aligned_extract_ptr(block->ptr);
    block->deallocator = ae_free;
}

/************************************************************************
This function clears dynamic block (releases all dynamically allocated
memory). Dynamic block may be in automatic management list - in this case
it will NOT be removed from list.

block               destination block (initialized)

NOTES:
* never call it for special blocks which mark frame boundaries!
************************************************************************/
void ae_db_free(ae_dyn_block *block)
{
    if( block->ptr!=NULL )
        ((ae_deallocator)block->deallocator)(block->ptr);
    block->ptr = NULL;
    block->valgrind_hint = NULL;
    block->deallocator = ae_free;
}

/************************************************************************
This function swaps contents of two dynamic blocks (pointers and
deallocators) leaving other parameters (automatic management settings,
etc.) unchanged.

NOTES:
* never call it for special blocks which mark frame boundaries!
************************************************************************/
void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2)
{
    void (*deallocator)(void*) = NULL;
    void * volatile ptr;
    void * valgrind_hint;

    ptr = block1->ptr;
    valgrind_hint = block1->valgrind_hint;
    deallocator = block1->deallocator;

    block1->ptr = block2->ptr;
    block1->valgrind_hint = block2->valgrind_hint;
    block1->deallocator = block2->deallocator;

    block2->ptr = ptr;
    block2->valgrind_hint = valgrind_hint;
    block2->deallocator = deallocator;
}

/*************************************************************************
This function creates ae_vector.
Vector size may be zero. Vector contents is uninitialized.

dst                 destination vector, MUST be zero-filled (we check it
                    and call abort() if *dst is non-zero; the rationale is
                    that we can not correctly handle errors in constructors
                    without zero-filling).
size                vector size, may be zero
datatype            guess what...
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

NOTE: no memory allocation is performed for initialization with size=0
*************************************************************************/
void ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
{
    /*
     * Integrity checks
     */
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
    ae_assert(size>=0, "ae_vector_init(): negative size", state);

    /* prepare for possible errors during allocation */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;

    /* init */
    ae_db_init(&dst->data, size*ae_sizeof(datatype), state, make_automatic);
    dst->cnt = size;
    dst->datatype = datatype;
    dst->ptr.p_ptr = dst->data.ptr;
    dst->is_attached = ae_false;
}

/************************************************************************
This function creates copy of ae_vector. New copy of the data is created,
which is managed and owned by newly initialized vector.

dst                 destination vector, MUST be zero-filled (we check it
                    and call abort() if *dst is non-zero; the rationale is
                    that we can not correctly handle errors in constructors
                    without zero-filling).
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);

    ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic);
    if( src->cnt!=0 )
        memmove(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
}

/************************************************************************
This function initializes ae_vector using X-structure as source. New copy
of data is created, which is owned/managed by ae_vector structure. Both
structures (source and destination) remain completely independent after
this call.

dst                 destination vector, MUST be zero-filled (we check it
                    and call abort() if *dst is non-zero; the rationale is
                    that we can not correctly handle errors in constructors
                    without zero-filling).
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);

    ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic);
    if( src->cnt>0 )
        memmove(dst->ptr.p_ptr, src->x_ptr.p_ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype)));
}

/************************************************************************
This function initializes ae_vector using X-structure as source.

New vector is attached to source:
* DST shares memory with SRC
* both DST and SRC are writable - all writes to DST change elements of
  SRC and vice versa.
* DST can be reallocated with ae_vector_set_length(), in this case SRC
  remains untouched
* SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS

NOTE: is_attached field is set to ae_true in order to indicate that
      vector does not own its memory.

dst                 destination vector
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_vector_init_attach_to_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
{
    volatile ae_int_t cnt;

    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    cnt = (ae_int_t)src->cnt;

    /* ensure that size is correct */
    ae_assert(cnt==src->cnt, "ae_vector_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(cnt>=0, "ae_vector_init_attach_to_x(): negative length", state);

    /* prepare for possible errors during allocation */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;
    dst->datatype = (ae_datatype)src->datatype;

    /* zero-size init in order to correctly register in the frame */
    ae_db_init(&dst->data, 0, state, make_automatic);

    /* init */
    dst->cnt = cnt;
    dst->ptr.p_ptr = src->x_ptr.p_ptr;
    dst->is_attached = ae_true;
}

/************************************************************************
This function changes length of ae_vector.

dst                 destination vector
newsize             vector size, may be zero
state               ALGLIB environment state, can not be NULL

Error handling: calls ae_break() on allocation error

NOTES:
* vector must be initialized
* all contents is destroyed during setlength() call
* new size may be zero.
************************************************************************/
void ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state);
    if( dst->cnt==newsize )
        return;

    /* realloc, being ready for exception during reallocation (cnt=ptr=0 on entry) */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;
    ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state);
    dst->cnt = newsize;
    dst->ptr.p_ptr = dst->data.ptr;
}

/************************************************************************
This function resizes ae_vector, preserving previously existing elements.
Values of elements added during vector growth are undefined.

dst                 destination vector
newsize             vector size, may be zero
state               ALGLIB environment state, can not be NULL

Error handling: calls ae_break() on allocation error

NOTES:
* vector must be initialized
* new size may be zero.
************************************************************************/
void ae_vector_resize(ae_vector *dst, ae_int_t newsize, ae_state *state)
{
    ae_vector tmp;
    ae_int_t bytes_total;

    memset(&tmp, 0, sizeof(tmp));
    ae_vector_init(&tmp, newsize, dst->datatype, state, ae_false);
    bytes_total = (dst->cnt<newsize ? dst->cnt : newsize)*ae_sizeof(dst->datatype);
    if( bytes_total>0 )
        memmove(tmp.ptr.p_ptr, dst->ptr.p_ptr, bytes_total);
    ae_swap_vectors(dst, &tmp);
    ae_vector_clear(&tmp);
}
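
/*************************************************************************
Usage sketch (illustrative only, for an already initialized vector v and
environment state st): the difference between the two growth functions
above - ae_vector_set_length() drops old contents, while ae_vector_resize()
moves the overlapping prefix into the new storage:

    ae_vector_set_length(&v, 200, &st);   // contents after the call are undefined
    ae_vector_resize(&v, 200, &st);       // elements [0..min(old,200)-1] preserved,
                                          // newly added tail is undefined
*************************************************************************/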

/************************************************************************
This function provides "CLEAR" functionality for vector (contents is
cleared, but structure still left in valid state).

The function clears vector contents (releases all dynamically allocated
memory). Vector may be in automatic management list - in this case it
will NOT be removed from list.

IMPORTANT: this function does NOT invalidate dst; it just releases all
dynamically allocated storage, but dst still may be used after call to
ae_vector_set_length().

dst                 destination vector
************************************************************************/
void ae_vector_clear(ae_vector *dst)
{
    dst->cnt = 0;
    ae_db_free(&dst->data);
    dst->ptr.p_ptr = 0;
    dst->is_attached = ae_false;
}

/************************************************************************
This function provides "DESTROY" functionality for vector (contents is
cleared, all internal structures are destroyed). For vectors it is same
as CLEAR.

dst                 destination vector
************************************************************************/
void ae_vector_destroy(ae_vector *dst)
{
    ae_vector_clear(dst);
}

/************************************************************************
This function efficiently swaps contents of two vectors, leaving other
parameters (automatic management, etc.) unchanged.
************************************************************************/
void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2)
{
    ae_int_t cnt;
    ae_datatype datatype;
    void *p_ptr;

    ae_assert(!vec1->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
    ae_assert(!vec2->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);

    ae_db_swap(&vec1->data, &vec2->data);

    cnt = vec1->cnt;
    datatype = vec1->datatype;
    p_ptr = vec1->ptr.p_ptr;
    vec1->cnt = vec2->cnt;
    vec1->datatype = vec2->datatype;
    vec1->ptr.p_ptr = vec2->ptr.p_ptr;
    vec2->cnt = cnt;
    vec2->datatype = datatype;
    vec2->ptr.p_ptr = p_ptr;
}

/************************************************************************
This function creates ae_matrix.

Matrix size may be zero, in such cases both rows and cols are zero.
Matrix contents is uninitialized.

dst                 destination matrix, must be zero-filled
rows                rows count
cols                cols count
datatype            element type
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.

NOTE: no memory allocation is performed for initialization with rows=cols=0
************************************************************************/
void ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state);

    /* if one of rows/cols is zero, another MUST be too; perform quick exit */
    if( rows==0 || cols==0 )
    {
        dst->rows = 0;
        dst->cols = 0;
        dst->is_attached = ae_false;
        dst->ptr.pp_void = NULL;
        dst->stride = 0;
        dst->datatype = datatype;
        ae_db_init(&dst->data, 0, state, make_automatic);
        return;
    }

    /* init, being ready for exception during allocation (rows=cols=ptr=NULL on entry) */
    dst->is_attached = ae_false;
    dst->rows = 0;
    dst->cols = 0;
    dst->ptr.pp_void = NULL;
    dst->stride = cols;
    while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 )
        dst->stride++;
    dst->datatype = datatype;
    ae_db_init(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic);
    dst->rows = rows;
    dst->cols = cols;
    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+rows*sizeof(void*),AE_DATA_ALIGN));
}
|
||
|
|
||
|
|
||
|
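/*************************************************************************
Illustrative sketch: the stride-rounding loop above pads each row so that
the row size in bytes is a multiple of AE_DATA_ALIGN. A minimal sketch of
the same computation, assuming 8-byte elements and the 64-byte
AE_DATA_ALIGN defined in this file; demo_* names are hypothetical and the
block is excluded from compilation.
*************************************************************************/
#if 0
static ae_int_t demo_padded_stride(ae_int_t cols, ae_int_t elem_size)
{
    ae_int_t stride = cols;
    while( stride*elem_size%AE_DATA_ALIGN!=0 )
        stride++;
    return stride;  /* e.g. cols=3, elem_size=8  ->  stride=8 (64 bytes/row) */
}
#endif
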
/************************************************************************
This function creates copy of ae_matrix. A new copy of the data is created.

dst                 destination matrix, must be zero-filled
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
{
    ae_int_t i;
    ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic);
    if( src->rows!=0 && src->cols!=0 )
    {
        if( dst->stride==src->stride )
            memmove(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype)));
        else
            for(i=0; i<dst->rows; i++)
                memmove(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype)));
    }
}

/************************************************************************
This function initializes ae_matrix using X-structure as source. New copy
of data is created, which is owned/managed by ae_matrix structure. Both
structures (source and destination) remain completely independent after
this call.

dst                 destination matrix, must be zero-filled
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
{
    char *p_src_row;
    char *p_dst_row;
    ae_int_t row_size;
    ae_int_t i;
    AE_CRITICAL_ASSERT(state!=NULL);
    ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic);
    if( src->rows!=0 && src->cols!=0 )
    {
        p_src_row = (char*)src->x_ptr.p_ptr;
        p_dst_row = (char*)(dst->ptr.pp_void[0]);
        row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols;
        for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype))
            memmove(p_dst_row, p_src_row, (size_t)(row_size));
    }
}

/************************************************************************
This function initializes ae_matrix using X-structure as source.

New matrix is attached to source:
* DST shares memory with SRC
* both DST and SRC are writable - all writes to DST change elements of
  SRC and vice versa.
* DST can be reallocated with ae_matrix_set_length(), in this case SRC
  remains untouched
* SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS

dst                 destination matrix, must be zero-filled
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_matrix_init_attach_to_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
{
    ae_int_t rows, cols;
    
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
    
    rows = (ae_int_t)src->rows;
    cols = (ae_int_t)src->cols;
    
    /* check that X-source is densely packed */
    ae_assert(src->cols==src->stride, "ae_matrix_init_attach_to_x(): unsupported stride", state);
    
    /* ensure that size is correct */
    ae_assert(rows==src->rows, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(cols==src->cols, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(rows>=0 && cols>=0, "ae_matrix_init_attach_to_x(): negative length", state);
    
    /* if one of rows/cols is zero, another MUST be too */
    if( rows==0 || cols==0 )
    {
        rows = 0;
        cols = 0;
    }
    
    /* init, being ready for allocation error */
    dst->is_attached = ae_true;
    dst->rows = 0;
    dst->cols = 0;
    dst->stride = cols;
    dst->datatype = (ae_datatype)src->datatype;
    dst->ptr.pp_void = NULL;
    ae_db_init(&dst->data, rows*(ae_int_t)sizeof(void*), state, make_automatic);
    dst->rows = rows;
    dst->cols = cols;
    if( dst->rows>0 && dst->cols>0 )
    {
        ae_int_t i, rowsize;
        char *p_row;
        void **pp_ptr;
        
        p_row = (char*)src->x_ptr.p_ptr;
        rowsize = dst->stride*ae_sizeof(dst->datatype);
        pp_ptr = (void**)dst->data.ptr;
        dst->ptr.pp_void = pp_ptr;
        for(i=0; i<dst->rows; i++, p_row+=rowsize)
            pp_ptr[i] = p_row;
    }
}

/************************************************************************
This function changes length of ae_matrix.

dst                 destination matrix
rows                size, may be zero
cols                size, may be zero
state               ALGLIB environment state, can not be NULL

Error handling: calls ae_break() on allocation error

NOTES:
* matrix must be initialized
* all contents is destroyed during setlength() call
* new size may be zero.
************************************************************************/
void ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state);
    if( dst->rows==rows && dst->cols==cols )
        return;
    
    /* prepare stride */
    dst->stride = cols;
    while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 )
        dst->stride++;
    
    /* realloc, being ready for an exception during reallocation (rows=cols=0 on entry) */
    dst->rows = 0;
    dst->cols = 0;
    dst->ptr.pp_void = NULL;
    ae_db_realloc(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state);
    dst->rows = rows;
    dst->cols = cols;
    
    /* update pointers to rows */
    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
}

/************************************************************************
This function provides "CLEAR" functionality for matrix (contents is
cleared, but structure still left in valid state).

The function clears matrix contents (releases all dynamically allocated
memory). Matrix may be in automatic management list - in this case it
will NOT be removed from list.

IMPORTANT: this function does NOT invalidate dst; it just releases all
dynamically allocated storage, but dst still may be used after call to
ae_matrix_set_length().

dst                 destination matrix
************************************************************************/
void ae_matrix_clear(ae_matrix *dst)
{
    dst->rows = 0;
    dst->cols = 0;
    dst->stride = 0;
    ae_db_free(&dst->data);
    dst->ptr.p_ptr = 0;
    dst->is_attached = ae_false;
}

/************************************************************************
This function provides "DESTROY" functionality for matrix (contents is
cleared, but structure still left in valid state).

For matrices it is same as CLEAR.

dst                 destination matrix
************************************************************************/
void ae_matrix_destroy(ae_matrix *dst)
{
    ae_matrix_clear(dst);
}

/************************************************************************
This function efficiently swaps contents of two matrices, leaving other
parameters (automatic management, etc.) unchanged.
************************************************************************/
void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
{
    ae_int_t rows;
    ae_int_t cols;
    ae_int_t stride;
    ae_datatype datatype;
    void *p_ptr;
    
    ae_assert(!mat1->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
    ae_assert(!mat2->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
    
    ae_db_swap(&mat1->data, &mat2->data);
    
    rows = mat1->rows;
    cols = mat1->cols;
    stride = mat1->stride;
    datatype = mat1->datatype;
    p_ptr = mat1->ptr.p_ptr;
    
    mat1->rows = mat2->rows;
    mat1->cols = mat2->cols;
    mat1->stride = mat2->stride;
    mat1->datatype = mat2->datatype;
    mat1->ptr.p_ptr = mat2->ptr.p_ptr;
    
    mat2->rows = rows;
    mat2->cols = cols;
    mat2->stride = stride;
    mat2->datatype = datatype;
    mat2->ptr.p_ptr = p_ptr;
}

/************************************************************************
This function creates smart pointer structure.

dst                 destination smart pointer, must be zero-filled
subscriber          pointer to pointer which receives updates in the
                    internal object stored in ae_smart_ptr. Any update to
                    dst->ptr is translated to subscriber. Can be NULL.
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, pointer will be registered in the current frame
                    of the state structure;

Error handling:
* on failure calls ae_break() with NULL state pointer. Usually it results
  in abort() call.

After initialization, smart pointer stores NULL pointer.
************************************************************************/
void ae_smart_ptr_init(ae_smart_ptr *dst, void **subscriber, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
    dst->subscriber = subscriber;
    dst->ptr = NULL;
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = dst->ptr;
    dst->is_owner = ae_false;
    dst->is_dynamic = ae_false;
    dst->frame_entry.deallocator = ae_smart_ptr_destroy;
    dst->frame_entry.ptr = dst;
    if( make_automatic )
        ae_db_attach(&dst->frame_entry, state);
}

/************************************************************************
This function clears smart pointer structure.

dst                 destination smart pointer.

After call to this function smart pointer contains NULL reference, which
is propagated to its subscriber (in case a non-NULL subscriber was
specified during pointer creation).
************************************************************************/
void ae_smart_ptr_clear(void *_dst)
{
    ae_smart_ptr *dst = (ae_smart_ptr*)_dst;
    if( dst->is_owner && dst->ptr!=NULL )
    {
        dst->destroy(dst->ptr);
        if( dst->is_dynamic )
            ae_free(dst->ptr);
    }
    dst->is_owner = ae_false;
    dst->is_dynamic = ae_false;
    dst->ptr = NULL;
    dst->destroy = NULL;
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = NULL;
}

/************************************************************************
This function destroys smart pointer structure (same as clearing it).

dst                 destination smart pointer.
************************************************************************/
void ae_smart_ptr_destroy(void *_dst)
{
    ae_smart_ptr_clear(_dst);
}

/************************************************************************
This function assigns pointer to ae_smart_ptr structure.

dst                 destination smart pointer.
new_ptr             new pointer to assign
is_owner            whether smart pointer owns new_ptr
is_dynamic          whether object is dynamic - clearing such object
                    requires BOTH calling destructor function AND calling
                    ae_free() for memory occupied by object.
destroy             destructor function

In case smart pointer already contains non-NULL value and owns this value,
it is freed before assigning new pointer.

Changes in pointer are propagated to its subscriber (in case non-NULL
subscriber was specified during pointer creation).

You can specify NULL new_ptr, in which case is_owner/destroy are ignored.
************************************************************************/
void ae_smart_ptr_assign(ae_smart_ptr *dst, void *new_ptr, ae_bool is_owner, ae_bool is_dynamic, void (*destroy)(void*))
{
    if( dst->is_owner && dst->ptr!=NULL )
    {
        dst->destroy(dst->ptr);
        if( dst->is_dynamic )
            ae_free(dst->ptr);
    }
    if( new_ptr!=NULL )
    {
        dst->ptr = new_ptr;
        dst->is_owner = is_owner;
        dst->is_dynamic = is_dynamic;
        dst->destroy = destroy;
    }
    else
    {
        dst->ptr = NULL;
        dst->is_owner = ae_false;
        dst->is_dynamic = ae_false;
        dst->destroy = NULL;
    }
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = dst->ptr;
}

/************************************************************************
This function releases pointer owned by ae_smart_ptr structure:
* all internal fields are set to NULL
* destructor function for internal pointer is NOT called even when we own
  this pointer. After this call ae_smart_ptr releases ownership of its
  pointer and passes it to caller.
* changes in pointer are propagated to its subscriber (in case non-NULL
  subscriber was specified during pointer creation).

dst                 destination smart pointer.
************************************************************************/
void ae_smart_ptr_release(ae_smart_ptr *dst)
{
    dst->is_owner = ae_false;
    dst->is_dynamic = ae_false;
    dst->ptr = NULL;
    dst->destroy = NULL;
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = NULL;
}

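/*************************************************************************
Illustrative sketch: a typical ownership round-trip using the smart pointer
functions above. The demo_* names are hypothetical and the payload is just
an ae_malloc'ed buffer; the block is excluded from compilation.
*************************************************************************/
#if 0
static void demo_payload_destroy(void *p) { (void)p; }  /* nothing inside to destroy */

static void demo_smart_ptr(ae_state *state)
{
    void *subscriber = NULL;
    double *payload = (double*)ae_malloc(sizeof(double), state);
    ae_smart_ptr p;
    memset(&p, 0, sizeof(p));
    ae_smart_ptr_init(&p, &subscriber, state, ae_false);
    
    /* owner+dynamic: clearing would call the destructor AND ae_free() the memory */
    ae_smart_ptr_assign(&p, payload, ae_true, ae_true, demo_payload_destroy);
    
    /* release(): ownership is dropped, destructor is NOT called, caller keeps payload */
    ae_smart_ptr_release(&p);
    ae_free(payload);
}
#endif
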
/************************************************************************
This function copies contents of ae_vector (SRC) to x_vector (DST).

This function should not be called for DST which is attached to SRC
(opposite situation, when SRC is attached to DST, is possible).

Depending on situation, following actions are performed
* for SRC attached to DST, this function performs no actions (no need to
  do anything)
* for independent vectors of different sizes it allocates storage in DST
  and copies contents of SRC to DST. DST->last_action field is set to
  ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
* for independent vectors of same sizes it does not perform memory
  (re)allocation. It just copies SRC to already existing place.
  DST->last_action is set to ACT_SAME_LOCATION (unless it was
  ACT_NEW_LOCATION), DST->owner is unmodified.

dst                 destination vector (in x-format)
src                 source vector (in ae-format)
state               ALGLIB environment state

NOTES:
* dst is assumed to be initialized. Its contents is freed before copying
  data from src (if size / type are different) or overwritten (if
  possible given destination size).
************************************************************************/
void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state)
{
    if( src->ptr.p_ptr == dst->x_ptr.p_ptr )
    {
        /* src->ptr points to the beginning of dst, attached vectors, no need to copy */
        return;
    }
    if( dst->cnt!=src->cnt || dst->datatype!=src->datatype )
    {
        if( dst->owner==OWN_AE )
            ae_free(dst->x_ptr.p_ptr);
        dst->x_ptr.p_ptr = ae_malloc((size_t)(src->cnt*ae_sizeof(src->datatype)), state);
        if( src->cnt!=0 && dst->x_ptr.p_ptr==NULL )
            ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
        dst->last_action = ACT_NEW_LOCATION;
        dst->cnt = src->cnt;
        dst->datatype = src->datatype;
        dst->owner = OWN_AE;
    }
    else
    {
        if( dst->last_action==ACT_UNCHANGED )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_SAME_LOCATION )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_NEW_LOCATION )
            dst->last_action = ACT_NEW_LOCATION;
        else
            ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_vector()", state);
    }
    if( src->cnt )
        memmove(dst->x_ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
}

/************************************************************************
This function copies contents of ae_matrix to x_matrix.

This function should not be called for DST which is attached to SRC
(opposite situation, when SRC is attached to DST, is possible).

Depending on situation, following actions are performed
* for SRC attached to DST, this function performs no actions (no need to
  do anything)
* for independent matrices of different sizes it allocates storage in DST
  and copies contents of SRC to DST. DST->last_action field is set to
  ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
* for independent matrices of same sizes it does not perform memory
  (re)allocation. It just copies SRC to already existing place.
  DST->last_action is set to ACT_SAME_LOCATION (unless it was
  ACT_NEW_LOCATION), DST->owner is unmodified.

dst                 destination matrix (in x-format)
src                 source matrix (in ae-format)
state               ALGLIB environment state

NOTES:
* dst is assumed to be initialized. Its contents is freed before copying
  data from src (if size / type are different) or overwritten (if
  possible given destination size).
************************************************************************/
void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state)
{
    char *p_src_row;
    char *p_dst_row;
    ae_int_t i;
    ae_int_t row_size;
    if( src->ptr.pp_void!=NULL && src->ptr.pp_void[0] == dst->x_ptr.p_ptr )
    {
        /* src->ptr points to the beginning of dst, attached matrices, no need to copy */
        return;
    }
    if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype )
    {
        if( dst->owner==OWN_AE )
            ae_free(dst->x_ptr.p_ptr);
        dst->rows = src->rows;
        dst->cols = src->cols;
        dst->stride = src->cols;
        dst->datatype = src->datatype;
        dst->x_ptr.p_ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state);
        if( dst->rows!=0 && dst->stride!=0 && dst->x_ptr.p_ptr==NULL )
            ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
        dst->last_action = ACT_NEW_LOCATION;
        dst->owner = OWN_AE;
    }
    else
    {
        if( dst->last_action==ACT_UNCHANGED )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_SAME_LOCATION )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_NEW_LOCATION )
            dst->last_action = ACT_NEW_LOCATION;
        else
            ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_matrix()", state);
    }
    if( src->rows!=0 && src->cols!=0 )
    {
        p_src_row = (char*)(src->ptr.pp_void[0]);
        p_dst_row = (char*)dst->x_ptr.p_ptr;
        row_size = ae_sizeof(src->datatype)*src->cols;
        for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype))
            memmove(p_dst_row, p_src_row, (size_t)(row_size));
    }
}

/************************************************************************
This function attaches x_vector to ae_vector's contents.
Ownership of memory allocated is not changed (it is still managed by
ae_vector).

dst                 destination x-vector
src                 source ae_vector

NOTES:
* dst is assumed to be initialized. Its contents is freed before
  attaching to src.
* this function doesn't need ae_state parameter because it can't fail
  (assuming correctly initialized src)
************************************************************************/
void ae_x_attach_to_vector(x_vector *dst, ae_vector *src)
{
    if( dst->owner==OWN_AE )
        ae_free(dst->x_ptr.p_ptr);
    dst->x_ptr.p_ptr = src->ptr.p_ptr;
    dst->last_action = ACT_NEW_LOCATION;
    dst->cnt = src->cnt;
    dst->datatype = src->datatype;
    dst->owner = OWN_CALLER;
}

/************************************************************************
This function attaches x_matrix to ae_matrix's contents.
Ownership of memory allocated is not changed (it is still managed by
ae_matrix).

dst                 destination x-matrix
src                 source ae_matrix

NOTES:
* dst is assumed to be initialized. Its contents is freed before
  attaching to src.
* this function doesn't need ae_state parameter because it can't fail
  (assuming correctly initialized src)
************************************************************************/
void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src)
{
    if( dst->owner==OWN_AE )
        ae_free(dst->x_ptr.p_ptr);
    dst->rows = src->rows;
    dst->cols = src->cols;
    dst->stride = src->stride;
    dst->datatype = src->datatype;
    dst->x_ptr.p_ptr = &(src->ptr.pp_double[0][0]);
    dst->last_action = ACT_NEW_LOCATION;
    dst->owner = OWN_CALLER;
}

/************************************************************************
This function clears x_vector. It does nothing if vector is not owned by
ALGLIB environment.

dst                 vector
************************************************************************/
void x_vector_clear(x_vector *dst)
{
    if( dst->owner==OWN_AE )
        aligned_free(dst->x_ptr.p_ptr);
    dst->x_ptr.p_ptr = NULL;
    dst->cnt = 0;
}

/************************************************************************
Assertion

For non-NULL state it allows us to gracefully leave ALGLIB session,
removing all frames and deallocating registered dynamic data structures.

For NULL state it just abort()'s program.

IMPORTANT: this function ALWAYS evaluates its argument. It can not be
           replaced by macro which does nothing. So, you may place actual
           function calls at cond, and these will always be performed.
************************************************************************/
void ae_assert(ae_bool cond, const char *msg, ae_state *state)
{
    if( !cond )
        ae_break(state, ERR_ASSERTION_FAILED, msg);
}

/************************************************************************
CPUID

Returns information about features CPU and compiler support.

You must tell ALGLIB what CPU family is used by defining AE_CPU symbol
(without this hint zero will be returned).

Note: results of this function depend on both CPU and compiler;
if compiler doesn't support SSE intrinsics, function won't set
corresponding flag.
************************************************************************/
static volatile ae_bool _ae_cpuid_initialized = ae_false;
static volatile ae_bool _ae_cpuid_has_sse2 = ae_false;
ae_int_t ae_cpuid()
{
    /*
     * to speed up CPU detection we cache results from previous attempts
     * there is no synchronization, but it is still thread safe.
     *
     * thread safety is guaranteed on all modern architectures which
     * have following property: simultaneous writes by different cores
     * to the same location will be executed in serial manner.
     *
     */
    ae_int_t result;
    
    /*
     * if not initialized, determine system properties
     */
    if( !_ae_cpuid_initialized )
    {
        /*
         * SSE2
         */
#if defined(AE_CPU)
#if (AE_CPU==AE_INTEL) && defined(AE_HAS_SSE2_INTRINSICS)
#if AE_COMPILER==AE_MSVC
        {
            int CPUInfo[4];
            __cpuid(CPUInfo, 1);
            if( (CPUInfo[3]&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
        }
#elif AE_COMPILER==AE_GNUC
        {
            ae_int_t a,b,c,d;
            __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
            if( (d&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
        }
#elif AE_COMPILER==AE_SUNC
        {
            ae_int_t a,b,c,d;
            __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
            if( (d&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
        }
#else
#endif
#endif
#endif
        /*
         * Perform one more CPUID call to generate memory fence
         */
#if AE_CPU==AE_INTEL
#if AE_COMPILER==AE_MSVC
        { int CPUInfo[4]; __cpuid(CPUInfo, 1); }
#elif AE_COMPILER==AE_GNUC
        { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
#elif AE_COMPILER==AE_SUNC
        { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
#else
#endif
#endif
        
        /*
         * set initialization flag
         */
        _ae_cpuid_initialized = ae_true;
    }
    
    /*
     * return
     */
    result = 0;
    if( _ae_cpuid_has_sse2 )
        result = result|CPU_SSE2;
    return result;
}

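/*************************************************************************
Illustrative sketch: querying the cached CPUID flags returned above.
CPU_SSE2 is the flag OR'ed into the result by ae_cpuid(); the demo_* name
is hypothetical and the block is excluded from compilation.
*************************************************************************/
#if 0
static ae_bool demo_have_sse2()
{
    /* the first call probes the CPU, later calls return the cached flags */
    return (ae_cpuid()&CPU_SSE2)!=0 ? ae_true : ae_false;
}
#endif
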
/************************************************************************
Activates tracing to file

IMPORTANT: this function is NOT thread-safe! Calling it from multiple
           threads will result in undefined behavior. Calling it when
           some thread calls ALGLIB functions may result in undefined
           behavior.
************************************************************************/
void ae_trace_file(const char *tags, const char *filename)
{
    /*
     * clean up previous call
     */
    if( alglib_fclose_trace )
    {
        if( alglib_trace_file!=NULL )
            fclose(alglib_trace_file);
        alglib_trace_file = NULL;
        alglib_fclose_trace = ae_false;
    }
    
    /*
     * store ",tags," to buffer. Leading and trailing commas allow us
     * to perform checks for various tags by simply calling strstr().
     */
    memset(alglib_trace_tags, 0, ALGLIB_TRACE_BUFFER_LEN);
    strcat(alglib_trace_tags, ",");
    strncat(alglib_trace_tags, tags, ALGLIB_TRACE_TAGS_LEN);
    strcat(alglib_trace_tags, ",");
    for(int i=0; alglib_trace_tags[i]!=0; i++)
        alglib_trace_tags[i] = tolower(alglib_trace_tags[i]);
    
    /*
     * set up trace
     */
    alglib_trace_type = ALGLIB_TRACE_FILE;
    alglib_trace_file = fopen(filename, "ab");
    alglib_fclose_trace = ae_true;
}


/************************************************************************
Disables tracing
************************************************************************/
void ae_trace_disable()
{
    alglib_trace_type = ALGLIB_TRACE_NONE;
    if( alglib_fclose_trace )
        fclose(alglib_trace_file);
    alglib_trace_file = NULL;
    alglib_fclose_trace = ae_false;
}


/************************************************************************
Checks whether specific kind of tracing is enabled
************************************************************************/
ae_bool ae_is_trace_enabled(const char *tag)
{
    char buf[ALGLIB_TRACE_BUFFER_LEN];
    
    /* check global trace status */
    if( alglib_trace_type==ALGLIB_TRACE_NONE || alglib_trace_file==NULL )
        return ae_false;
    
    /* copy tag to buffer, lowercase it */
    memset(buf, 0, ALGLIB_TRACE_BUFFER_LEN);
    strcat(buf, ",");
    strncat(buf, tag, ALGLIB_TRACE_TAGS_LEN);
    strcat(buf, "?");
    for(int i=0; buf[i]!=0; i++)
        buf[i] = tolower(buf[i]);
    
    /* contains tag (followed by comma, which means exact match) */
    buf[strlen(buf)-1] = ',';
    if( strstr(alglib_trace_tags,buf)!=NULL )
        return ae_true;
    
    /* contains tag (followed by dot, which means match with child) */
    buf[strlen(buf)-1] = '.';
    if( strstr(alglib_trace_tags,buf)!=NULL )
        return ae_true;
    
    /* nothing */
    return ae_false;
}

void ae_trace(const char * printf_fmt, ...)
{
    /* check global trace status */
    if( alglib_trace_type==ALGLIB_TRACE_FILE && alglib_trace_file!=NULL )
    {
        va_list args;
        
        /* fprintf() */
        va_start(args, printf_fmt);
        vfprintf(alglib_trace_file, printf_fmt, args);
        va_end(args);
        
        /* flush output */
        fflush(alglib_trace_file);
    }
}

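/*************************************************************************
Illustrative sketch: enabling file tracing and emitting one line through
ae_trace(). The tag name and file name are arbitrary examples; the block
is excluded from compilation.
*************************************************************************/
#if 0
static void demo_tracing()
{
    ae_trace_file("optimizer", "trace.log");
    
    /* the exact tag registered above matches; see the ','/'.' checks in ae_is_trace_enabled() */
    if( ae_is_trace_enabled("optimizer") )
        ae_trace("optimizer: starting iteration %d\n", (int)0);
    
    ae_trace_disable();
}
#endif
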
/************************************************************************
Real math functions
************************************************************************/
ae_bool ae_fp_eq(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    volatile double x = v1;
    volatile double y = v2;
    return x==y;
}

ae_bool ae_fp_neq(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    return !ae_fp_eq(v1,v2);
}

ae_bool ae_fp_less(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    volatile double x = v1;
    volatile double y = v2;
    return x<y;
}

ae_bool ae_fp_less_eq(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    volatile double x = v1;
    volatile double y = v2;
    return x<=y;
}

ae_bool ae_fp_greater(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    volatile double x = v1;
    volatile double y = v2;
    return x>y;
}

ae_bool ae_fp_greater_eq(double v1, double v2)
{
    /* IEEE-strict floating point comparison */
    volatile double x = v1;
    volatile double y = v2;
    return x>=y;
}

ae_bool ae_isfinite_stateless(double x, ae_int_t endianness)
{
    union _u
    {
        double a;
        ae_int32_t p[2];
    } u;
    ae_int32_t high;
    u.a = x;
    if( endianness==AE_LITTLE_ENDIAN )
        high = u.p[1];
    else
        high = u.p[0];
    return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000;
}

ae_bool ae_isnan_stateless(double x, ae_int_t endianness)
{
    union _u
    {
        double a;
        ae_int32_t p[2];
    } u;
    ae_int32_t high, low;
    u.a = x;
    if( endianness==AE_LITTLE_ENDIAN )
    {
        high = u.p[1];
        low = u.p[0];
    }
    else
    {
        high = u.p[0];
        low = u.p[1];
    }
    return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0));
}

ae_bool ae_isinf_stateless(double x, ae_int_t endianness)
{
    union _u
    {
        double a;
        ae_int32_t p[2];
    } u;
    ae_int32_t high, low;
    u.a = x;
    if( endianness==AE_LITTLE_ENDIAN )
    {
        high = u.p[1];
        low = u.p[0];
    }
    else
    {
        high = u.p[0];
        low = u.p[1];
    }
    
    /* 31 least significant bits of high are compared */
    return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0);
}

ae_bool ae_isposinf_stateless(double x, ae_int_t endianness)
{
    union _u
    {
        double a;
        ae_int32_t p[2];
    } u;
    ae_int32_t high, low;
    u.a = x;
    if( endianness==AE_LITTLE_ENDIAN )
    {
        high = u.p[1];
        low = u.p[0];
    }
    else
    {
        high = u.p[0];
        low = u.p[1];
    }
    
    /* all 32 bits of high are compared */
    return (high==(ae_int32_t)0x7FF00000) && (low==0);
}

ae_bool ae_isneginf_stateless(double x, ae_int_t endianness)
{
    union _u
    {
        double a;
        ae_int32_t p[2];
    } u;
    ae_int32_t high, low;
    u.a = x;
    if( endianness==AE_LITTLE_ENDIAN )
    {
        high = u.p[1];
        low = u.p[0];
    }
    else
    {
        high = u.p[0];
        low = u.p[1];
    }
    
    /* this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers */
    return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0);
}

ae_int_t ae_get_endianness()
{
    union
    {
        double a;
        ae_int32_t p[2];
    } u;
    
    /*
     * determine endianness
     * two types are supported: big-endian and little-endian.
     * mixed-endian hardware is NOT supported.
     *
     * 1983 is used as magic number because its non-periodic double
     * representation allows us to easily distinguish between upper
     * and lower halves and to detect mixed-endian hardware.
     *
     */
    u.a = 1.0/1983.0;
    if( u.p[1]==(ae_int32_t)0x3f408642 )
        return AE_LITTLE_ENDIAN;
    if( u.p[0]==(ae_int32_t)0x3f408642 )
        return AE_BIG_ENDIAN;
    return AE_MIXED_ENDIAN;
}

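/*************************************************************************
Illustrative sketch: the stateless classifiers above take the endianness
explicitly, which ae_get_endianness() supplies. The demo_* name is
hypothetical and the block is excluded from compilation.
*************************************************************************/
#if 0
static ae_bool demo_detect_nan(double candidate)
{
    ae_int_t endianness = ae_get_endianness();
    return ae_isnan_stateless(candidate, endianness);
}
#endif
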
ae_bool ae_isfinite(double x,ae_state *state)
{
    return ae_isfinite_stateless(x, state->endianness);
}

ae_bool ae_isnan(double x, ae_state *state)
{
    return ae_isnan_stateless(x, state->endianness);
}

ae_bool ae_isinf(double x, ae_state *state)
{
    return ae_isinf_stateless(x, state->endianness);
}

ae_bool ae_isposinf(double x,ae_state *state)
{
    return ae_isposinf_stateless(x, state->endianness);
}

ae_bool ae_isneginf(double x,ae_state *state)
{
    return ae_isneginf_stateless(x, state->endianness);
}

double ae_fabs(double x, ae_state *state)
{
    return fabs(x);
}

ae_int_t ae_iabs(ae_int_t x, ae_state *state)
{
    return x>=0 ? x : -x;
}

double ae_sqr(double x, ae_state *state)
{
    return x*x;
}

double ae_sqrt(double x, ae_state *state)
{
    return sqrt(x);
}

ae_int_t ae_sign(double x, ae_state *state)
{
    if( x>0 ) return 1;
    if( x<0 ) return -1;
    return 0;
}

ae_int_t ae_round(double x, ae_state *state)
{
    return (ae_int_t)(ae_ifloor(x+0.5,state));
}

ae_int_t ae_trunc(double x, ae_state *state)
{
    return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state));
}

ae_int_t ae_ifloor(double x, ae_state *state)
{
    return (ae_int_t)(floor(x));
}

ae_int_t ae_iceil(double x, ae_state *state)
{
    return (ae_int_t)(ceil(x));
}

ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
{
    return m1>m2 ? m1 : m2;
}

ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
{
    return m1>m2 ? m2 : m1;
}

double ae_maxreal(double m1, double m2, ae_state *state)
{
    return m1>m2 ? m1 : m2;
}

double ae_minreal(double m1, double m2, ae_state *state)
{
    return m1>m2 ? m2 : m1;
}

double ae_randomreal(ae_state *state)
{
    int i1 = rand();
    int i2 = rand();
    double mx = (double)(RAND_MAX)+1.0;
    volatile double tmp0 = i2/mx;
    volatile double tmp1 = i1+tmp0;
    return tmp1/mx;
}

ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state)
{
    return rand()%maxv;
}

double ae_sin(double x, ae_state *state)
{
    return sin(x);
}

double ae_cos(double x, ae_state *state)
{
    return cos(x);
}

double ae_tan(double x, ae_state *state)
{
    return tan(x);
}

double ae_sinh(double x, ae_state *state)
{
    return sinh(x);
}

double ae_cosh(double x, ae_state *state)
{
    return cosh(x);
}

double ae_tanh(double x, ae_state *state)
{
    return tanh(x);
}

double ae_asin(double x, ae_state *state)
{
    return asin(x);
}

double ae_acos(double x, ae_state *state)
{
    return acos(x);
}

double ae_atan(double x, ae_state *state)
{
    return atan(x);
}

double ae_atan2(double y, double x, ae_state *state)
{
    return atan2(y,x);
}

double ae_log(double x, ae_state *state)
{
    return log(x);
}

double ae_pow(double x, double y, ae_state *state)
{
    return pow(x,y);
}

double ae_exp(double x, ae_state *state)
{
    return exp(x);
}

/************************************************************************
Symmetric/Hermitian properties: check and force
************************************************************************/
static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2)
{
    ae_int_t r;
    if( n<=nb )
    {
        *n1 = n;
        *n2 = 0;
    }
    else
    {
        if( n%nb!=0 )
        {
            *n2 = n%nb;
            *n1 = n-(*n2);
        }
        else
        {
            *n2 = n/2;
            *n1 = n-(*n2);
            if( *n1%nb==0 )
            {
                return;
            }
            r = nb-*n1%nb;
            *n1 = *n1+r;
            *n2 = *n2-r;
        }
    }
}
static double x_safepythag2(double x, double y)
{
    double w;
    double xabs;
    double yabs;
    double z;
    xabs = fabs(x);
    yabs = fabs(y);
    w = xabs>yabs ? xabs : yabs;
    z = xabs<yabs ? xabs : yabs;
    if( z==0 )
        return w;
    else
    {
        double t;
        t = z/w;
        return w*sqrt(1+t*t);
    }
}
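/*************************************************************************
Illustrative sketch: a worked example of x_split_length() with the block
size x_nb=16 used by the recursive checks below. The sample size 100 is
arbitrary and the block is excluded from compilation.
*************************************************************************/
#if 0
static void demo_split_length()
{
    ae_int_t n1, n2;
    x_split_length(100, x_nb, &n1, &n2);    /* 100%16!=0  ->  n1=96, n2=4  */
    x_split_length(96, x_nb, &n1, &n2);     /* 96%16==0   ->  n1=48, n2=48 */
}
#endif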
/*
 * this function checks difference between offdiagonal blocks BL and BU
 * (see below). Block BL is specified by offsets (offset0,offset1) and
 * sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 *
 * this subroutine updates current values of:
 * a) mx       maximum value of A[i,j] found so far
 * b) err      componentwise difference between elements of BL and BU^T
 *
 */
static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            is_symmetric_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
            is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            is_symmetric_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
            is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
        }
        return;
    }
    else
    {
        /* base case */
        double *p1, *p2, *prow, *pcol;
        double v;
        ae_int_t i, j;
        
        p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
                {
                    *nonfinite = ae_true;
                }
                else
                {
                    v = fabs(*pcol);
                    *mx = *mx>v ? *mx : v;
                    v = fabs(*prow);
                    *mx = *mx>v ? *mx : v;
                    v = fabs(*pcol-*prow);
                    *err = *err>v ? *err : v;
                }
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function checks that diagonal block A0 is symmetric.
 * Block A0 is specified by its offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [        .   ]
 *     [          . ]
 *
 * this subroutine updates current values of:
 * a) mx       maximum value of A[i,j] found so far
 * b) err      componentwise difference between A0 and A0^T
 *
 */
static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    double *p, *prow, *pcol;
    double v;
    ae_int_t i, j;
    
    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
        is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
        is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
        return;
    }
    
    /* base case */
    p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
        {
            if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
            {
                *nonfinite = ae_true;
            }
            else
            {
                v = fabs(*pcol);
                *mx = *mx>v ? *mx : v;
                v = fabs(*prow);
                *mx = *mx>v ? *mx : v;
                v = fabs(*pcol-*prow);
                *err = *err>v ? *err : v;
            }
        }
        v = fabs(p[i+i*a->stride]);
        *mx = *mx>v ? *mx : v;
    }
}
/*
 * this function checks difference between offdiagonal blocks BL and BU
 * (see below). Block BL is specified by offsets (offset0,offset1) and
 * sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 *
 * this subroutine updates current values of:
 * a) mx       maximum value of A[i,j] found so far
 * b) err      componentwise difference between elements of BL and BU^H
 *
 */
static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
        }
        return;
    }
    else
    {
        /* base case */
        ae_complex *p1, *p2, *prow, *pcol;
        double v;
        ae_int_t i, j;
        
        p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
                {
                    *nonfinite = ae_true;
                }
                else
                {
                    v = x_safepythag2(pcol->x, pcol->y);
                    *mx = *mx>v ? *mx : v;
                    v = x_safepythag2(prow->x, prow->y);
                    *mx = *mx>v ? *mx : v;
                    v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                    *err = *err>v ? *err : v;
                }
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function checks that diagonal block A0 is Hermitian.
 * Block A0 is specified by its offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [        .   ]
 *     [          . ]
 *
 * this subroutine updates current values of:
 * a) mx       maximum value of A[i,j] found so far
 * b) err      componentwise difference between A0 and A0^H
 *
 */
static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    ae_complex *p, *prow, *pcol;
    double v;
    ae_int_t i, j;
    
    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
        is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
        is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
        return;
    }
    
    /* base case */
    p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
        {
            if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
            {
                *nonfinite = ae_true;
            }
            else
            {
                v = x_safepythag2(pcol->x, pcol->y);
                *mx = *mx>v ? *mx : v;
                v = x_safepythag2(prow->x, prow->y);
                *mx = *mx>v ? *mx : v;
                v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                *err = *err>v ? *err : v;
            }
        }
        if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) )
        {
            *nonfinite = ae_true;
        }
        else
        {
            v = fabs(p[i+i*a->stride].x);
            *mx = *mx>v ? *mx : v;
            v = fabs(p[i+i*a->stride].y);
            *err = *err>v ? *err : v;
        }
    }
}
/*
 * this function copies offdiagonal block BL to its symmetric counterpart
 * BU (see below). Block BL is specified by offsets (offset0,offset1)
 * and sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 *
 */
static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0, offset1, n1, len1);
            force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0, offset1, len0, n1);
            force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2);
        }
        return;
    }
    else
    {
        /* base case */
        double *p1, *p2, *prow, *pcol;
        ae_int_t i, j;
        
        p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                *pcol = *prow;
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function copies lower part of diagonal block A0 to its upper part
 * Block is specified by offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [        .   ]
 *     [          . ]
 *
 */
static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
{
    double *p, *prow, *pcol;
    ae_int_t i, j;
    
    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        force_symmetric_rec_diag_stat(a, offset, n1);
        force_symmetric_rec_diag_stat(a, offset+n1, n2);
        force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1);
        return;
    }
    
    /* base case */
    p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
            *pcol = *prow;
    }
}
/*
 * this function copies Hermitian transpose of offdiagonal block BL to
 * its symmetric counterpart BU (see below). Block BL is specified by
 * offsets (offset0,offset1) and sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 */
static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            force_hermitian_rec_off_stat(a, offset0, offset1, n1, len1);
            force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            force_hermitian_rec_off_stat(a, offset0, offset1, len0, n1);
            force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2);
        }
        return;
    }
    else
    {
        /* base case */
        ae_complex *p1, *p2, *prow, *pcol;
        ae_int_t i, j;
        
        p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                *pcol = *prow;
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function copies Hermitian transpose of lower part of diagonal
 * block A0 to its upper part. Block is specified by offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [        .   ]
 *     [          . ]
 *
 */
static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
{
    ae_complex *p, *prow, *pcol;
    ae_int_t i, j;
    
    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        force_hermitian_rec_diag_stat(a, offset, n1);
        force_hermitian_rec_diag_stat(a, offset+n1, n2);
        force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1);
        return;
    }
    
    /* base case */
    p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
            *pcol = *prow;
    }
}
ae_bool x_is_symmetric(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    if( a->datatype!=DT_REAL )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    if( mx==0 )
        return ae_true;
    return err/mx<=1.0E-14;
}
ae_bool x_is_hermitian(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    if( a->datatype!=DT_COMPLEX )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    if( mx==0 )
        return ae_true;
    return err/mx<=1.0E-14;
}
ae_bool x_force_symmetric(x_matrix *a)
{
    if( a->datatype!=DT_REAL )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows);
    return ae_true;
}
ae_bool x_force_hermitian(x_matrix *a)
{
    if( a->datatype!=DT_COMPLEX )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows);
    return ae_true;
}

ae_bool ae_is_symmetric(ae_matrix *a)
|
||
|
{
|
||
|
x_matrix x;
|
||
|
x.owner = OWN_CALLER;
|
||
|
ae_x_attach_to_matrix(&x, a);
|
||
|
return x_is_symmetric(&x);
|
||
|
}
|
||
|
|
||
|
ae_bool ae_is_hermitian(ae_matrix *a)
|
||
|
{
|
||
|
x_matrix x;
|
||
|
x.owner = OWN_CALLER;
|
||
|
ae_x_attach_to_matrix(&x, a);
|
||
|
return x_is_hermitian(&x);
|
||
|
}
|
||
|
|
||
|
ae_bool ae_force_symmetric(ae_matrix *a)
|
||
|
{
|
||
|
x_matrix x;
|
||
|
x.owner = OWN_CALLER;
|
||
|
ae_x_attach_to_matrix(&x, a);
|
||
|
return x_force_symmetric(&x);
|
||
|
}
|
||
|
|
||
|
ae_bool ae_force_hermitian(ae_matrix *a)
|
||
|
{
|
||
|
x_matrix x;
|
||
|
x.owner = OWN_CALLER;
|
||
|
ae_x_attach_to_matrix(&x, a);
|
||
|
return x_force_hermitian(&x);
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
This function converts six-bit value (from 0 to 63) to character (only
|
||
|
digits, lowercase and uppercase letters, minus and underscore are used).
|
||
|
|
||
|
If v is negative or greater than 63, this function returns '?'.
|
||
|
************************************************************************/
|
||
|
static char _sixbits2char_tbl[64] = {
|
||
|
'0', '1', '2', '3', '4', '5', '6', '7',
|
||
|
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
|
||
|
'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
|
||
|
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
|
||
|
'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
|
||
|
'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
|
||
|
'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
|
||
|
'u', 'v', 'w', 'x', 'y', 'z', '-', '_' };
|
||
|
|
||
|
char ae_sixbits2char(ae_int_t v)
|
||
|
{
|
||
|
|
||
|
if( v<0 || v>63 )
|
||
|
return '?';
|
||
|
return _sixbits2char_tbl[v];
|
||
|
|
||
|
/* v is correct, process it */
|
||
|
/*if( v<10 )
|
||
|
return '0'+v;
|
||
|
v -= 10;
|
||
|
if( v<26 )
|
||
|
return 'A'+v;
|
||
|
v -= 26;
|
||
|
if( v<26 )
|
||
|
return 'a'+v;
|
||
|
v -= 26;
|
||
|
return v==0 ? '-' : '_';*/
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
This function converts character to six-bit value (from 0 to 63).
|
||
|
|
||
|
This function is the inverse of ae_sixbits2char().
If c is not a correct character, this function returns -1.
|
||
|
************************************************************************/
|
||
|
static ae_int_t _ae_char2sixbits_tbl[] = {
|
||
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
||
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
||
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
||
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
||
|
-1, -1, -1, -1, -1, -1, -1, -1,
|
||
|
-1, -1, -1, -1, -1, 62, -1, -1,
|
||
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||
|
8, 9, -1, -1, -1, -1, -1, -1,
|
||
|
-1, 10, 11, 12, 13, 14, 15, 16,
|
||
|
17, 18, 19, 20, 21, 22, 23, 24,
|
||
|
25, 26, 27, 28, 29, 30, 31, 32,
|
||
|
33, 34, 35, -1, -1, -1, -1, 63,
|
||
|
-1, 36, 37, 38, 39, 40, 41, 42,
|
||
|
43, 44, 45, 46, 47, 48, 49, 50,
|
||
|
51, 52, 53, 54, 55, 56, 57, 58,
|
||
|
59, 60, 61, -1, -1, -1, -1, -1 };
|
||
|
ae_int_t ae_char2sixbits(char c)
|
||
|
{
|
||
|
return (c>=0 && c<127) ? _ae_char2sixbits_tbl[(int)c] : -1;
|
||
|
}

/************************************************************************
This function converts three bytes (24 bits) to four six-bit values
(24 bits again).

src     pointer to three bytes
dst     pointer to four ints
************************************************************************/
void ae_threebytes2foursixbits(const unsigned char *src, ae_int_t *dst)
{
    dst[0] = src[0] & 0x3F;
    dst[1] = (src[0]>>6) | ((src[1]&0x0F)<<2);
    dst[2] = (src[1]>>4) | ((src[2]&0x03)<<4);
    dst[3] = src[2]>>2;
}

/************************************************************************
This function converts four six-bit values (24 bits) to three bytes
(24 bits again).

src     pointer to four ints
dst     pointer to three bytes
************************************************************************/
void ae_foursixbits2threebytes(const ae_int_t *src, unsigned char *dst)
{
    dst[0] = (unsigned char)( src[0] | ((src[1]&0x03)<<6));
    dst[1] = (unsigned char)((src[1]>>2) | ((src[2]&0x0F)<<4));
    dst[2] = (unsigned char)((src[2]>>4) | (src[3]<<2));
}
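
/*
 * Illustrative sketch (not part of the library): the two helpers above are
 * exact inverses of each other, i.e. packing three bytes into four six-bit
 * values and back reproduces the original bytes. demo_sixbits_roundtrip is
 * a hypothetical name used only for this example.
 */
static ae_bool demo_sixbits_roundtrip(const unsigned char *src3)
{
    ae_int_t sixbits[4];
    unsigned char back[3];
    ae_threebytes2foursixbits(src3, sixbits); /* 24 bits -> 4 values in [0,63]   */
    ae_foursixbits2threebytes(sixbits, back); /* 4 values -> the original 24 bits */
    return (src3[0]==back[0] && src3[1]==back[1] && src3[2]==back[2]) ? ae_true : ae_false;
}
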

/************************************************************************
This function serializes boolean value into buffer

v           boolean value to be serialized
buf         buffer, at least 12 characters wide
            (11 chars for value, one for trailing zero)
state       ALGLIB environment state
************************************************************************/
void ae_bool2str(ae_bool v, char *buf, ae_state *state)
{
    char c = v ? '1' : '0';
    ae_int_t i;
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = c;
    buf[AE_SER_ENTRY_LENGTH] = 0;
}

/************************************************************************
This function unserializes boolean value from buffer

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as end of
            the boolean value.
state       ALGLIB environment state

This function raises an error in case unexpected symbol is found
************************************************************************/
ae_bool ae_str2bool(const char *buf, ae_state *state, const char **pasttheend)
{
    ae_bool was0, was1;
    const char *emsg = "ALGLIB: unable to read boolean value from stream";

    was0 = ae_false;
    was1 = ae_false;
    while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
        buf++;
    while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
    {
        if( *buf=='0' )
        {
            was0 = ae_true;
            buf++;
            continue;
        }
        if( *buf=='1' )
        {
            was1 = ae_true;
            buf++;
            continue;
        }
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    }
    *pasttheend = buf;
    if( (!was0) && (!was1) )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    if( was0 && was1 )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    return was1 ? ae_true : ae_false;
}
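
/*
 * Illustrative sketch (not part of the library): ae_bool2str() followed by
 * ae_str2bool() reproduces the original flag. demo_bool_roundtrip is a
 * hypothetical helper; ae_state_init()/ae_state_clear() are the same
 * environment functions used elsewhere in this file.
 */
static ae_bool demo_bool_roundtrip(ae_bool v)
{
    ae_state state;
    char buf[AE_SER_ENTRY_LENGTH+1]; /* 11 chars for the value + trailing zero */
    const char *past;
    ae_bool result;
    ae_state_init(&state);
    ae_bool2str(v, buf, &state);     /* "11111111111" or "00000000000" */
    result = ae_str2bool(buf, &state, &past);
    ae_state_clear(&state);
    return result;
}
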
/************************************************************************
|
||
|
This function serializes integer value into buffer
|
||
|
|
||
|
v integer value to be serialized
|
||
|
buf buffer, at least 12 characters wide
|
||
|
(11 chars for value, one for trailing zero)
|
||
|
state ALGLIB environment state
|
||
|
************************************************************************/
|
||
|
void ae_int2str(ae_int_t v, char *buf, ae_state *state)
|
||
|
{
|
||
|
union _u
|
||
|
{
|
||
|
ae_int_t ival;
|
||
|
unsigned char bytes[9];
|
||
|
} u;
|
||
|
ae_int_t i;
|
||
|
ae_int_t sixbits[12];
|
||
|
unsigned char c;
|
||
|
|
||
|
/*
|
||
|
* copy v to array of chars, sign extending it and
|
||
|
* converting to little endian order
|
||
|
*
|
||
|
* because we don't want to mention size of ae_int_t explicitly,
|
||
|
* we do it as follows:
|
||
|
* 1. we fill u.bytes by zeros or ones (depending on sign of v)
|
||
|
* 2. we copy v to u.ival
|
||
|
* 3. if we run on big endian architecture, we reorder u.bytes
|
||
|
* 4. now we have signed 64-bit representation of v stored in u.bytes
|
||
|
* 5. additionally, we set 9th byte of u.bytes to zero in order to
|
||
|
* simplify conversion to six-bit representation
|
||
|
*/
|
||
|
c = v<0 ? (unsigned char)0xFF : (unsigned char)0x00;
|
||
|
u.ival = v;
|
||
|
for(i=sizeof(ae_int_t); i<=8; i++) /* <=8 is preferred because it avoids unnecessary compiler warnings*/
|
||
|
u.bytes[i] = c;
|
||
|
u.bytes[8] = 0;
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = u.bytes[i];
|
||
|
u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
|
||
|
u.bytes[sizeof(ae_int_t)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* convert to six-bit representation, output
|
||
|
*
|
||
|
* NOTE: last 12th element of sixbits is always zero, we do not output it
|
||
|
*/
|
||
|
ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
|
||
|
ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
|
||
|
ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
|
||
|
for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
|
||
|
buf[i] = ae_sixbits2char(sixbits[i]);
|
||
|
buf[AE_SER_ENTRY_LENGTH] = 0x00;
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
This function serializes 64-bit integer value into buffer
|
||
|
|
||
|
v integer value to be serialized
|
||
|
buf buffer, at least 12 characters wide
|
||
|
(11 chars for value, one for trailing zero)
|
||
|
state ALGLIB environment state
|
||
|
************************************************************************/
|
||
|
void ae_int642str(ae_int64_t v, char *buf, ae_state *state)
|
||
|
{
|
||
|
unsigned char bytes[9];
|
||
|
ae_int_t i;
|
||
|
ae_int_t sixbits[12];
|
||
|
|
||
|
/*
|
||
|
* copy v to array of chars, sign extending it and
|
||
|
* converting to little endian order
|
||
|
*
|
||
|
* because we don't want to mention size of ae_int_t explicitly,
|
||
|
* we do it as follows:
|
||
|
* 1. we fill bytes by zeros or ones (depending on sign of v)
|
||
|
* 2. we memmove v to bytes
|
||
|
* 3. if we run on big endian architecture, we reorder bytes
|
||
|
* 4. now we have signed 64-bit representation of v stored in bytes
|
||
|
* 5. additionally, we set 9th byte of bytes to zero in order to
|
||
|
* simplify conversion to six-bit representation
|
||
|
*/
|
||
|
memset(bytes, v<0 ? 0xFF : 0x00, 8);
|
||
|
memmove(bytes, &v, 8);
|
||
|
bytes[8] = 0;
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = bytes[i];
|
||
|
bytes[i] = bytes[sizeof(ae_int_t)-1-i];
|
||
|
bytes[sizeof(ae_int_t)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* convert to six-bit representation, output
|
||
|
*
|
||
|
* NOTE: last 12th element of sixbits is always zero, we do not output it
|
||
|
*/
|
||
|
ae_threebytes2foursixbits(bytes+0, sixbits+0);
|
||
|
ae_threebytes2foursixbits(bytes+3, sixbits+4);
|
||
|
ae_threebytes2foursixbits(bytes+6, sixbits+8);
|
||
|
for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
|
||
|
buf[i] = ae_sixbits2char(sixbits[i]);
|
||
|
buf[AE_SER_ENTRY_LENGTH] = 0x00;
|
||
|
}
|
||
|
|
||
|
/************************************************************************
This function unserializes integer value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as end of
            the integer value.
state       ALGLIB environment state

This function raises an error in case unexpected symbol is found
************************************************************************/
|
||
|
ae_int_t ae_str2int(const char *buf, ae_state *state, const char **pasttheend)
|
||
|
{
|
||
|
const char *emsg = "ALGLIB: unable to read integer value from stream";
|
||
|
ae_int_t sixbits[12];
|
||
|
ae_int_t sixbitsread, i;
|
||
|
union _u
|
||
|
{
|
||
|
ae_int_t ival;
|
||
|
unsigned char bytes[9];
|
||
|
} u;
|
||
|
/*
|
||
|
* 1. skip leading spaces
|
||
|
* 2. read and decode six-bit digits
|
||
|
* 3. set trailing digits to zeros
|
||
|
* 4. convert to little endian 64-bit integer representation
|
||
|
* 5. convert to big endian representation, if needed
|
||
|
*/
|
||
|
while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
|
||
|
buf++;
|
||
|
sixbitsread = 0;
|
||
|
while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
|
||
|
{
|
||
|
ae_int_t d;
|
||
|
d = ae_char2sixbits(*buf);
|
||
|
if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
sixbits[sixbitsread] = d;
|
||
|
sixbitsread++;
|
||
|
buf++;
|
||
|
}
|
||
|
*pasttheend = buf;
|
||
|
if( sixbitsread==0 )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
for(i=sixbitsread; i<12; i++)
|
||
|
sixbits[i] = 0;
|
||
|
ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
|
||
|
ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
|
||
|
ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = u.bytes[i];
|
||
|
u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
|
||
|
u.bytes[sizeof(ae_int_t)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
return u.ival;
|
||
|
}
|
||
|
|
||
|
/************************************************************************
This function unserializes 64-bit integer value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as end of
            the 64-bit integer value.
state       ALGLIB environment state

This function raises an error in case unexpected symbol is found
************************************************************************/
|
||
|
ae_int64_t ae_str2int64(const char *buf, ae_state *state, const char **pasttheend)
|
||
|
{
|
||
|
const char *emsg = "ALGLIB: unable to read integer value from stream";
|
||
|
ae_int_t sixbits[12];
|
||
|
ae_int_t sixbitsread, i;
|
||
|
unsigned char bytes[9];
|
||
|
ae_int64_t result;
|
||
|
|
||
|
/*
|
||
|
* 1. skip leading spaces
|
||
|
* 2. read and decode six-bit digits
|
||
|
* 3. set trailing digits to zeros
|
||
|
* 4. convert to little endian 64-bit integer representation
|
||
|
* 5. convert to big endian representation, if needed
|
||
|
*/
|
||
|
while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
|
||
|
buf++;
|
||
|
sixbitsread = 0;
|
||
|
while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
|
||
|
{
|
||
|
ae_int_t d;
|
||
|
d = ae_char2sixbits(*buf);
|
||
|
if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
sixbits[sixbitsread] = d;
|
||
|
sixbitsread++;
|
||
|
buf++;
|
||
|
}
|
||
|
*pasttheend = buf;
|
||
|
if( sixbitsread==0 )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
for(i=sixbitsread; i<12; i++)
|
||
|
sixbits[i] = 0;
|
||
|
ae_foursixbits2threebytes(sixbits+0, bytes+0);
|
||
|
ae_foursixbits2threebytes(sixbits+4, bytes+3);
|
||
|
ae_foursixbits2threebytes(sixbits+8, bytes+6);
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = bytes[i];
|
||
|
bytes[i] = bytes[sizeof(ae_int_t)-1-i];
|
||
|
bytes[sizeof(ae_int_t)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
memmove(&result, bytes, sizeof(result));
|
||
|
return result;
|
||
|
}
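
/*
 * Illustrative sketch (not part of the library): integers survive the
 * six-bit text encoding unchanged, so ae_int2str() followed by
 * ae_str2int() is an identity. demo_int_roundtrip is a hypothetical name.
 */
static ae_bool demo_int_roundtrip(ae_int_t v)
{
    ae_state state;
    char buf[AE_SER_ENTRY_LENGTH+1];
    const char *past;
    ae_int_t w;
    ae_state_init(&state);
    ae_int2str(v, buf, &state);      /* 11 six-bit characters + trailing zero */
    w = ae_str2int(buf, &state, &past);
    ae_state_clear(&state);
    return w==v ? ae_true : ae_false;
}
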
/************************************************************************
|
||
|
This function serializes double value into buffer
|
||
|
|
||
|
v double value to be serialized
|
||
|
buf buffer, at least 12 characters wide
|
||
|
(11 chars for value, one for trailing zero)
|
||
|
state ALGLIB environment state
|
||
|
************************************************************************/
|
||
|
void ae_double2str(double v, char *buf, ae_state *state)
|
||
|
{
|
||
|
union _u
|
||
|
{
|
||
|
double dval;
|
||
|
unsigned char bytes[9];
|
||
|
} u;
|
||
|
ae_int_t i;
|
||
|
ae_int_t sixbits[12];
|
||
|
|
||
|
/*
|
||
|
* handle special quantities
|
||
|
*/
|
||
|
if( ae_isnan(v, state) )
|
||
|
{
|
||
|
const char *s = ".nan_______";
|
||
|
memmove(buf, s, strlen(s)+1);
|
||
|
return;
|
||
|
}
|
||
|
if( ae_isposinf(v, state) )
|
||
|
{
|
||
|
const char *s = ".posinf____";
|
||
|
memmove(buf, s, strlen(s)+1);
|
||
|
return;
|
||
|
}
|
||
|
if( ae_isneginf(v, state) )
|
||
|
{
|
||
|
const char *s = ".neginf____";
|
||
|
memmove(buf, s, strlen(s)+1);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* process general case:
|
||
|
* 1. copy v to array of chars
|
||
|
* 2. set 9th byte of u.bytes to zero in order to
|
||
|
* simplify conversion to six-bit representation
|
||
|
* 3. convert to little endian (if needed)
|
||
|
* 4. convert to six-bit representation
|
||
|
* (last 12th element of sixbits is always zero, we do not output it)
|
||
|
*/
|
||
|
u.dval = v;
|
||
|
u.bytes[8] = 0;
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = u.bytes[i];
|
||
|
u.bytes[i] = u.bytes[sizeof(double)-1-i];
|
||
|
u.bytes[sizeof(double)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
|
||
|
ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
|
||
|
ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
|
||
|
for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
|
||
|
buf[i] = ae_sixbits2char(sixbits[i]);
|
||
|
buf[AE_SER_ENTRY_LENGTH] = 0x00;
|
||
|
}
|
||
|
|
||
|
/************************************************************************
This function unserializes double value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as end of
            the double value.
state       ALGLIB environment state

This function raises an error in case unexpected symbol is found
************************************************************************/
|
||
|
double ae_str2double(const char *buf, ae_state *state, const char **pasttheend)
|
||
|
{
|
||
|
const char *emsg = "ALGLIB: unable to read double value from stream";
|
||
|
ae_int_t sixbits[12];
|
||
|
ae_int_t sixbitsread, i;
|
||
|
union _u
|
||
|
{
|
||
|
double dval;
|
||
|
unsigned char bytes[9];
|
||
|
} u;
|
||
|
|
||
|
|
||
|
/*
|
||
|
* skip leading spaces
|
||
|
*/
|
||
|
while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
|
||
|
buf++;
|
||
|
|
||
|
/*
|
||
|
* Handle special cases
|
||
|
*/
|
||
|
if( *buf=='.' )
|
||
|
{
|
||
|
const char *s_nan = ".nan_______";
|
||
|
const char *s_posinf = ".posinf____";
|
||
|
const char *s_neginf = ".neginf____";
|
||
|
if( strncmp(buf, s_nan, strlen(s_nan))==0 )
|
||
|
{
|
||
|
*pasttheend = buf+strlen(s_nan);
|
||
|
return state->v_nan;
|
||
|
}
|
||
|
if( strncmp(buf, s_posinf, strlen(s_posinf))==0 )
|
||
|
{
|
||
|
*pasttheend = buf+strlen(s_posinf);
|
||
|
return state->v_posinf;
|
||
|
}
|
||
|
if( strncmp(buf, s_neginf, strlen(s_neginf))==0 )
|
||
|
{
|
||
|
*pasttheend = buf+strlen(s_neginf);
|
||
|
return state->v_neginf;
|
||
|
}
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* General case:
|
||
|
* 1. read and decode six-bit digits
|
||
|
* 2. check that all 11 digits were read
|
||
|
* 3. set last 12th digit to zero (needed for simplicity of conversion)
|
||
|
* 4. convert to 8 bytes
|
||
|
* 5. convert to big endian representation, if needed
|
||
|
*/
|
||
|
sixbitsread = 0;
|
||
|
while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
|
||
|
{
|
||
|
ae_int_t d;
|
||
|
d = ae_char2sixbits(*buf);
|
||
|
if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
sixbits[sixbitsread] = d;
|
||
|
sixbitsread++;
|
||
|
buf++;
|
||
|
}
|
||
|
*pasttheend = buf;
|
||
|
if( sixbitsread!=AE_SER_ENTRY_LENGTH )
|
||
|
ae_break(state, ERR_ASSERTION_FAILED, emsg);
|
||
|
sixbits[AE_SER_ENTRY_LENGTH] = 0;
|
||
|
ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
|
||
|
ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
|
||
|
ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
|
||
|
if( state->endianness==AE_BIG_ENDIAN )
|
||
|
{
|
||
|
for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
|
||
|
{
|
||
|
unsigned char tc;
|
||
|
tc = u.bytes[i];
|
||
|
u.bytes[i] = u.bytes[sizeof(double)-1-i];
|
||
|
u.bytes[sizeof(double)-1-i] = tc;
|
||
|
}
|
||
|
}
|
||
|
return u.dval;
|
||
|
}
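
/*
 * Illustrative sketch (not part of the library): doubles are serialized
 * bit-exactly (special values become ".nan_______", ".posinf____" or
 * ".neginf____"), so the round trip below returns ae_true for any finite
 * v. demo_double_roundtrip is a hypothetical name.
 */
static ae_bool demo_double_roundtrip(double v)
{
    ae_state state;
    char buf[AE_SER_ENTRY_LENGTH+1];
    const char *past;
    double w;
    ae_state_init(&state);
    ae_double2str(v, buf, &state);
    w = ae_str2double(buf, &state, &past);
    ae_state_clear(&state);
    return w==v ? ae_true : ae_false; /* NaN deliberately compares unequal */
}
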
/************************************************************************
|
||
|
This function performs given number of spin-wait iterations
|
||
|
************************************************************************/
|
||
|
void ae_spin_wait(ae_int_t cnt)
|
||
|
{
|
||
|
/*
|
||
|
* these strange operations with ae_never_change_it are necessary to
|
||
|
* prevent compiler optimization of the loop.
|
||
|
*/
|
||
|
volatile ae_int_t i;
|
||
|
|
||
|
/* very unlikely because no one will wait for such an amount of cycles */
|
||
|
if( cnt>0x12345678 )
|
||
|
ae_never_change_it = cnt%10;
|
||
|
|
||
|
/* spin wait, test condition which will never be true */
|
||
|
for(i=0; i<cnt; i++)
|
||
|
if( ae_never_change_it>0 )
|
||
|
ae_never_change_it--;
|
||
|
}


/************************************************************************
This function causes the calling thread to relinquish the CPU. The thread
is moved to the end of the queue and some other thread gets to run.

NOTE: this function should NOT be called when AE_OS is AE_UNKNOWN - the
      whole program will be abnormally terminated.
************************************************************************/
void ae_yield()
{
#if AE_OS==AE_WINDOWS
    if( !SwitchToThread() )
        Sleep(0);
#elif AE_OS==AE_POSIX
    sched_yield();
#else
    abort();
#endif
}
|
||
|
|
||
|
/************************************************************************
This function initializes _lock structure which is internally used by
ae_lock high-level structure.

_lock structure is statically allocated, no malloc() calls are performed
during its allocation. However, you have to call _ae_free_lock_raw() in
order to deallocate this lock properly.
************************************************************************/
|
||
|
void _ae_init_lock_raw(_lock *p)
|
||
|
{
|
||
|
#if AE_OS==AE_WINDOWS
|
||
|
p->p_lock = (ae_int_t*)ae_align((void*)(&p->buf),AE_LOCK_ALIGNMENT);
|
||
|
p->p_lock[0] = 0;
|
||
|
#elif AE_OS==AE_POSIX
|
||
|
pthread_mutex_init(&p->mutex, NULL);
|
||
|
#else
|
||
|
p->is_locked = ae_false;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function acquires _lock structure.
|
||
|
|
||
|
It is the low-level workhorse utilized by ae_acquire_lock().
|
||
|
************************************************************************/
|
||
|
void _ae_acquire_lock_raw(_lock *p)
|
||
|
{
|
||
|
#if AE_OS==AE_WINDOWS
|
||
|
ae_int_t cnt = 0;
|
||
|
#ifdef AE_SMP_DEBUGCOUNTERS
|
||
|
InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_acquisitions);
|
||
|
#endif
|
||
|
for(;;)
|
||
|
{
|
||
|
if( InterlockedCompareExchange((LONG volatile *)p->p_lock, 1, 0)==0 )
|
||
|
return;
|
||
|
ae_spin_wait(AE_LOCK_CYCLES);
|
||
|
#ifdef AE_SMP_DEBUGCOUNTERS
|
||
|
InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_spinwaits);
|
||
|
#endif
|
||
|
cnt++;
|
||
|
if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
|
||
|
{
|
||
|
#ifdef AE_SMP_DEBUGCOUNTERS
|
||
|
InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_yields);
|
||
|
#endif
|
||
|
ae_yield();
|
||
|
}
|
||
|
}
|
||
|
#elif AE_OS==AE_POSIX
|
||
|
ae_int_t cnt = 0;
|
||
|
for(;;)
|
||
|
{
|
||
|
if( pthread_mutex_trylock(&p->mutex)==0 )
|
||
|
return;
|
||
|
ae_spin_wait(AE_LOCK_CYCLES);
|
||
|
cnt++;
|
||
|
if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
|
||
|
ae_yield();
|
||
|
}
|
||
|
;
|
||
|
#else
|
||
|
AE_CRITICAL_ASSERT(!p->is_locked);
|
||
|
p->is_locked = ae_true;
|
||
|
#endif
|
||
|
}


/************************************************************************
This function releases _lock structure.

It is the low-level lock function used by ae_release_lock().
************************************************************************/
void _ae_release_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    InterlockedExchange((LONG volatile *)p->p_lock, 0);
#elif AE_OS==AE_POSIX
    pthread_mutex_unlock(&p->mutex);
#else
    p->is_locked = ae_false;
#endif
}


/************************************************************************
This function frees _lock structure.
************************************************************************/
void _ae_free_lock_raw(_lock *p)
{
#if AE_OS==AE_POSIX
    pthread_mutex_destroy(&p->mutex);
#endif
}
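
/*
 * Illustrative sketch (not part of the library): complete lifecycle of the
 * low-level _lock primitive defined above. No dynamic memory is involved,
 * but _ae_free_lock_raw() is still required for the POSIX mutex branch.
 * demo_raw_lock_lifecycle is a hypothetical name.
 */
static void demo_raw_lock_lifecycle(void)
{
    _lock lk;
    _ae_init_lock_raw(&lk);
    _ae_acquire_lock_raw(&lk);
    /* ... critical section ... */
    _ae_release_lock_raw(&lk);
    _ae_free_lock_raw(&lk);
}
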
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function initializes ae_lock structure.
|
||
|
|
||
|
INPUT PARAMETERS:
|
||
|
lock - pointer to lock structure, must be zero-filled
|
||
|
state - pointer to state structure, used for exception
|
||
|
handling and management of automatic objects.
|
||
|
make_automatic - if true, lock object is added to automatic
|
||
|
memory management list.
|
||
|
|
||
|
NOTE: as a special exception, this function allows you to specify NULL
|
||
|
state pointer. In this case all exceptions arising during construction
|
||
|
are handled as critical failures, with abort() being called.
|
||
|
make_automatic must be false on such calls.
|
||
|
************************************************************************/
|
||
|
void ae_init_lock(ae_lock *lock, ae_state *state, ae_bool make_automatic)
|
||
|
{
|
||
|
_lock *p;
|
||
|
AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
|
||
|
if(state==NULL)
|
||
|
{
|
||
|
ae_state _tmp_state;
|
||
|
AE_CRITICAL_ASSERT(!make_automatic);
|
||
|
ae_state_init(&_tmp_state);
|
||
|
ae_init_lock(lock, &_tmp_state, ae_false);
|
||
|
ae_state_clear(&_tmp_state);
|
||
|
return;
|
||
|
}
|
||
|
lock->eternal = ae_false;
|
||
|
ae_db_init(&lock->db, sizeof(_lock), state, make_automatic);
|
||
|
lock->lock_ptr = lock->db.ptr;
|
||
|
p = (_lock*)lock->lock_ptr;
|
||
|
_ae_init_lock_raw(p);
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
This function initializes "eternal" ae_lock structure which is expected
|
||
|
to persist until the end of the execution of the program. Eternal locks
|
||
|
can not be deallocated (cleared) and do not increase debug allocation
|
||
|
counters. Errors during allocation of eternal locks are considered
|
||
|
critical exceptions and handled by calling abort().
|
||
|
|
||
|
INPUT PARAMETERS:
lock - pointer to lock structure, must be zero-filled
|
||
|
************************************************************************/
|
||
|
void ae_init_lock_eternal(ae_lock *lock)
|
||
|
{
|
||
|
_lock *p;
|
||
|
AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
|
||
|
lock->eternal = ae_true;
|
||
|
lock->lock_ptr = eternal_malloc(sizeof(_lock));
|
||
|
p = (_lock*)lock->lock_ptr;
|
||
|
_ae_init_lock_raw(p);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function acquires lock. In case lock is busy, we perform several
|
||
|
iterations inside tight loop before trying again.
|
||
|
************************************************************************/
|
||
|
void ae_acquire_lock(ae_lock *lock)
|
||
|
{
|
||
|
_lock *p;
|
||
|
p = (_lock*)lock->lock_ptr;
|
||
|
_ae_acquire_lock_raw(p);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function releases lock.
|
||
|
************************************************************************/
|
||
|
void ae_release_lock(ae_lock *lock)
|
||
|
{
|
||
|
_lock *p;
|
||
|
p = (_lock*)lock->lock_ptr;
|
||
|
_ae_release_lock_raw(p);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function frees ae_lock structure.
|
||
|
************************************************************************/
|
||
|
void ae_free_lock(ae_lock *lock)
|
||
|
{
|
||
|
_lock *p;
|
||
|
AE_CRITICAL_ASSERT(!lock->eternal);
|
||
|
p = (_lock*)lock->lock_ptr;
|
||
|
if( p!=NULL )
|
||
|
_ae_free_lock_raw(p);
|
||
|
ae_db_free(&lock->db);
|
||
|
}
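
/*
 * Illustrative sketch (not part of the library): typical lifecycle of the
 * high-level ae_lock wrapper. Passing a NULL state pointer is explicitly
 * allowed by ae_init_lock(); construction errors are then treated as
 * critical failures. demo_ae_lock_lifecycle is a hypothetical name.
 */
static void demo_ae_lock_lifecycle(void)
{
    ae_lock lk;
    memset(&lk, 0, sizeof(lk));        /* ae_init_lock() requires a zero-filled structure */
    ae_init_lock(&lk, NULL, ae_false); /* make_automatic must be false with NULL state    */
    ae_acquire_lock(&lk);
    /* ... critical section ... */
    ae_release_lock(&lk);
    ae_free_lock(&lk);
}
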
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function creates ae_shared_pool structure.
|
||
|
|
||
|
dst destination shared pool, must be zero-filled
|
||
|
already allocated, but not initialized.
|
||
|
state pointer to current state structure. Can not be NULL.
|
||
|
used for exception handling (say, allocation error results
|
||
|
in longjmp call).
|
||
|
make_automatic if true, pool will be registered in the current frame
of the state structure;
|
||
|
|
||
|
Error handling:
|
||
|
* on failure calls ae_break() with NULL state pointer. Usually it results
|
||
|
in abort() call.
|
||
|
|
||
|
dst is assumed to be uninitialized, its fields are ignored.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_init(void *_dst, ae_state *state, ae_bool make_automatic)
|
||
|
{
|
||
|
ae_shared_pool *dst;
|
||
|
|
||
|
AE_CRITICAL_ASSERT(state!=NULL);
|
||
|
dst = (ae_shared_pool*)_dst;
|
||
|
AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
|
||
|
|
||
|
/* init */
|
||
|
dst->seed_object = NULL;
|
||
|
dst->recycled_objects = NULL;
|
||
|
dst->recycled_entries = NULL;
|
||
|
dst->enumeration_counter = NULL;
|
||
|
dst->size_of_object = 0;
|
||
|
dst->init = NULL;
|
||
|
dst->init_copy = NULL;
|
||
|
dst->destroy = NULL;
|
||
|
dst->frame_entry.deallocator = ae_shared_pool_destroy;
|
||
|
dst->frame_entry.ptr = dst;
|
||
|
if( make_automatic )
|
||
|
ae_db_attach(&dst->frame_entry, state);
|
||
|
ae_init_lock(&dst->pool_lock, state, ae_false);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function clears all dynamically allocated fields of the pool except
|
||
|
for the lock. It does NOT try to acquire pool_lock.
|
||
|
|
||
|
NOTE: this function is NOT thread-safe, it is not protected by lock.
|
||
|
************************************************************************/
|
||
|
static void ae_shared_pool_internalclear(ae_shared_pool *dst)
|
||
|
{
|
||
|
ae_shared_pool_entry *ptr, *tmp;
|
||
|
|
||
|
/* destroy seed */
|
||
|
if( dst->seed_object!=NULL )
|
||
|
{
|
||
|
dst->destroy((void*)dst->seed_object);
|
||
|
ae_free((void*)dst->seed_object);
|
||
|
dst->seed_object = NULL;
|
||
|
}
|
||
|
|
||
|
/* destroy recycled objects */
|
||
|
for(ptr=dst->recycled_objects; ptr!=NULL;)
|
||
|
{
|
||
|
tmp = (ae_shared_pool_entry*)ptr->next_entry;
|
||
|
dst->destroy(ptr->obj);
|
||
|
ae_free(ptr->obj);
|
||
|
ae_free(ptr);
|
||
|
ptr = tmp;
|
||
|
}
|
||
|
dst->recycled_objects = NULL;
|
||
|
|
||
|
/* destroy recycled entries */
|
||
|
for(ptr=dst->recycled_entries; ptr!=NULL;)
|
||
|
{
|
||
|
tmp = (ae_shared_pool_entry*)ptr->next_entry;
|
||
|
ae_free(ptr);
|
||
|
ptr = tmp;
|
||
|
}
|
||
|
dst->recycled_entries = NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function creates copy of ae_shared_pool.
|
||
|
|
||
|
dst destination pool, must be zero-filled
|
||
|
src source pool
|
||
|
state pointer to current state structure. Can not be NULL.
|
||
|
used for exception handling (say, allocation error results
|
||
|
in longjmp call).
|
||
|
make_automatic if true, pool will be registered in the current frame
of the state structure;
|
||
|
|
||
|
dst is assumed to be uninitialized, its fields are ignored.
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when lock can be used by another thread.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_init_copy(void *_dst, void *_src, ae_state *state, ae_bool make_automatic)
|
||
|
{
|
||
|
ae_shared_pool *dst, *src;
|
||
|
ae_shared_pool_entry *ptr;
|
||
|
|
||
|
/* state!=NULL, allocation errors result in exception */
|
||
|
/* AE_CRITICAL_ASSERT(state!=NULL); */
|
||
|
|
||
|
dst = (ae_shared_pool*)_dst;
|
||
|
src = (ae_shared_pool*)_src;
|
||
|
ae_shared_pool_init(dst, state, make_automatic);
|
||
|
|
||
|
/* copy non-pointer fields */
|
||
|
dst->size_of_object = src->size_of_object;
|
||
|
dst->init = src->init;
|
||
|
dst->init_copy = src->init_copy;
|
||
|
dst->destroy = src->destroy;
|
||
|
|
||
|
/* copy seed object */
|
||
|
if( src->seed_object!=NULL )
|
||
|
{
|
||
|
dst->seed_object = ae_malloc(dst->size_of_object, state);
|
||
|
memset(dst->seed_object, 0, dst->size_of_object);
|
||
|
dst->init_copy(dst->seed_object, src->seed_object, state, ae_false);
|
||
|
}
|
||
|
|
||
|
/* copy recycled objects */
|
||
|
dst->recycled_objects = NULL;
|
||
|
for(ptr=src->recycled_objects; ptr!=NULL; ptr=(ae_shared_pool_entry*)ptr->next_entry)
|
||
|
{
|
||
|
ae_shared_pool_entry *tmp;
|
||
|
|
||
|
/* allocate entry, immediately add to the recycled list
|
||
|
(we do not want to lose it in case of future malloc failures) */
|
||
|
tmp = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
|
||
|
memset(tmp, 0, sizeof(*tmp));
|
||
|
tmp->next_entry = dst->recycled_objects;
|
||
|
dst->recycled_objects = tmp;
|
||
|
|
||
|
/* prepare place for object, init_copy() it */
|
||
|
tmp->obj = ae_malloc(dst->size_of_object, state);
|
||
|
memset(tmp->obj, 0, dst->size_of_object);
|
||
|
dst->init_copy(tmp->obj, ptr->obj, state, ae_false);
|
||
|
}
|
||
|
|
||
|
/* recycled entries are not copied because they do not store any information */
|
||
|
dst->recycled_entries = NULL;
|
||
|
|
||
|
/* enumeration counter is reset on copying */
|
||
|
dst->enumeration_counter = NULL;
|
||
|
|
||
|
/* initialize frame record */
|
||
|
dst->frame_entry.deallocator = ae_shared_pool_destroy;
|
||
|
dst->frame_entry.ptr = dst;
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function performs destruction of the pool object.
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when pool can be used by another thread.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_clear(void *_dst)
|
||
|
{
|
||
|
ae_shared_pool *dst = (ae_shared_pool*)_dst;
|
||
|
|
||
|
/* clear seed and lists */
|
||
|
ae_shared_pool_internalclear(dst);
|
||
|
|
||
|
/* clear fields */
|
||
|
dst->seed_object = NULL;
|
||
|
dst->recycled_objects = NULL;
|
||
|
dst->recycled_entries = NULL;
|
||
|
dst->enumeration_counter = NULL;
|
||
|
dst->size_of_object = 0;
|
||
|
dst->init = NULL;
|
||
|
dst->init_copy = NULL;
|
||
|
dst->destroy = NULL;
|
||
|
}
|
||
|
|
||
|
void ae_shared_pool_destroy(void *_dst)
|
||
|
{
|
||
|
ae_shared_pool *dst = (ae_shared_pool*)_dst;
|
||
|
ae_shared_pool_clear(_dst);
|
||
|
ae_free_lock(&dst->pool_lock);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function returns True if the internal seed object was set. It returns
False for an unseeded pool.
|
||
|
|
||
|
dst destination pool (initialized by constructor function)
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when lock can be used by another thread.
|
||
|
************************************************************************/
|
||
|
ae_bool ae_shared_pool_is_initialized(void *_dst)
|
||
|
{
|
||
|
ae_shared_pool *dst = (ae_shared_pool*)_dst;
|
||
|
return dst->seed_object!=NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function sets internal seed object. All objects owned by the pool
|
||
|
(current seed object, recycled objects) are automatically freed.
|
||
|
|
||
|
dst destination pool (initialized by constructor function)
|
||
|
seed_object new seed object
|
||
|
size_of_object sizeof(), used to allocate memory
|
||
|
init constructor function
|
||
|
init_copy copy constructor
|
||
|
clear destructor function
|
||
|
state ALGLIB environment state
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when lock can be used by another thread.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_set_seed(
|
||
|
ae_shared_pool *dst,
|
||
|
void *seed_object,
|
||
|
ae_int_t size_of_object,
|
||
|
void (*init)(void* dst, ae_state* state, ae_bool make_automatic),
|
||
|
void (*init_copy)(void* dst, void* src, ae_state* state, ae_bool make_automatic),
|
||
|
void (*destroy)(void* ptr),
|
||
|
ae_state *state)
|
||
|
{
|
||
|
/* state!=NULL, allocation errors result in exception */
|
||
|
AE_CRITICAL_ASSERT(state!=NULL);
|
||
|
|
||
|
/* destroy internal objects */
|
||
|
ae_shared_pool_internalclear(dst);
|
||
|
|
||
|
/* set non-pointer fields */
|
||
|
dst->size_of_object = size_of_object;
|
||
|
dst->init = init;
|
||
|
dst->init_copy = init_copy;
|
||
|
dst->destroy = destroy;
|
||
|
|
||
|
/* set seed object */
|
||
|
dst->seed_object = ae_malloc(size_of_object, state);
|
||
|
memset(dst->seed_object, 0, size_of_object);
|
||
|
init_copy(dst->seed_object, seed_object, state, ae_false);
|
||
|
}
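
/*
 * Illustrative sketch (not part of the library): seeding a pool with a
 * trivial user-defined object. The demo_accum type and its three callbacks
 * are hypothetical; they only demonstrate the signatures expected by
 * ae_shared_pool_set_seed().
 */
typedef struct { double sum; } demo_accum;
static void demo_accum_init(void *p, ae_state *state, ae_bool make_automatic)
{
    ((demo_accum*)p)->sum = 0.0;
}
static void demo_accum_init_copy(void *dst, void *src, ae_state *state, ae_bool make_automatic)
{
    ((demo_accum*)dst)->sum = ((demo_accum*)src)->sum;
}
static void demo_accum_destroy(void *p)
{
    /* nothing is dynamically allocated, nothing to release */
}
static void demo_seed_pool(ae_shared_pool *pool, ae_state *state)
{
    demo_accum seed;
    seed.sum = 0.0;
    ae_shared_pool_set_seed(pool, &seed, (ae_int_t)sizeof(demo_accum),
        demo_accum_init, demo_accum_init_copy, demo_accum_destroy, state);
}
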
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function retrieves a copy of the seed object from the pool and
|
||
|
stores it to target smart pointer ptr.
|
||
|
|
||
|
In case target pointer owns non-NULL value, it is deallocated before
|
||
|
storing value retrieved from pool. Target pointer becomes owner of the
|
||
|
value which was retrieved from pool.
|
||
|
|
||
|
pool pool
|
||
|
pptr pointer to ae_smart_ptr structure
|
||
|
state ALGLIB environment state
|
||
|
|
||
|
NOTE: this function IS thread-safe. It acquires pool lock during its
|
||
|
operation and can be used simultaneously from several threads.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_retrieve(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_smart_ptr *pptr,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
void *new_obj;
|
||
|
|
||
|
/* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
|
||
|
AE_CRITICAL_ASSERT(state!=NULL);
|
||
|
|
||
|
/* assert that pool was seeded */
|
||
|
ae_assert(
|
||
|
pool->seed_object!=NULL,
|
||
|
"ALGLIB: shared pool is not seeded, PoolRetrieve() failed",
|
||
|
state);
|
||
|
|
||
|
/* acquire lock */
|
||
|
ae_acquire_lock(&pool->pool_lock);
|
||
|
|
||
|
/* try to reuse recycled objects */
|
||
|
if( pool->recycled_objects!=NULL )
|
||
|
{
|
||
|
ae_shared_pool_entry *result;
|
||
|
|
||
|
/* retrieve entry/object from list of recycled objects */
|
||
|
result = pool->recycled_objects;
|
||
|
pool->recycled_objects = (ae_shared_pool_entry*)pool->recycled_objects->next_entry;
|
||
|
new_obj = result->obj;
|
||
|
result->obj = NULL;
|
||
|
|
||
|
/* move entry to list of recycled entries */
|
||
|
result->next_entry = pool->recycled_entries;
|
||
|
pool->recycled_entries = result;
|
||
|
|
||
|
/* release lock */
|
||
|
ae_release_lock(&pool->pool_lock);
|
||
|
|
||
|
/* assign object to smart pointer */
|
||
|
ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* release lock; we do not need it anymore because copy constructor does not modify source variable */
|
||
|
ae_release_lock(&pool->pool_lock);
|
||
|
|
||
|
/* create new object from seed, immediately assign object to smart pointer
|
||
|
(do not want to lose it in case of future failures) */
|
||
|
new_obj = ae_malloc(pool->size_of_object, state);
|
||
|
memset(new_obj, 0, pool->size_of_object);
|
||
|
ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
|
||
|
|
||
|
/* perform actual copying; before this line smartptr points to zero-filled instance */
|
||
|
pool->init_copy(new_obj, pool->seed_object, state, ae_false);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function recycles object owned by smart pointer by moving it to
|
||
|
internal storage of the shared pool.
|
||
|
|
||
|
Source pointer must own the object. After function is over, it owns NULL
|
||
|
pointer.
|
||
|
|
||
|
pool pool
|
||
|
pptr pointer to ae_smart_ptr structure
|
||
|
state ALGLIB environment state
|
||
|
|
||
|
NOTE: this function IS thread-safe. It acquires pool lock during its
|
||
|
operation and can be used simultaneously from several threads.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_recycle(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_smart_ptr *pptr,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
ae_shared_pool_entry *new_entry;
|
||
|
|
||
|
/* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
|
||
|
AE_CRITICAL_ASSERT(state!=NULL);
|
||
|
|
||
|
/* assert that pool was seeded */
|
||
|
ae_assert(
|
||
|
pool->seed_object!=NULL,
|
||
|
"ALGLIB: shared pool is not seeded, PoolRecycle() failed",
|
||
|
state);
|
||
|
|
||
|
/* assert that pointer non-null and owns the object */
|
||
|
ae_assert(pptr->is_owner, "ALGLIB: pptr in ae_shared_pool_recycle() does not own its pointer", state);
|
||
|
ae_assert(pptr->ptr!=NULL, "ALGLIB: pptr in ae_shared_pool_recycle() is NULL", state);
|
||
|
|
||
|
/* acquire lock */
|
||
|
ae_acquire_lock(&pool->pool_lock);
|
||
|
|
||
|
/* acquire shared pool entry (reuse one from recycled_entries or allocate new one) */
|
||
|
if( pool->recycled_entries!=NULL )
|
||
|
{
|
||
|
/* reuse previously allocated entry */
|
||
|
new_entry = pool->recycled_entries;
|
||
|
pool->recycled_entries = (ae_shared_pool_entry*)new_entry->next_entry;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* Allocate memory for new entry.
|
||
|
*
|
||
|
* NOTE: we release pool lock during allocation because ae_malloc() may raise
|
||
|
* exception and we do not want our pool to be left in the locked state.
|
||
|
*/
|
||
|
ae_release_lock(&pool->pool_lock);
|
||
|
new_entry = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
|
||
|
ae_acquire_lock(&pool->pool_lock);
|
||
|
}
|
||
|
|
||
|
/* add object to the list of recycled objects */
|
||
|
new_entry->obj = pptr->ptr;
|
||
|
new_entry->next_entry = pool->recycled_objects;
|
||
|
pool->recycled_objects = new_entry;
|
||
|
|
||
|
/* release lock object */
|
||
|
ae_release_lock(&pool->pool_lock);
|
||
|
|
||
|
/* release source pointer */
|
||
|
ae_smart_ptr_release(pptr);
|
||
|
}
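
/*
 * Illustrative sketch (not part of the library): the usual worker pattern
 * built on the two thread-safe calls above - retrieve a private copy of
 * the seed, work on it, then recycle it. It reuses the hypothetical
 * demo_accum type from the seeding sketch and assumes ae_smart_ptr_init(),
 * defined earlier in this file, with the usual
 * (dst, subscriber, state, make_automatic) signature.
 */
static void demo_pool_worker(ae_shared_pool *pool, ae_state *state)
{
    ae_smart_ptr p;
    demo_accum *obj = NULL;
    memset(&p, 0, sizeof(p));                 /* init functions expect zero-filled memory     */
    ae_smart_ptr_init(&p, (void**)&obj, state, ae_false);
    ae_shared_pool_retrieve(pool, &p, state); /* obj -> recycled object or a copy of the seed */
    obj->sum += 1.0;                          /* work on the thread-local copy                */
    ae_shared_pool_recycle(pool, &p, state);  /* hand it back; p owns NULL afterwards         */
}
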
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function clears internal list of recycled objects, but does not
|
||
|
change seed object managed by the pool.
|
||
|
|
||
|
pool pool
|
||
|
state ALGLIB environment state
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when lock can be used by another thread.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_clear_recycled(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
ae_shared_pool_entry *ptr, *tmp;
|
||
|
|
||
|
/* clear recycled objects */
|
||
|
for(ptr=pool->recycled_objects; ptr!=NULL;)
|
||
|
{
|
||
|
tmp = (ae_shared_pool_entry*)ptr->next_entry;
|
||
|
pool->destroy(ptr->obj);
|
||
|
ae_free(ptr->obj);
|
||
|
ae_free(ptr);
|
||
|
ptr = tmp;
|
||
|
}
|
||
|
pool->recycled_objects = NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function allows one to enumerate recycled elements of the shared pool.
It stores a pointer to the first recycled object in the smart pointer.
|
||
|
|
||
|
IMPORTANT:
|
||
|
* in case target pointer owns non-NULL value, it is deallocated before
|
||
|
storing value retrieved from pool.
|
||
|
* recycled object IS NOT removed from pool
|
||
|
* target pointer DOES NOT become owner of the new value
|
||
|
* this function IS NOT thread-safe
|
||
|
* you SHOULD NOT modify shared pool during enumeration (although you can
|
||
|
modify state of the objects retrieved from pool)
|
||
|
* in case there are no recycled objects in the pool, NULL is stored to pptr
|
||
|
* in case pool is not seeded, NULL is stored to pptr
|
||
|
|
||
|
pool pool
|
||
|
pptr pointer to ae_smart_ptr structure
|
||
|
state ALGLIB environment state
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_first_recycled(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_smart_ptr *pptr,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
/* modify internal enumeration counter */
|
||
|
pool->enumeration_counter = pool->recycled_objects;
|
||
|
|
||
|
/* exit on empty list */
|
||
|
if( pool->enumeration_counter==NULL )
|
||
|
{
|
||
|
ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* assign object to smart pointer */
|
||
|
ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function allows one to enumerate recycled elements of the shared pool.
It stores a pointer to the next recycled object in the smart pointer.
|
||
|
|
||
|
IMPORTANT:
|
||
|
* in case target pointer owns non-NULL value, it is deallocated before
|
||
|
storing value retrieved from pool.
|
||
|
* recycled object IS NOT removed from pool
|
||
|
* target pointer DOES NOT become owner of the new value
|
||
|
* this function IS NOT thread-safe
|
||
|
* you SHOULD NOT modify shared pool during enumeration (although you can
|
||
|
modify state of the objects retrieved from pool)
|
||
|
* in case there are no recycled objects left in the pool, NULL is stored.
|
||
|
* in case pool is not seeded, NULL is stored.
|
||
|
|
||
|
pool pool
|
||
|
pptr pointer to ae_smart_ptr structure
|
||
|
state ALGLIB environment state
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_next_recycled(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_smart_ptr *pptr,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
/* exit on end of list */
|
||
|
if( pool->enumeration_counter==NULL )
|
||
|
{
|
||
|
ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* modify internal enumeration counter */
|
||
|
pool->enumeration_counter = (ae_shared_pool_entry*)pool->enumeration_counter->next_entry;
|
||
|
|
||
|
/* exit on empty list */
|
||
|
if( pool->enumeration_counter==NULL )
|
||
|
{
|
||
|
ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* assign object to smart pointer */
|
||
|
ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
|
||
|
}
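
/*
 * Illustrative sketch (not part of the library): enumerating recycled
 * objects with the two functions above, e.g. to reduce per-thread partial
 * results. Reuses the hypothetical demo_accum type; the smart pointer does
 * not own the objects it visits, and obj becomes NULL at the end of the
 * list because the pointer subscriber is updated on each assignment.
 */
static double demo_pool_sum_recycled(ae_shared_pool *pool, ae_state *state)
{
    ae_smart_ptr p;
    demo_accum *obj = NULL;
    double total = 0.0;
    memset(&p, 0, sizeof(p));
    ae_smart_ptr_init(&p, (void**)&obj, state, ae_false);
    ae_shared_pool_first_recycled(pool, &p, state);
    while( obj!=NULL )
    {
        total += obj->sum;
        ae_shared_pool_next_recycled(pool, &p, state);
    }
    return total;
}
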
|
||
|
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function clears internal list of recycled objects and seed object.
|
||
|
However, pool still can be used (after initialization with another seed).
|
||
|
|
||
|
pool pool
|
||
|
state ALGLIB environment state
|
||
|
|
||
|
NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
|
||
|
you should NOT call it when lock can be used by another thread.
|
||
|
************************************************************************/
|
||
|
void ae_shared_pool_reset(
|
||
|
ae_shared_pool *pool,
|
||
|
ae_state *state)
|
||
|
{
|
||
|
/* clear seed and lists */
|
||
|
ae_shared_pool_internalclear(pool);
|
||
|
|
||
|
/* clear fields */
|
||
|
pool->seed_object = NULL;
|
||
|
pool->recycled_objects = NULL;
|
||
|
pool->recycled_entries = NULL;
|
||
|
pool->enumeration_counter = NULL;
|
||
|
pool->size_of_object = 0;
|
||
|
pool->init = NULL;
|
||
|
pool->init_copy = NULL;
|
||
|
pool->destroy = NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
/************************************************************************
|
||
|
This function initializes serializer
|
||
|
************************************************************************/
|
||
|
void ae_serializer_init(ae_serializer *serializer)
|
||
|
{
|
||
|
serializer->mode = AE_SM_DEFAULT;
|
||
|
serializer->entries_needed = 0;
|
||
|
serializer->bytes_asked = 0;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_clear(ae_serializer *serializer)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void ae_serializer_alloc_start(ae_serializer *serializer)
|
||
|
{
|
||
|
serializer->entries_needed = 0;
|
||
|
serializer->bytes_asked = 0;
|
||
|
serializer->mode = AE_SM_ALLOC;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_alloc_entry(ae_serializer *serializer)
|
||
|
{
|
||
|
serializer->entries_needed++;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_alloc_byte_array(ae_serializer *serializer, ae_vector *bytes)
|
||
|
{
|
||
|
ae_int_t n;
|
||
|
n = bytes->cnt;
|
||
|
n = n/8 + (n%8>0 ? 1 : 0);
|
||
|
serializer->entries_needed += 1+n;
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
After allocation phase is done, this function returns required size of
|
||
|
the output string buffer (including trailing zero symbol). Actual size of
|
||
|
the data being stored can be a few characters smaller than requested.
|
||
|
************************************************************************/
|
||
|
ae_int_t ae_serializer_get_alloc_size(ae_serializer *serializer)
|
||
|
{
|
||
|
ae_int_t rows, lastrowsize, result;
|
||
|
|
||
|
serializer->mode = AE_SM_READY2S;
|
||
|
|
||
|
/* if no entries needed (degenerate case) */
|
||
|
if( serializer->entries_needed==0 )
|
||
|
{
|
||
|
serializer->bytes_asked = 4; /* a pair of chars for \r\n, one for dot, one for trailing zero */
|
||
|
return serializer->bytes_asked;
|
||
|
}
|
||
|
|
||
|
/* non-degenerate case */
|
||
|
rows = serializer->entries_needed/AE_SER_ENTRIES_PER_ROW;
|
||
|
lastrowsize = AE_SER_ENTRIES_PER_ROW;
|
||
|
if( serializer->entries_needed%AE_SER_ENTRIES_PER_ROW )
|
||
|
{
|
||
|
lastrowsize = serializer->entries_needed%AE_SER_ENTRIES_PER_ROW;
|
||
|
rows++;
|
||
|
}
|
||
|
|
||
|
/* calculate result size */
|
||
|
result = ((rows-1)*AE_SER_ENTRIES_PER_ROW+lastrowsize)*AE_SER_ENTRY_LENGTH; /* data size */
|
||
|
result += (rows-1)*(AE_SER_ENTRIES_PER_ROW-1)+(lastrowsize-1); /* space symbols */
|
||
|
result += rows*2; /* newline symbols */
|
||
|
result += 1; /* trailing dot */
|
||
|
result += 1; /* trailing zero */
|
||
|
serializer->bytes_asked = result;
|
||
|
return result;
|
||
|
}
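
/*
 * Illustrative sketch (not part of the library): a worked size computation
 * for the formula above. With AE_SER_ENTRIES_PER_ROW=5 and
 * AE_SER_ENTRY_LENGTH=11, seven entries occupy two rows:
 * 7*11 data chars + 5 separating spaces + 2*2 newline chars + 1 dot +
 * 1 trailing zero = 88 bytes. demo_serializer_size_for is a hypothetical
 * name.
 */
static ae_int_t demo_serializer_size_for(ae_int_t nentries)
{
    ae_serializer s;
    ae_int_t i;
    ae_serializer_init(&s);
    ae_serializer_alloc_start(&s);
    for(i=0; i<nentries; i++)
        ae_serializer_alloc_entry(&s);
    return ae_serializer_get_alloc_size(&s); /* nentries=7 -> 88 */
}
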
|
||
|
|
||
|
#ifdef AE_USE_CPP_SERIALIZATION
|
||
|
void ae_serializer_sstart_str(ae_serializer *serializer, std::string *buf)
|
||
|
{
|
||
|
serializer->mode = AE_SM_TO_CPPSTRING;
|
||
|
serializer->out_cppstr = buf;
|
||
|
serializer->entries_saved = 0;
|
||
|
serializer->bytes_written = 0;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_ustart_str(ae_serializer *serializer, const std::string *buf)
|
||
|
{
|
||
|
serializer->mode = AE_SM_FROM_STRING;
|
||
|
serializer->in_str = buf->c_str();
|
||
|
}
|
||
|
|
||
|
static char cpp_writer(const char *p_string, ae_int_t aux)
|
||
|
{
|
||
|
std::ostream *stream = reinterpret_cast<std::ostream*>(aux);
|
||
|
stream->write(p_string, strlen(p_string));
|
||
|
return stream->bad() ? 1 : 0;
|
||
|
}
|
||
|
|
||
|
static char cpp_reader(ae_int_t aux, ae_int_t cnt, char *p_buf)
|
||
|
{
|
||
|
std::istream *stream = reinterpret_cast<std::istream*>(aux);
|
||
|
int c;
|
||
|
if( cnt<=0 )
|
||
|
return 1; /* unexpected cnt */
|
||
|
for(;;)
|
||
|
{
|
||
|
c = stream->get();
|
||
|
if( c<0 || c>255 )
|
||
|
return 1; /* failure! */
|
||
|
if( c!=' ' && c!='\t' && c!='\n' && c!='\r' )
|
||
|
break;
|
||
|
}
|
||
|
p_buf[0] = (char)c;
|
||
|
for(int k=1; k<cnt; k++)
|
||
|
{
|
||
|
c = stream->get();
|
||
|
if( c<0 || c>255 || c==' ' || c=='\t' || c=='\n' || c=='\r' )
|
||
|
return 1; /* failure! */
|
||
|
p_buf[k] = (char)c;
|
||
|
}
|
||
|
p_buf[cnt] = 0;
|
||
|
return 0; /* success */
|
||
|
}
|
||
|
|
||
|
void ae_serializer_sstart_stream(ae_serializer *serializer, std::ostream *stream)
|
||
|
{
|
||
|
serializer->mode = AE_SM_TO_STREAM;
|
||
|
serializer->stream_writer = cpp_writer;
|
||
|
serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
|
||
|
serializer->entries_saved = 0;
|
||
|
serializer->bytes_written = 0;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_ustart_stream(ae_serializer *serializer, const std::istream *stream)
|
||
|
{
|
||
|
serializer->mode = AE_SM_FROM_STREAM;
|
||
|
serializer->stream_reader = cpp_reader;
|
||
|
serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void ae_serializer_sstart_str(ae_serializer *serializer, char *buf)
|
||
|
{
|
||
|
serializer->mode = AE_SM_TO_STRING;
|
||
|
serializer->out_str = buf;
|
||
|
serializer->out_str[0] = 0;
|
||
|
serializer->entries_saved = 0;
|
||
|
serializer->bytes_written = 0;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_ustart_str(ae_serializer *serializer, const char *buf)
|
||
|
{
|
||
|
serializer->mode = AE_SM_FROM_STRING;
|
||
|
serializer->in_str = buf;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_sstart_stream(ae_serializer *serializer, ae_stream_writer writer, ae_int_t aux)
|
||
|
{
|
||
|
serializer->mode = AE_SM_TO_STREAM;
|
||
|
serializer->stream_writer = writer;
|
||
|
serializer->stream_aux = aux;
|
||
|
serializer->entries_saved = 0;
|
||
|
serializer->bytes_written = 0;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_ustart_stream(ae_serializer *serializer, ae_stream_reader reader, ae_int_t aux)
|
||
|
{
|
||
|
serializer->mode = AE_SM_FROM_STREAM;
|
||
|
serializer->stream_reader = reader;
|
||
|
serializer->stream_aux = aux;
|
||
|
}
|
||
|
|
||
|
void ae_serializer_serialize_bool(ae_serializer *serializer, ae_bool v, ae_state *state)
{
    char buf[AE_SER_ENTRY_LENGTH+2+1];
    const char *emsg = "ALGLIB: serialization integrity error";
    ae_int_t bytes_appended;
    
    /* prepare serialization, check consistency */
    ae_bool2str(v, buf, state);
    serializer->entries_saved++;
    if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
        strcat(buf, " ");
    else
        strcat(buf, "\r\n");
    bytes_appended = (ae_int_t)strlen(buf);
    ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
    serializer->bytes_written += bytes_appended;
    
    /* append to buffer */
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        *(serializer->out_cppstr) += buf;
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        strcat(serializer->out_str, buf);
        serializer->out_str += bytes_appended;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, emsg);
}

void ae_serializer_serialize_int(ae_serializer *serializer, ae_int_t v, ae_state *state)
{
    char buf[AE_SER_ENTRY_LENGTH+2+1];
    const char *emsg = "ALGLIB: serialization integrity error";
    ae_int_t bytes_appended;
    
    /* prepare serialization, check consistency */
    ae_int2str(v, buf, state);
    serializer->entries_saved++;
    if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
        strcat(buf, " ");
    else
        strcat(buf, "\r\n");
    bytes_appended = (ae_int_t)strlen(buf);
    ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
    serializer->bytes_written += bytes_appended;
    
    /* append to buffer */
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        *(serializer->out_cppstr) += buf;
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        strcat(serializer->out_str, buf);
        serializer->out_str += bytes_appended;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, emsg);
}

void ae_serializer_serialize_int64(ae_serializer *serializer, ae_int64_t v, ae_state *state)
{
    char buf[AE_SER_ENTRY_LENGTH+2+1];
    const char *emsg = "ALGLIB: serialization integrity error";
    ae_int_t bytes_appended;
    
    /* prepare serialization, check consistency */
    ae_int642str(v, buf, state);
    serializer->entries_saved++;
    if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
        strcat(buf, " ");
    else
        strcat(buf, "\r\n");
    bytes_appended = (ae_int_t)strlen(buf);
    ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
    serializer->bytes_written += bytes_appended;
    
    /* append to buffer */
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        *(serializer->out_cppstr) += buf;
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        strcat(serializer->out_str, buf);
        serializer->out_str += bytes_appended;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, emsg);
}

void ae_serializer_serialize_double(ae_serializer *serializer, double v, ae_state *state)
{
    char buf[AE_SER_ENTRY_LENGTH+2+1];
    const char *emsg = "ALGLIB: serialization integrity error";
    ae_int_t bytes_appended;
    
    /* prepare serialization, check consistency */
    ae_double2str(v, buf, state);
    serializer->entries_saved++;
    if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
        strcat(buf, " ");
    else
        strcat(buf, "\r\n");
    bytes_appended = (ae_int_t)strlen(buf);
    ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
    serializer->bytes_written += bytes_appended;
    
    /* append to buffer */
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        *(serializer->out_cppstr) += buf;
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        strcat(serializer->out_str, buf);
        serializer->out_str += bytes_appended;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, emsg);
}

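/************************************************************************
Layout of the text format produced by the serialize_* functions above
(commentary added for clarity): every value is rendered into a field of
at most AE_SER_ENTRY_LENGTH characters, entries are separated by a
single space, and each AE_SER_ENTRIES_PER_ROW-th entry terminates its
row with CR+LF instead:

    e1 e2 ... eR<CR><LF>        (R = AE_SER_ENTRIES_PER_ROW)
    eR+1 ...

bytes_written is compared against bytes_asked with a strict "<" so that
one byte always remains available for the trailing zero of the output
string.
************************************************************************/
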
void ae_serializer_serialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
{
    ae_int_t chunk_size, entries_count;
    
    chunk_size = 8;
    
    /* save array length */
    ae_serializer_serialize_int(serializer, bytes->cnt, state);
    
    /* determine entries count */
    entries_count = bytes->cnt/chunk_size + (bytes->cnt%chunk_size>0 ? 1 : 0);
    for(ae_int_t eidx=0; eidx<entries_count; eidx++)
    {
        ae_int64_t tmpi;
        ae_int_t elen;
        elen = bytes->cnt - eidx*chunk_size;
        elen = elen>chunk_size ? chunk_size : elen;
        memset(&tmpi, 0, sizeof(tmpi));
        memmove(&tmpi, bytes->ptr.p_ubyte + eidx*chunk_size, elen);
        ae_serializer_serialize_int64(serializer, tmpi, state);
    }
}

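/************************************************************************
Worked example for the byte array packing above (commentary added for
clarity).  With chunk_size=8, an 11-byte array is stored as one integer
entry holding the length followed by ceil(11/8)=2 int64 entries:

    entry 0: length = 11
    entry 1: raw bytes[0..7]  copied into the low bytes of an ae_int64_t
    entry 2: raw bytes[8..10] copied in, remaining 5 bytes zero-filled

The memset()+memmove() pair guarantees that the unused tail of the last
chunk is deterministically zero.
************************************************************************/
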
void ae_serializer_unserialize_bool(ae_serializer *serializer, ae_bool *v, ae_state *state)
{
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        *v = ae_str2bool(serializer->in_str, state, &serializer->in_str);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        char buf[AE_SER_ENTRY_LENGTH+2+1];
        const char *p = buf;
        ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
        *v = ae_str2bool(buf, state, &p);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}

void ae_serializer_unserialize_int(ae_serializer *serializer, ae_int_t *v, ae_state *state)
{
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        *v = ae_str2int(serializer->in_str, state, &serializer->in_str);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        char buf[AE_SER_ENTRY_LENGTH+2+1];
        const char *p = buf;
        ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
        *v = ae_str2int(buf, state, &p);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}

void ae_serializer_unserialize_int64(ae_serializer *serializer, ae_int64_t *v, ae_state *state)
{
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        *v = ae_str2int64(serializer->in_str, state, &serializer->in_str);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        char buf[AE_SER_ENTRY_LENGTH+2+1];
        const char *p = buf;
        ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
        *v = ae_str2int64(buf, state, &p);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}

void ae_serializer_unserialize_double(ae_serializer *serializer, double *v, ae_state *state)
{
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        *v = ae_str2double(serializer->in_str, state, &serializer->in_str);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        char buf[AE_SER_ENTRY_LENGTH+2+1];
        const char *p = buf;
        ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
        *v = ae_str2double(buf, state, &p);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}

void ae_serializer_unserialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
{
    ae_int_t chunk_size, n, entries_count;
    
    chunk_size = 8;
    
    /* read array length, allocate output */
    ae_serializer_unserialize_int(serializer, &n, state);
    ae_vector_set_length(bytes, n, state);
    
    /* determine entries count, read entries */
    entries_count = n/chunk_size + (n%chunk_size>0 ? 1 : 0);
    for(ae_int_t eidx=0; eidx<entries_count; eidx++)
    {
        ae_int_t elen;
        ae_int64_t tmp64;
        
        elen = n-eidx*chunk_size;
        elen = elen>chunk_size ? chunk_size : elen;
        ae_serializer_unserialize_int64(serializer, &tmp64, state);
        memmove(bytes->ptr.p_ubyte+eidx*chunk_size, &tmp64, elen);
    }
}

void ae_serializer_stop(ae_serializer *serializer, ae_state *state)
{
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        *(serializer->out_cppstr) += ".";
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        strcat(serializer->out_str, ".");
        serializer->out_str += 1;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        ae_assert(serializer->stream_writer(".", serializer->stream_aux)==0, "ae_serializer: error writing to stream", state);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        /*
         * because input string may be from pre-3.11 serializer,
         * which does not include trailing dot, we do not test
         * string for presence of "." symbol. Anyway, because string
         * is not stream, we do not have to read ALL trailing symbols.
         */
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        /*
         * Read trailing dot, perform integrity check
         */
        char buf[2];
        ae_assert(serializer->stream_reader(serializer->stream_aux, 1, buf)==0, "ae_serializer: error reading from stream", state);
        ae_assert(buf[0]=='.', "ae_serializer: trailing . is not found in the stream", state);
        return;
    }
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}


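/************************************************************************
Usage sketch for the string-based serializer API defined above.  This is
an illustrative fragment only: it assumes that ser, unser and state have
already been initialized and that the size-allocation pass which sets
bytes_asked (not shown in this part of the file) has been performed, so
that buf is known to be large enough.

    ae_serializer ser, unser;
    ae_state state;                  // assumed to be initialized elsewhere
    ae_int_t iv;
    double dv;
    char buf[256];

    ae_serializer_sstart_str(&ser, buf);
    ae_serializer_serialize_int(&ser, 42, &state);
    ae_serializer_serialize_double(&ser, 1.5, &state);
    ae_serializer_stop(&ser, &state);        // appends the trailing '.'

    ae_serializer_ustart_str(&unser, buf);
    ae_serializer_unserialize_int(&unser, &iv, &state);
    ae_serializer_unserialize_double(&unser, &dv, &state);
    ae_serializer_stop(&unser, &state);      // no dot check for string input
************************************************************************/
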
/************************************************************************
Complex math functions
************************************************************************/
ae_complex ae_complex_from_i(ae_int_t v)
{
    ae_complex r;
    r.x = (double)v;
    r.y = 0.0;
    return r;
}

ae_complex ae_complex_from_d(double v)
{
    ae_complex r;
    r.x = v;
    r.y = 0.0;
    return r;
}

ae_complex ae_c_neg(ae_complex lhs)
{
    ae_complex result;
    result.x = -lhs.x;
    result.y = -lhs.y;
    return result;
}

ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
{
    ae_complex result;
    result.x = +lhs.x;
    result.y = -lhs.y;
    return result;
}

ae_complex ae_c_sqr(ae_complex lhs, ae_state *state)
{
    ae_complex result;
    result.x = lhs.x*lhs.x-lhs.y*lhs.y;
    result.y = 2*lhs.x*lhs.y;
    return result;
}

double ae_c_abs(ae_complex z, ae_state *state)
{
    double w;
    double xabs;
    double yabs;
    double v;
    
    xabs = fabs(z.x);
    yabs = fabs(z.y);
    w = xabs>yabs ? xabs : yabs;
    v = xabs<yabs ? xabs : yabs;
    if( v==0 )
        return w;
    else
    {
        double t = v/w;
        return w*sqrt(1+t*t);
    }
}

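/************************************************************************
Why ae_c_abs() is written the way it is (commentary added for clarity).
With w=max(|x|,|y|) and v=min(|x|,|y|),

    |z| = sqrt(x^2+y^2) = w*sqrt(1+(v/w)^2),

which never squares the large component directly.  For example, for
z = 3e200 + 4e200*i the naive x*x+y*y overflows to +INF, while the
scaled form returns 4e200*sqrt(1+0.75^2) = 5e200 as expected.  The
v==0 branch also covers w==0, so a 0/0 division is never formed.
************************************************************************/
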
ae_bool ae_c_eq(ae_complex lhs, ae_complex rhs)
{
    volatile double x1 = lhs.x;
    volatile double x2 = rhs.x;
    volatile double y1 = lhs.y;
    volatile double y2 = rhs.y;
    return x1==x2 && y1==y2;
}

ae_bool ae_c_neq(ae_complex lhs, ae_complex rhs)
{
    volatile double x1 = lhs.x;
    volatile double x2 = rhs.x;
    volatile double y1 = lhs.y;
    volatile double y2 = rhs.y;
    return x1!=x2 || y1!=y2;
}

ae_complex ae_c_add(ae_complex lhs, ae_complex rhs)
{
    ae_complex result;
    result.x = lhs.x+rhs.x;
    result.y = lhs.y+rhs.y;
    return result;
}

ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs)
{
    ae_complex result;
    result.x = lhs.x*rhs.x-lhs.y*rhs.y;
    result.y = lhs.x*rhs.y+lhs.y*rhs.x;
    return result;
}

ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs)
{
    ae_complex result;
    result.x = lhs.x-rhs.x;
    result.y = lhs.y-rhs.y;
    return result;
}

ae_complex ae_c_div(ae_complex lhs, ae_complex rhs)
{
    ae_complex result;
    double e;
    double f;
    if( fabs(rhs.y)<fabs(rhs.x) )
    {
        e = rhs.y/rhs.x;
        f = rhs.x+rhs.y*e;
        result.x = (lhs.x+lhs.y*e)/f;
        result.y = (lhs.y-lhs.x*e)/f;
    }
    else
    {
        e = rhs.x/rhs.y;
        f = rhs.y+rhs.x*e;
        result.x = (lhs.y+lhs.x*e)/f;
        result.y = (-lhs.x+lhs.y*e)/f;
    }
    return result;
}

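/************************************************************************
ae_c_div() above uses a scaled ("Smith-style") complex division instead
of the textbook formula (commentary added for clarity).  For (a+bi)/(c+di)
with |d|<|c| it sets e=d/c, f=c+d*e, and then

    real = (a+b*e)/f,   imag = (b-a*e)/f,

which is algebraically identical to ((a*c+b*d)+(b*c-a*d)i)/(c^2+d^2) but
never forms c^2+d^2 explicitly, so it does not overflow or underflow
when the denominator components are near the extremes of the double
range.  The other branch handles |d|>=|c| symmetrically.
************************************************************************/
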
ae_bool ae_c_eq_d(ae_complex lhs, double rhs)
{
    volatile double x1 = lhs.x;
    volatile double x2 = rhs;
    volatile double y1 = lhs.y;
    volatile double y2 = 0;
    return x1==x2 && y1==y2;
}

ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
{
    volatile double x1 = lhs.x;
    volatile double x2 = rhs;
    volatile double y1 = lhs.y;
    volatile double y2 = 0;
    return x1!=x2 || y1!=y2;
}

ae_complex ae_c_add_d(ae_complex lhs, double rhs)
{
    ae_complex result;
    result.x = lhs.x+rhs;
    result.y = lhs.y;
    return result;
}

ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
{
    ae_complex result;
    result.x = lhs.x*rhs;
    result.y = lhs.y*rhs;
    return result;
}

ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
{
    ae_complex result;
    result.x = lhs.x-rhs;
    result.y = lhs.y;
    return result;
}

ae_complex ae_c_d_sub(double lhs, ae_complex rhs)
{
    ae_complex result;
    result.x = lhs-rhs.x;
    result.y = -rhs.y;
    return result;
}

ae_complex ae_c_div_d(ae_complex lhs, double rhs)
{
    ae_complex result;
    result.x = lhs.x/rhs;
    result.y = lhs.y/rhs;
    return result;
}

ae_complex ae_c_d_div(double lhs, ae_complex rhs)
{
    ae_complex result;
    double e;
    double f;
    if( fabs(rhs.y)<fabs(rhs.x) )
    {
        e = rhs.y/rhs.x;
        f = rhs.x+rhs.y*e;
        result.x = lhs/f;
        result.y = -lhs*e/f;
    }
    else
    {
        e = rhs.x/rhs.y;
        f = rhs.y+rhs.x*e;
        result.x = lhs*e/f;
        result.y = -lhs/f;
    }
    return result;
}


/************************************************************************
|
||
|
Complex BLAS operations
|
||
|
************************************************************************/
|
||
|
ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
|
||
|
{
|
||
|
double rx = 0, ry = 0;
|
||
|
ae_int_t i;
|
||
|
ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
|
||
|
ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
|
||
|
ae_complex result;
|
||
|
if( bconj0 && bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = -v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = -v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( !bconj0 && bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = -v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( bconj0 && !bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = -v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( !bconj0 && !bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
result.x = rx;
|
||
|
result.y = ry;
|
||
|
return result;
|
||
|
}
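/************************************************************************
Conjugation flags in the complex kernels (commentary added for clarity):
a conj argument whose first character is 'N' or 'n' means "use the
operand as is"; any other string requests conjugation of that operand.
Illustrative call computing the Hermitian inner product
sum(conj(x[i])*y[i]) over two contiguous arrays x, y of length n
(x, y and n are hypothetical variables):

    ae_complex r = ae_v_cdotproduct(x, 1, "Conj", y, 1, "N", n);
************************************************************************/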
|
||
|
|
||
|
void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = -vsrc->x;
|
||
|
vdst->y = vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = -vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = -vsrc->x;
|
||
|
vdst->y = vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = -vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = alpha*vsrc->x;
|
||
|
vdst->y = -alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = alpha*vsrc->x;
|
||
|
vdst->y = alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = alpha*vsrc->x;
|
||
|
vdst->y = -alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = alpha*vsrc->x;
|
||
|
vdst->y = alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = ax*vsrc->x+ay*vsrc->y;
|
||
|
vdst->y = -ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = ax*vsrc->x-ay*vsrc->y;
|
||
|
vdst->y = ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = ax*vsrc->x+ay*vsrc->y;
|
||
|
vdst->y = -ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = ax*vsrc->x-ay*vsrc->y;
|
||
|
vdst->y = ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += vsrc->x;
|
||
|
vdst->y -= vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += vsrc->x;
|
||
|
vdst->y += vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += vsrc->x;
|
||
|
vdst->y -= vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += vsrc->x;
|
||
|
vdst->y += vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_caddd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += alpha*vsrc->x;
|
||
|
vdst->y -= alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += alpha*vsrc->x;
|
||
|
vdst->y += alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += alpha*vsrc->x;
|
||
|
vdst->y -= alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += alpha*vsrc->x;
|
||
|
vdst->y += alpha*vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += ax*vsrc->x+ay*vsrc->y;
|
||
|
vdst->y -= ax*vsrc->y-ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x += ax*vsrc->x-ay*vsrc->y;
|
||
|
vdst->y += ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += ax*vsrc->x+ay*vsrc->y;
|
||
|
vdst->y -= ax*vsrc->y-ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x += ax*vsrc->x-ay*vsrc->y;
|
||
|
vdst->y += ax*vsrc->y+ay*vsrc->x;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
|
||
|
{
|
||
|
ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x -= vsrc->x;
|
||
|
vdst->y += vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x -= vsrc->x;
|
||
|
vdst->y -= vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x -= vsrc->x;
|
||
|
vdst->y += vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x -= vsrc->x;
|
||
|
vdst->y -= vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
|
||
|
}
|
||
|
|
||
|
void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
|
||
|
{
|
||
|
alpha.x = -alpha.x;
|
||
|
alpha.y = -alpha.y;
|
||
|
ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha);
|
||
|
}
|
||
|
|
||
|
void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst)
|
||
|
{
|
||
|
vdst->x *= alpha;
|
||
|
vdst->y *= alpha;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst++)
|
||
|
{
|
||
|
vdst->x *= alpha;
|
||
|
vdst->y *= alpha;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst)
|
||
|
{
|
||
|
double dstx = vdst->x, dsty = vdst->y;
|
||
|
vdst->x = ax*dstx-ay*dsty;
|
||
|
vdst->y = ax*dsty+ay*dstx;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
double ax = alpha.x, ay = alpha.y;
|
||
|
for(i=0; i<n; i++, vdst++)
|
||
|
{
|
||
|
double dstx = vdst->x, dsty = vdst->y;
|
||
|
vdst->x = ax*dstx-ay*dsty;
|
||
|
vdst->y = ax*dsty+ay*dstx;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/************************************************************************
|
||
|
Real BLAS operations
|
||
|
************************************************************************/
|
||
|
double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
|
||
|
{
|
||
|
double result = 0;
|
||
|
ae_int_t i;
|
||
|
if( stride0!=1 || stride1!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* slow general code
|
||
|
*/
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
result += (*v0)*(*v1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized code for stride=1
|
||
|
*/
|
||
|
ae_int_t n4 = n/4;
|
||
|
ae_int_t nleft = n%4;
|
||
|
for(i=0; i<n4; i++, v0+=4, v1+=4)
|
||
|
result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
|
||
|
for(i=0; i<nleft; i++, v0++, v1++)
|
||
|
result += v0[0]*v1[0];
|
||
|
}
|
||
|
return result;
|
||
|
}
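/************************************************************************
The stride-1 branch of ae_v_dotproduct() above is a 4-way unrolled form
of the plain loop below (commentary added for clarity); the unrolling
only changes the order of the floating-point additions, not the
mathematical result:

    for(i=0; i<n; i++)
        result += v0[i]*v1[i];
************************************************************************/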
|
||
|
|
||
|
void ae_v_move(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] = vsrc[0];
|
||
|
vdst[1] = vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] = vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = -*vsrc;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] = -vsrc[0];
|
||
|
vdst[1] = -vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] = -vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_moved(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = alpha*(*vsrc);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] = alpha*vsrc[0];
|
||
|
vdst[1] = alpha*vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] = alpha*vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst += *vsrc;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] += vsrc[0];
|
||
|
vdst[1] += vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] += vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst += alpha*(*vsrc);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] += alpha*vsrc[0];
|
||
|
vdst[1] += alpha*vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] += alpha*vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst -= *vsrc;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] -= vsrc[0];
|
||
|
vdst[1] -= vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] -= vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
|
||
|
}
|
||
|
|
||
|
void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 )
|
||
|
{
|
||
|
/*
|
||
|
* general unoptimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst)
|
||
|
*vdst *= alpha;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* highly optimized case
|
||
|
*/
|
||
|
for(i=0; i<n; i++, vdst++)
|
||
|
*vdst *= alpha;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/************************************************************************
Other functions
************************************************************************/
ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
{
    return b-a+1;
}

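/************************************************************************
ae_v_len(a,b) above returns the inclusive element count b-a+1, e.g.
ae_v_len(0,9)==10; callers use it to convert inclusive index ranges
[a..b] into the element count n expected by the vector kernels in this
file (commentary added for clarity).
************************************************************************/
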
/************************************************************************
RComm functions
************************************************************************/
void _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic)
{
    /* initial zero-filling */
    memset(&p->ba, 0, sizeof(p->ba));
    memset(&p->ia, 0, sizeof(p->ia));
    memset(&p->ra, 0, sizeof(p->ra));
    memset(&p->ca, 0, sizeof(p->ca));
    
    /* initialization */
    ae_vector_init(&p->ba, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&p->ia, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->ra, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic);
}

void _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic)
{
    /* initial zero-filling */
    memset(&dst->ba, 0, sizeof(dst->ba));
    memset(&dst->ia, 0, sizeof(dst->ia));
    memset(&dst->ra, 0, sizeof(dst->ra));
    memset(&dst->ca, 0, sizeof(dst->ca));
    
    /* initialization */
    ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic);
    ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic);
    ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic);
    ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic);
    dst->stage = src->stage;
}

void _rcommstate_clear(rcommstate* p)
{
    ae_vector_clear(&p->ba);
    ae_vector_clear(&p->ia);
    ae_vector_clear(&p->ra);
    ae_vector_clear(&p->ca);
}

void _rcommstate_destroy(rcommstate* p)
{
    _rcommstate_clear(p);
}

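/************************************************************************
The rcommstate structure managed above supports ALGLIB's reverse
communication: ba/ia/ra/ca are persistent boolean/integer/real/complex
scratch arrays and stage records where an interrupted algorithm should
resume.  A caller-side loop typically looks like the hypothetical sketch
below (solverstate, solver_iteration and evaluate_f are placeholders,
not ALGLIB names):

    solverstate s;                   // contains an rcommstate member
    while( solver_iteration(&s) )    // returns true while f(x) is needed
        s.f = evaluate_f(s.x);       // supply the requested value, resume
************************************************************************/
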
#ifdef AE_DEBUG4WINDOWS
int _tickcount()
{
    return GetTickCount();
}
#endif

#ifdef AE_DEBUG4POSIX
#include <sys/time.h>
int _tickcount()
{
    struct timeval now;
    ae_int64_t r, v;
    gettimeofday(&now, NULL);
    v = now.tv_sec;
    r = v*1000;
    v = now.tv_usec/1000;
    r = r+v;
    return r;
    /*struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) )
        return 0;
    return now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;*/
}
#endif


}

/////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY
|
||
|
//
|
||
|
/////////////////////////////////////////////////////////////////////////
|
||
|
/********************************************************************
|
||
|
Internal forwards
|
||
|
********************************************************************/
|
||
|
namespace alglib
|
||
|
{
|
||
|
double get_aenv_nan();
|
||
|
double get_aenv_posinf();
|
||
|
double get_aenv_neginf();
|
||
|
ae_int_t my_stricmp(const char *s1, const char *s2);
|
||
|
char* filter_spaces(const char *s);
|
||
|
void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec);
|
||
|
void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat);
|
||
|
|
||
|
ae_bool parse_bool_delim(const char *s, const char *delim);
|
||
|
ae_int_t parse_int_delim(const char *s, const char *delim);
|
||
|
bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s);
|
||
|
double parse_real_delim(const char *s, const char *delim);
|
||
|
alglib::complex parse_complex_delim(const char *s, const char *delim);
|
||
|
|
||
|
std::string arraytostring(const bool *ptr, ae_int_t n);
|
||
|
std::string arraytostring(const ae_int_t *ptr, ae_int_t n);
|
||
|
std::string arraytostring(const double *ptr, ae_int_t n, int dps);
|
||
|
std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps);
|
||
|
}
|
||
|
|
||
|
/********************************************************************
|
||
|
Global and local constants/variables
|
||
|
********************************************************************/
|
||
|
const double alglib::machineepsilon = 5E-16;
|
||
|
const double alglib::maxrealnumber = 1E300;
|
||
|
const double alglib::minrealnumber = 1E-300;
|
||
|
const alglib::ae_int_t alglib::endianness = alglib_impl::ae_get_endianness();
|
||
|
const double alglib::fp_nan = alglib::get_aenv_nan();
|
||
|
const double alglib::fp_posinf = alglib::get_aenv_posinf();
|
||
|
const double alglib::fp_neginf = alglib::get_aenv_neginf();
|
||
|
#if defined(AE_NO_EXCEPTIONS)
|
||
|
static const char *_alglib_last_error = NULL;
|
||
|
#endif
|
||
|
static const alglib_impl::ae_uint64_t _i64_xdefault = 0x0;
|
||
|
static const alglib_impl::ae_uint64_t _i64_xserial = _ALGLIB_FLG_THREADING_SERIAL;
|
||
|
static const alglib_impl::ae_uint64_t _i64_xparallel = _ALGLIB_FLG_THREADING_PARALLEL;
|
||
|
const alglib::xparams &alglib::xdefault = *((const alglib::xparams *)(&_i64_xdefault));
|
||
|
const alglib::xparams &alglib::serial = *((const alglib::xparams *)(&_i64_xserial));
|
||
|
const alglib::xparams &alglib::parallel = *((const alglib::xparams *)(&_i64_xparallel));
|
||
|
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
Exception handling
|
||
|
********************************************************************/
|
||
|
#if !defined(AE_NO_EXCEPTIONS)
|
||
|
alglib::ap_error::ap_error()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
alglib::ap_error::ap_error(const char *s)
|
||
|
{
|
||
|
msg = s;
|
||
|
}
|
||
|
|
||
|
void alglib::ap_error::make_assertion(bool bClause)
|
||
|
{
|
||
|
if(!bClause)
|
||
|
_ALGLIB_CPP_EXCEPTION("");
|
||
|
}
|
||
|
|
||
|
void alglib::ap_error::make_assertion(bool bClause, const char *p_msg)
|
||
|
{
|
||
|
if(!bClause)
|
||
|
_ALGLIB_CPP_EXCEPTION(p_msg);
|
||
|
}
|
||
|
#else
|
||
|
void alglib::set_error_flag(const char *s)
|
||
|
{
|
||
|
if( s==NULL )
|
||
|
s = "ALGLIB: unknown error";
|
||
|
_alglib_last_error = s;
|
||
|
}
|
||
|
|
||
|
bool alglib::get_error_flag(const char **p_msg)
|
||
|
{
|
||
|
if( _alglib_last_error==NULL )
|
||
|
return false;
|
||
|
if( p_msg!=NULL )
|
||
|
*p_msg = _alglib_last_error;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void alglib::clear_error_flag()
|
||
|
{
|
||
|
_alglib_last_error = NULL;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/********************************************************************
|
||
|
Complex number with double precision.
|
||
|
********************************************************************/
|
||
|
alglib::complex::complex():x(0.0),y(0.0)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
alglib::complex::complex(const double &_x):x(_x),y(0.0)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator= (const double& v)
|
||
|
{
|
||
|
x = v;
|
||
|
y = 0.0;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator+=(const double& v)
|
||
|
{
|
||
|
x += v;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator-=(const double& v)
|
||
|
{
|
||
|
x -= v;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator*=(const double& v)
|
||
|
{
|
||
|
x *= v;
|
||
|
y *= v;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator/=(const double& v)
|
||
|
{
|
||
|
x /= v;
|
||
|
y /= v;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator= (const alglib::complex& z)
|
||
|
{
|
||
|
x = z.x;
|
||
|
y = z.y;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator+=(const alglib::complex& z)
|
||
|
{
|
||
|
x += z.x;
|
||
|
y += z.y;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator-=(const alglib::complex& z)
|
||
|
{
|
||
|
x -= z.x;
|
||
|
y -= z.y;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator*=(const alglib::complex& z)
|
||
|
{
|
||
|
double t = x*z.x-y*z.y;
|
||
|
y = x*z.y+y*z.x;
|
||
|
x = t;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib::complex& alglib::complex::operator/=(const alglib::complex& z)
|
||
|
{
|
||
|
alglib::complex result;
|
||
|
double e;
|
||
|
double f;
|
||
|
if( fabs(z.y)<fabs(z.x) )
|
||
|
{
|
||
|
e = z.y/z.x;
|
||
|
f = z.x+z.y*e;
|
||
|
result.x = (x+y*e)/f;
|
||
|
result.y = (y-x*e)/f;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
e = z.x/z.y;
|
||
|
f = z.y+z.x*e;
|
||
|
result.x = (y+x*e)/f;
|
||
|
result.y = (-x+y*e)/f;
|
||
|
}
|
||
|
*this = result;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
alglib_impl::ae_complex* alglib::complex::c_ptr()
|
||
|
{
|
||
|
return (alglib_impl::ae_complex*)this;
|
||
|
}
|
||
|
|
||
|
const alglib_impl::ae_complex* alglib::complex::c_ptr() const
|
||
|
{
|
||
|
return (const alglib_impl::ae_complex*)this;
|
||
|
}
|
||
|
|
||
|
#if !defined(AE_NO_EXCEPTIONS)
|
||
|
std::string alglib::complex::tostring(int _dps) const
|
||
|
{
|
||
|
char mask[32];
|
||
|
char buf_x[32];
|
||
|
char buf_y[32];
|
||
|
char buf_zero[32];
|
||
|
int dps = _dps>=0 ? _dps : -_dps;
|
||
|
if( dps<=0 || dps>=20 )
|
||
|
_ALGLIB_CPP_EXCEPTION("complex::tostring(): incorrect dps");
|
||
|
|
||
|
// handle IEEE special quantities
|
||
|
if( fp_isnan(x) || fp_isnan(y) )
|
||
|
return "NAN";
|
||
|
if( fp_isinf(x) || fp_isinf(y) )
|
||
|
return "INF";
|
||
|
|
||
|
// generate mask
|
||
|
if( sprintf(mask, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask) )
|
||
|
_ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
|
||
|
|
||
|
// print |x|, |y| and zero with same mask and compare
|
||
|
if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) )
|
||
|
_ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
|
||
|
if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) )
|
||
|
_ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
|
||
|
if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) )
|
||
|
_ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
|
||
|
|
||
|
// different zero/nonzero patterns
|
||
|
if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 )
|
||
|
return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i";
|
||
|
if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 )
|
||
|
return std::string(x>0 ? "" : "-")+buf_x;
|
||
|
if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 )
|
||
|
return std::string(y>0 ? "" : "-")+buf_y+"i";
|
||
|
return std::string("0");
|
||
|
}
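/************************************************************************
Examples of complex::tostring() output (derived from the code above):
a positive dps selects fixed format, a negative one exponential format,
and components which print as zero at the requested precision are
dropped:

    complex(1.25,-2.5).tostring(2)   ->  "1.25-2.50i"
    complex(0.0,3.0).tostring(2)     ->  "3.00i"
    complex(1e-9,0.0).tostring(2)    ->  "0"
    any NaN component -> "NAN", any infinite component -> "INF"
************************************************************************/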
|
||
|
#endif
|
||
|
|
||
|
bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{
|
||
|
volatile double x1 = lhs.x;
|
||
|
volatile double x2 = rhs.x;
|
||
|
volatile double y1 = lhs.y;
|
||
|
volatile double y2 = rhs.y;
|
||
|
return x1==x2 && y1==y2;
|
||
|
}
|
||
|
|
||
|
bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{ return !(lhs==rhs); }
|
||
|
|
||
|
const alglib::complex alglib::operator+(const alglib::complex& lhs)
|
||
|
{ return lhs; }
|
||
|
|
||
|
const alglib::complex alglib::operator-(const alglib::complex& lhs)
|
||
|
{ return alglib::complex(-lhs.x, -lhs.y); }
|
||
|
|
||
|
const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{ alglib::complex r = lhs; r += rhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs)
|
||
|
{ alglib::complex r = lhs; r += rhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs)
|
||
|
{ alglib::complex r = rhs; r += lhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{ alglib::complex r = lhs; r -= rhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs)
|
||
|
{ alglib::complex r = lhs; r -= rhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs)
|
||
|
{ alglib::complex r = lhs; r -= rhs; return r; }
|
||
|
|
||
|
const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{ return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y, lhs.x*rhs.y + lhs.y*rhs.x); }
|
||
|
|
||
|
const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs)
|
||
|
{ return alglib::complex(lhs.x*rhs, lhs.y*rhs); }
|
||
|
|
||
|
const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs)
|
||
|
{ return alglib::complex(lhs*rhs.x, lhs*rhs.y); }
|
||
|
|
||
|
const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs)
|
||
|
{
|
||
|
alglib::complex result;
|
||
|
double e;
|
||
|
double f;
|
||
|
if( fabs(rhs.y)<fabs(rhs.x) )
|
||
|
{
|
||
|
e = rhs.y/rhs.x;
|
||
|
f = rhs.x+rhs.y*e;
|
||
|
result.x = (lhs.x+lhs.y*e)/f;
|
||
|
result.y = (lhs.y-lhs.x*e)/f;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
e = rhs.x/rhs.y;
|
||
|
f = rhs.y+rhs.x*e;
|
||
|
result.x = (lhs.y+lhs.x*e)/f;
|
||
|
result.y = (-lhs.x+lhs.y*e)/f;
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs)
|
||
|
{
|
||
|
alglib::complex result;
|
||
|
double e;
|
||
|
double f;
|
||
|
if( fabs(rhs.y)<fabs(rhs.x) )
|
||
|
{
|
||
|
e = rhs.y/rhs.x;
|
||
|
f = rhs.x+rhs.y*e;
|
||
|
result.x = lhs/f;
|
||
|
result.y = -lhs*e/f;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
e = rhs.x/rhs.y;
|
||
|
f = rhs.y+rhs.x*e;
|
||
|
result.x = lhs*e/f;
|
||
|
result.y = -lhs/f;
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs)
|
||
|
{ return alglib::complex(lhs.x/rhs, lhs.y/rhs); }
|
||
|
|
||
|
double alglib::abscomplex(const alglib::complex &z)
|
||
|
{
|
||
|
double w;
|
||
|
double xabs;
|
||
|
double yabs;
|
||
|
double v;
|
||
|
|
||
|
xabs = fabs(z.x);
|
||
|
yabs = fabs(z.y);
|
||
|
w = xabs>yabs ? xabs : yabs;
|
||
|
v = xabs<yabs ? xabs : yabs;
|
||
|
if( v==0 )
|
||
|
return w;
|
||
|
else
|
||
|
{
|
||
|
double t = v/w;
|
||
|
return w*sqrt(1+t*t);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
alglib::complex alglib::conj(const alglib::complex &z)
|
||
|
{ return alglib::complex(z.x, -z.y); }
|
||
|
|
||
|
alglib::complex alglib::csqr(const alglib::complex &z)
|
||
|
{ return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); }
|
||
|
|
||
|
void alglib::setnworkers(alglib::ae_int_t nworkers)
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
alglib_impl::ae_set_cores_to_use(nworkers);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void alglib::setglobalthreading(const alglib::xparams settings)
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
alglib_impl::ae_set_global_threading(settings.flags);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
alglib::ae_int_t alglib::getnworkers()
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
return alglib_impl::ae_get_cores_to_use();
|
||
|
#else
|
||
|
return 1;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
alglib::ae_int_t alglib::_ae_cores_count()
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
return alglib_impl::ae_cores_count();
|
||
|
#else
|
||
|
return 1;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void alglib::_ae_set_global_threading(alglib_impl::ae_uint64_t flg_value)
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
alglib_impl::ae_set_global_threading(flg_value);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
alglib_impl::ae_uint64_t alglib::_ae_get_global_threading()
|
||
|
{
|
||
|
#ifdef AE_HPC
|
||
|
return alglib_impl::ae_get_global_threading();
|
||
|
#else
|
||
|
return _ALGLIB_FLG_THREADING_SERIAL;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
Level 1 BLAS functions
|
||
|
********************************************************************/
|
||
|
double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
|
||
|
{
|
||
|
double result = 0;
|
||
|
ae_int_t i;
|
||
|
if( stride0!=1 || stride1!=1 )
|
||
|
{
|
||
|
//
|
||
|
// slow general code
|
||
|
//
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
result += (*v0)*(*v1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
//
|
||
|
// optimized code for stride=1
|
||
|
//
|
||
|
ae_int_t n4 = n/4;
|
||
|
ae_int_t nleft = n%4;
|
||
|
for(i=0; i<n4; i++, v0+=4, v1+=4)
|
||
|
result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
|
||
|
for(i=0; i<nleft; i++, v0++, v1++)
|
||
|
result += v0[0]*v1[0];
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N)
|
||
|
{
|
||
|
return vdotproduct(v1, 1, v2, 1, N);
|
||
|
}
|
||
|
|
||
|
alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
|
||
|
{
|
||
|
double rx = 0, ry = 0;
|
||
|
ae_int_t i;
|
||
|
bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
|
||
|
bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
|
||
|
if( bconj0 && bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = -v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = -v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( !bconj0 && bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = -v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( bconj0 && !bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = -v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
if( !bconj0 && !bconj1 )
|
||
|
{
|
||
|
double v0x, v0y, v1x, v1y;
|
||
|
for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
|
||
|
{
|
||
|
v0x = v0->x;
|
||
|
v0y = v0->y;
|
||
|
v1x = v1->x;
|
||
|
v1y = v1->y;
|
||
|
rx += v0x*v1x-v0y*v1y;
|
||
|
ry += v0x*v1y+v0y*v1x;
|
||
|
}
|
||
|
}
|
||
|
return alglib::complex(rx,ry);
|
||
|
}
|
||
|
|
||
|
alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N)
|
||
|
{
|
||
|
return vdotproduct(v1, 1, "N", v2, 1, "N", N);
|
||
|
}
|
||
|
|
||
|
void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
//
|
||
|
// general unoptimized case
|
||
|
//
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
//
|
||
|
// optimized case
|
||
|
//
|
||
|
ae_int_t n2 = n/2;
|
||
|
for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
|
||
|
{
|
||
|
vdst[0] = vsrc[0];
|
||
|
vdst[1] = vsrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
vdst[0] = vsrc[0];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void alglib::vmove(double *vdst, const double* vsrc, ae_int_t N)
|
||
|
{
|
||
|
vmove(vdst, 1, vsrc, 1, N);
|
||
|
}
|
||
|
|
||
|
void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
|
||
|
{
|
||
|
bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
|
||
|
ae_int_t i;
|
||
|
if( stride_dst!=1 || stride_src!=1 )
|
||
|
{
|
||
|
//
|
||
|
// general unoptimized case
|
||
|
//
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
{
|
||
|
vdst->x = vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
//
|
||
|
// optimized case
|
||
|
//
|
||
|
if( bconj )
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
{
|
||
|
vdst->x = vsrc->x;
|
||
|
vdst->y = -vsrc->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++, vdst++, vsrc++)
|
||
|
*vdst = *vsrc;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void alglib::vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N)
|
||
|
{
|
||
|
vmove(vdst, 1, vsrc, 1, "N", N);
|
||
|
}
|
||
|
|
||
|
void alglib::vmoveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
{
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            *vdst = -*vsrc;
    }
    else
    {
        //
        // optimized case
        //
        ae_int_t n2 = n/2;
        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
        {
            vdst[0] = -vsrc[0];
            vdst[1] = -vsrc[1];
        }
        if( n%2!=0 )
            vdst[0] = -vsrc[0];
    }
}

void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N)
{
    vmoveneg(vdst, 1, vsrc, 1, N);
}

void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = -vsrc->x;
                vdst->y = vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = -vsrc->x;
                vdst->y = -vsrc->y;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = -vsrc->x;
                vdst->y = vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = -vsrc->x;
                vdst->y = -vsrc->y;
            }
        }
    }
}

void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
{
    vmoveneg(vdst, 1, vsrc, 1, "N", N);
}

void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
{
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            *vdst = alpha*(*vsrc);
    }
    else
    {
        //
        // optimized case
        //
        ae_int_t n2 = n/2;
        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
        {
            vdst[0] = alpha*vsrc[0];
            vdst[1] = alpha*vsrc[1];
        }
        if( n%2!=0 )
            vdst[0] = alpha*vsrc[0];
    }
}

void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha)
{
    vmove(vdst, 1, vsrc, 1, N, alpha);
}

void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = alpha*vsrc->x;
                vdst->y = -alpha*vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = alpha*vsrc->x;
                vdst->y = alpha*vsrc->y;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = alpha*vsrc->x;
                vdst->y = -alpha*vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = alpha*vsrc->x;
                vdst->y = alpha*vsrc->y;
            }
        }
    }
}

void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
{
    vmove(vdst, 1, vsrc, 1, "N", N, alpha);
}

void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            double ax = alpha.x, ay = alpha.y;
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = ax*vsrc->x+ay*vsrc->y;
                vdst->y = -ax*vsrc->y+ay*vsrc->x;
            }
        }
        else
        {
            double ax = alpha.x, ay = alpha.y;
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = ax*vsrc->x-ay*vsrc->y;
                vdst->y = ax*vsrc->y+ay*vsrc->x;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            double ax = alpha.x, ay = alpha.y;
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = ax*vsrc->x+ay*vsrc->y;
                vdst->y = -ax*vsrc->y+ay*vsrc->x;
            }
        }
        else
        {
            double ax = alpha.x, ay = alpha.y;
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = ax*vsrc->x-ay*vsrc->y;
                vdst->y = ax*vsrc->y+ay*vsrc->x;
            }
        }
    }
}

void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
{
    vmove(vdst, 1, vsrc, 1, "N", N, alpha);
}

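/*
 * Illustrative note (not part of the original sources): for alpha = ax + i*ay
 * the conjugated branch of the routine above computes dst = alpha*conj(src),
 * which expands component-wise to
 *
 *     dst.x =  ax*src.x + ay*src.y
 *     dst.y = -ax*src.y + ay*src.x
 *
 * while the non-conjugated branch is the ordinary complex product.
 */
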
void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
{
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            *vdst += *vsrc;
    }
    else
    {
        //
        // optimized case
        //
        ae_int_t n2 = n/2;
        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
        {
            vdst[0] += vsrc[0];
            vdst[1] += vsrc[1];
        }
        if( n%2!=0 )
            vdst[0] += vsrc[0];
    }
}

void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N)
{
    vadd(vdst, 1, vsrc, 1, N);
}

void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += vsrc->x;
                vdst->y -= vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += vsrc->x;
                vdst->y += vsrc->y;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += vsrc->x;
                vdst->y -= vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += vsrc->x;
                vdst->y += vsrc->y;
            }
        }
    }
}

void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
{
    vadd(vdst, 1, vsrc, 1, "N", N);
}

void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
{
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            *vdst += alpha*(*vsrc);
    }
    else
    {
        //
        // optimized case
        //
        ae_int_t n2 = n/2;
        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
        {
            vdst[0] += alpha*vsrc[0];
            vdst[1] += alpha*vsrc[1];
        }
        if( n%2!=0 )
            vdst[0] += alpha*vsrc[0];
    }
}

void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha)
{
    vadd(vdst, 1, vsrc, 1, N, alpha);
}

void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += alpha*vsrc->x;
                vdst->y -= alpha*vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += alpha*vsrc->x;
                vdst->y += alpha*vsrc->y;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += alpha*vsrc->x;
                vdst->y -= alpha*vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += alpha*vsrc->x;
                vdst->y += alpha*vsrc->y;
            }
        }
    }
}

void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", N, alpha);
}

void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        double ax = alpha.x, ay = alpha.y;
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += ax*vsrc->x+ay*vsrc->y;
                vdst->y -= ax*vsrc->y-ay*vsrc->x;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x += ax*vsrc->x-ay*vsrc->y;
                vdst->y += ax*vsrc->y+ay*vsrc->x;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        double ax = alpha.x, ay = alpha.y;
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += ax*vsrc->x+ay*vsrc->y;
                vdst->y -= ax*vsrc->y-ay*vsrc->x;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x += ax*vsrc->x-ay*vsrc->y;
                vdst->y += ax*vsrc->y+ay*vsrc->x;
            }
        }
    }
}

void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", N, alpha);
}

void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
{
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            *vdst -= *vsrc;
    }
    else
    {
        //
        // optimized case
        //
        ae_int_t n2 = n/2;
        for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
        {
            vdst[0] -= vsrc[0];
            vdst[1] -= vsrc[1];
        }
        if( n%2!=0 )
            vdst[0] -= vsrc[0];
    }
}

void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N)
{
    vsub(vdst, 1, vsrc, 1, N);
}

void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
{
    bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        //
        // general unoptimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x -= vsrc->x;
                vdst->y += vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x -= vsrc->x;
                vdst->y -= vsrc->y;
            }
        }
    }
    else
    {
        //
        // optimized case
        //
        if( bconj )
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x -= vsrc->x;
                vdst->y += vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x -= vsrc->x;
                vdst->y -= vsrc->y;
            }
        }
    }
}

void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
{
    vsub(vdst, 1, vsrc, 1, "N", N);
}

void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
}

void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha)
{
    vadd(vdst, 1, vsrc, 1, N, -alpha);
}

void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
}

void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
}

void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
}

void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
}
void alglib::vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
{
    ae_int_t i;
    if( stride_dst!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst)
            *vdst *= alpha;
    }
    else
    {
        //
        // optimized case
        //
        for(i=0; i<n; i++, vdst++)
            *vdst *= alpha;
    }
}

void alglib::vmul(double *vdst, ae_int_t N, double alpha)
{
    vmul(vdst, 1, N, alpha);
}

void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
{
    ae_int_t i;
    if( stride_dst!=1 )
    {
        //
        // general unoptimized case
        //
        for(i=0; i<n; i++, vdst+=stride_dst)
        {
            vdst->x *= alpha;
            vdst->y *= alpha;
        }
    }
    else
    {
        //
        // optimized case
        //
        for(i=0; i<n; i++, vdst++)
        {
            vdst->x *= alpha;
            vdst->y *= alpha;
        }
    }
}

void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha)
{
    vmul(vdst, 1, N, alpha);
}

void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha)
{
    ae_int_t i;
    if( stride_dst!=1 )
    {
        //
        // general unoptimized case
        //
        double ax = alpha.x, ay = alpha.y;
        for(i=0; i<n; i++, vdst+=stride_dst)
        {
            double dstx = vdst->x, dsty = vdst->y;
            vdst->x = ax*dstx-ay*dsty;
            vdst->y = ax*dsty+ay*dstx;
        }
    }
    else
    {
        //
        // optimized case
        //
        double ax = alpha.x, ay = alpha.y;
        for(i=0; i<n; i++, vdst++)
        {
            double dstx = vdst->x, dsty = vdst->y;
            vdst->x = ax*dstx-ay*dsty;
            vdst->y = ax*dsty+ay*dstx;
        }
    }
}

void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha)
{
    vmul(vdst, 1, N, alpha);
}

alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2)
{
    return n2-n1+1;
}


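/*
 * Usage sketch (illustrative only, not part of the original sources): the
 * strided kernels above are normally reached through the unit-stride
 * wrappers, e.g. with hypothetical caller-owned buffers a and b:
 *
 *     double a[3] = {1.0, 2.0, 3.0}, b[3];
 *     alglib::vmove(b, a, 3);          // b := a
 *     alglib::vadd(b, a, 3, 0.5);      // b := b + 0.5*a
 *     alglib::vmul(b, 3, 2.0);         // b := 2*b
 */
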
/********************************************************************
Matrices and vectors
********************************************************************/
alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_vector *e_ptr, alglib_impl::ae_datatype datatype)
{
    if( e_ptr==NULL || e_ptr->datatype!=datatype )
    {
        const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(msg);
        return;
#endif
    }
    ptr = e_ptr;
    is_frozen_proxy = true;
}

alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ptr = &inner_vec;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init(ptr, 0, datatype, &_state, ae_false);
    ae_state_clear(&_state);
}

alglib::ae_vector_wrapper::ae_vector_wrapper(const ae_vector_wrapper &rhs, alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(rhs.ptr!=NULL, "ALGLIB: ae_vector_wrapper source is not initialized", &_state);
    alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_vector_wrapper datatype check failed", &_state);
    ptr = &inner_vec;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init_copy(ptr, rhs.ptr, &_state, ae_false);
    ae_state_clear(&_state);
}

alglib::ae_vector_wrapper::~ae_vector_wrapper()
{
    if( ptr==&inner_vec )
        ae_vector_clear(ptr);
}

void alglib::ae_vector_wrapper::setlength(ae_int_t iLen)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, ptr==NULL (array was not correctly initialized)", &_state);
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, ptr is frozen proxy array", &_state);
    alglib_impl::ae_vector_set_length(ptr, iLen, &_state);
    alglib_impl::ae_state_clear(&_state);
}

alglib::ae_int_t alglib::ae_vector_wrapper::length() const
{
    if( ptr==NULL )
        return 0;
    return ptr->cnt;
}

void alglib::ae_vector_wrapper::attach_to(alglib_impl::x_vector *new_ptr, alglib_impl::ae_state *_state)
{
    if( ptr==&inner_vec )
        ae_vector_clear(ptr);
    ptr = &inner_vec;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init_attach_to_x(ptr, new_ptr, _state, ae_false);
    is_frozen_proxy = true;
}

const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::assign(const alglib::ae_vector_wrapper &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    if( this==&rhs )
        return *this;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized destination)", &_state);
    ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized source)", &_state);
    ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types do not match)", &_state);
    if( is_frozen_proxy )
        ae_assert(rhs.ptr->cnt==ptr->cnt, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
    if( rhs.ptr->cnt!=ptr->cnt )
        ae_vector_set_length(ptr, rhs.ptr->cnt, &_state);
    memcpy(ptr->ptr.p_ptr, rhs.ptr->ptr.p_ptr, ptr->cnt*alglib_impl::ae_sizeof(ptr->datatype));
    alglib_impl::ae_state_clear(&_state);
    return *this;
}

const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const
{
    return ptr;
}

alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr()
{
    return ptr;
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::ae_vector_wrapper::ae_vector_wrapper(const char *s, alglib_impl::ae_datatype datatype)
{
    std::vector<const char*> svec;
    size_t i;
    char *p = filter_spaces(s);
    if( p==NULL )
        _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
    try
    {
        str_vector_create(p, true, &svec);
        {
            jmp_buf _break_jump;
            alglib_impl::ae_state _state;
            alglib_impl::ae_state_init(&_state);
            if( setjmp(_break_jump) )
                _ALGLIB_CPP_EXCEPTION(_state.error_msg);
            alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
            ptr = &inner_vec;
            is_frozen_proxy = false;
            memset(ptr, 0, sizeof(*ptr));
            ae_vector_init(ptr, (ae_int_t)(svec.size()), datatype, &_state, ae_false);
            ae_state_clear(&_state);
        }
        for(i=0; i<svec.size(); i++)
        {
            if( datatype==alglib_impl::DT_BOOL )
                ptr->ptr.p_bool[i] = parse_bool_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_INT )
                ptr->ptr.p_int[i] = parse_int_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_REAL )
                ptr->ptr.p_double[i] = parse_real_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_COMPLEX )
            {
                alglib::complex t = parse_complex_delim(svec[i],",]");
                ptr->ptr.p_complex[i].x = t.x;
                ptr->ptr.p_complex[i].y = t.y;
            }
        }
        alglib_impl::ae_free(p);
    }
    catch(...)
    {
        alglib_impl::ae_free(p);
        throw;
    }
}
#endif

alglib::boolean_1d_array::boolean_1d_array():ae_vector_wrapper(alglib_impl::DT_BOOL)
{
}

alglib::boolean_1d_array::boolean_1d_array(const alglib::boolean_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_BOOL)
{
}

alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_BOOL)
{
}

const alglib::boolean_1d_array& alglib::boolean_1d_array::operator=(const alglib::boolean_1d_array &rhs)
{
    return static_cast<const alglib::boolean_1d_array&>(assign(rhs));
}

alglib::boolean_1d_array::~boolean_1d_array()
{
}

const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const
{
    return ptr->ptr.p_bool[i];
}

ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i)
{
    return ptr->ptr.p_bool[i];
}

const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.p_bool[i];
}

ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i)
{
    return ptr->ptr.p_bool[i];
}

void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent )
{
    ae_int_t i;

    // setlength, with exception-free error handling fallback code
    setlength(iLen);
    if( ptr==NULL || ptr->cnt!=iLen )
        return;

    // copy
    for(i=0; i<iLen; i++)
        ptr->ptr.p_bool[i] = pContent[i];
}

ae_bool* alglib::boolean_1d_array::getcontent()
{
    return ptr->ptr.p_bool;
}

const ae_bool* alglib::boolean_1d_array::getcontent() const
{
    return ptr->ptr.p_bool;
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::boolean_1d_array::boolean_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_BOOL)
{
}

std::string alglib::boolean_1d_array::tostring() const
{
    if( length()==0 )
        return "[]";
    return arraytostring(&(operator()(0)), length());
}
#endif

alglib::integer_1d_array::integer_1d_array():ae_vector_wrapper(alglib_impl::DT_INT)
{
}

alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_INT)
{
}

alglib::integer_1d_array::integer_1d_array(const alglib::integer_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_INT)
{
}

const alglib::integer_1d_array& alglib::integer_1d_array::operator=(const alglib::integer_1d_array &rhs)
{
    return static_cast<const alglib::integer_1d_array&>(assign(rhs));
}

alglib::integer_1d_array::~integer_1d_array()
{
}

const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const
{
    return ptr->ptr.p_int[i];
}

alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i)
{
    return ptr->ptr.p_int[i];
}

const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.p_int[i];
}

alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i)
{
    return ptr->ptr.p_int[i];
}

void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent )
{
    ae_int_t i;

    // setlength(), handle possible exception-free errors
    setlength(iLen);
    if( ptr==NULL || ptr->cnt!=iLen )
        return;

    // copy
    for(i=0; i<iLen; i++)
        ptr->ptr.p_int[i] = pContent[i];
}

alglib::ae_int_t* alglib::integer_1d_array::getcontent()
{
    return ptr->ptr.p_int;
}

const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const
{
    return ptr->ptr.p_int;
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::integer_1d_array::integer_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_INT)
{
}

std::string alglib::integer_1d_array::tostring() const
{
    if( length()==0 )
        return "[]";
    return arraytostring(&operator()(0), length());
}
#endif

alglib::real_1d_array::real_1d_array():ae_vector_wrapper(alglib_impl::DT_REAL)
{
}

alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_REAL)
{
}

alglib::real_1d_array::real_1d_array(const alglib::real_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_REAL)
{
}

const alglib::real_1d_array& alglib::real_1d_array::operator=(const alglib::real_1d_array &rhs)
{
    return static_cast<const alglib::real_1d_array&>(assign(rhs));
}

alglib::real_1d_array::~real_1d_array()
{
}

const double& alglib::real_1d_array::operator()(ae_int_t i) const
{
    return ptr->ptr.p_double[i];
}

double& alglib::real_1d_array::operator()(ae_int_t i)
{
    return ptr->ptr.p_double[i];
}

const double& alglib::real_1d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.p_double[i];
}

double& alglib::real_1d_array::operator[](ae_int_t i)
{
    return ptr->ptr.p_double[i];
}

void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent )
{
    ae_int_t i;

    // setlength(), handle possible exception-free errors
    setlength(iLen);
    if( ptr==NULL || ptr->cnt!=iLen )
        return;

    // copy
    for(i=0; i<iLen; i++)
        ptr->ptr.p_double[i] = pContent[i];
}

void alglib::real_1d_array::attach_to_ptr(ae_int_t iLen, double *pContent ) // TODO: convert to constructor!!!!!!!
{
    alglib_impl::x_vector x;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
    alglib_impl::ae_assert(iLen>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
    x.cnt = iLen;
    x.datatype = alglib_impl::DT_REAL;
    x.owner = alglib_impl::OWN_CALLER;
    x.last_action = alglib_impl::ACT_UNCHANGED;
    x.x_ptr.p_ptr = pContent;
    attach_to(&x, &_state);
    ae_state_clear(&_state);
}

double* alglib::real_1d_array::getcontent()
{
    return ptr->ptr.p_double;
}

const double* alglib::real_1d_array::getcontent() const
{
    return ptr->ptr.p_double;
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::real_1d_array::real_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_REAL)
{
}

std::string alglib::real_1d_array::tostring(int dps) const
{
    if( length()==0 )
        return "[]";
    return arraytostring(&operator()(0), length(), dps);
}
#endif

alglib::complex_1d_array::complex_1d_array():ae_vector_wrapper(alglib_impl::DT_COMPLEX)
{
}

alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_COMPLEX)
{
}

alglib::complex_1d_array::complex_1d_array(const alglib::complex_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_COMPLEX)
{
}

const alglib::complex_1d_array& alglib::complex_1d_array::operator=(const alglib::complex_1d_array &rhs)
{
    return static_cast<const alglib::complex_1d_array&>(assign(rhs));
}

alglib::complex_1d_array::~complex_1d_array()
{
}

const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const
{
    return *((const alglib::complex*)(ptr->ptr.p_complex+i));
}

alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i)
{
    return *((alglib::complex*)(ptr->ptr.p_complex+i));
}

const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const
{
    return *((const alglib::complex*)(ptr->ptr.p_complex+i));
}

alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i)
{
    return *((alglib::complex*)(ptr->ptr.p_complex+i));
}

void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent )
{
    ae_int_t i;

    // setlength(), handle possible exception-free errors
    setlength(iLen);
    if( ptr==NULL || ptr->cnt!=iLen )
        return;

    // copy
    for(i=0; i<iLen; i++)
    {
        ptr->ptr.p_complex[i].x = pContent[i].x;
        ptr->ptr.p_complex[i].y = pContent[i].y;
    }
}

alglib::complex* alglib::complex_1d_array::getcontent()
{
    return (alglib::complex*)ptr->ptr.p_complex;
}

const alglib::complex* alglib::complex_1d_array::getcontent() const
{
    return (const alglib::complex*)ptr->ptr.p_complex;
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::complex_1d_array::complex_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_COMPLEX)
{
}

std::string alglib::complex_1d_array::tostring(int dps) const
{
    if( length()==0 )
        return "[]";
    return arraytostring(&operator()(0), length(), dps);
}
#endif

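/*
 * Illustrative note (not part of the original sources): the const char*
 * constructors above accept the same bracketed format produced by
 * tostring(), and whitespace is discarded by filter_spaces() before
 * parsing. Assumed example values:
 *
 *     alglib::real_1d_array    v("[1.5,2.5,3.5]");
 *     alglib::complex_1d_array c("[1+2i,3-4i]");
 */
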
alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_matrix *e_ptr, alglib_impl::ae_datatype datatype)
{
    if( e_ptr->datatype!=datatype )
    {
        const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(msg);
        return;
#endif
    }
    ptr = e_ptr;
    is_frozen_proxy = true;
}

alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ptr = &inner_mat;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
    ae_state_clear(&_state);

}

alglib::ae_matrix_wrapper::ae_matrix_wrapper(const ae_matrix_wrapper &rhs, alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    is_frozen_proxy = false;
    ptr = NULL;
    alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_matrix_wrapper datatype check failed", &_state);
    if( rhs.ptr!=NULL )
    {
        ptr = &inner_mat;
        memset(ptr, 0, sizeof(*ptr));
        ae_matrix_init_copy(ptr, rhs.ptr, &_state, ae_false);
    }
    ae_state_clear(&_state);
}

alglib::ae_matrix_wrapper::~ae_matrix_wrapper()
{
    if( ptr==&inner_mat )
        ae_matrix_clear(ptr);
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::ae_matrix_wrapper::ae_matrix_wrapper(const char *s, alglib_impl::ae_datatype datatype)
{
    std::vector< std::vector<const char*> > smat;
    size_t i, j;
    char *p = filter_spaces(s);
    if( p==NULL )
        _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
    try
    {
        str_matrix_create(p, &smat);
        {
            jmp_buf _break_jump;
            alglib_impl::ae_state _state;
            alglib_impl::ae_state_init(&_state);
            if( setjmp(_break_jump) )
                _ALGLIB_CPP_EXCEPTION(_state.error_msg);
            alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
            ptr = &inner_mat;
            is_frozen_proxy = false;
            memset(ptr, 0, sizeof(*ptr));
            if( smat.size()!=0 )
                ae_matrix_init(ptr, (ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), datatype, &_state, ae_false);
            else
                ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
            ae_state_clear(&_state);
        }
        for(i=0; i<smat.size(); i++)
            for(j=0; j<smat[0].size(); j++)
            {
                if( datatype==alglib_impl::DT_BOOL )
                    ptr->ptr.pp_bool[i][j] = parse_bool_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_INT )
                    ptr->ptr.pp_int[i][j] = parse_int_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_REAL )
                    ptr->ptr.pp_double[i][j] = parse_real_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_COMPLEX )
                {
                    alglib::complex t = parse_complex_delim(smat[i][j],",]");
                    ptr->ptr.pp_complex[i][j].x = t.x;
                    ptr->ptr.pp_complex[i][j].y = t.y;
                }
            }
        alglib_impl::ae_free(p);
    }
    catch(...)
    {
        alglib_impl::ae_free(p);
        throw;
    }
}
#endif

void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols) // TODO: automatic allocation of NULL ptr!!!!!
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)", &_state);
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, attempt to resize proxy array", &_state);
    alglib_impl::ae_matrix_set_length(ptr, rows, cols, &_state);
    alglib_impl::ae_state_clear(&_state);
}

alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const
{
    if( ptr==NULL )
        return 0;
    return ptr->rows;
}

alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const
{
    if( ptr==NULL )
        return 0;
    return ptr->cols;
}

bool alglib::ae_matrix_wrapper::isempty() const
{
    return rows()==0 || cols()==0;
}

alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const
{
    if( ptr==NULL )
        return 0;
    return ptr->stride;
}

void alglib::ae_matrix_wrapper::attach_to(alglib_impl::x_matrix *new_ptr, alglib_impl::ae_state *_state)
{
    if( ptr==&inner_mat )
        ae_matrix_clear(ptr);
    ptr = &inner_mat;
    memset(ptr, 0, sizeof(*ptr));
    ae_matrix_init_attach_to_x(ptr, new_ptr, _state, ae_false);
    is_frozen_proxy = true;
}

const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::assign(const alglib::ae_matrix_wrapper &rhs)
{
    ae_int_t i;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    if( this==&rhs )
        return *this;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment to matrix (uninitialized destination)", &_state);
    ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment to array (uninitialized source)", &_state);
    ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types dont match)", &_state);
    if( is_frozen_proxy )
    {
        ae_assert(rhs.ptr->rows==ptr->rows, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
        ae_assert(rhs.ptr->cols==ptr->cols, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
    }
    if( (rhs.ptr->rows!=ptr->rows) || (rhs.ptr->cols!=ptr->cols) )
        ae_matrix_set_length(ptr, rhs.ptr->rows, rhs.ptr->cols, &_state);
    for(i=0; i<ptr->rows; i++)
        memcpy(ptr->ptr.pp_void[i], rhs.ptr->ptr.pp_void[i], ptr->cols*alglib_impl::ae_sizeof(ptr->datatype));
    alglib_impl::ae_state_clear(&_state);
    return *this;
}

const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const
{
    return ptr;
}

alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr()
{
    return ptr;
}

alglib::boolean_2d_array::boolean_2d_array():ae_matrix_wrapper(alglib_impl::DT_BOOL)
{
}

alglib::boolean_2d_array::boolean_2d_array(const alglib::boolean_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_BOOL)
{
}

alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_BOOL)
{
}

alglib::boolean_2d_array::~boolean_2d_array()
{
}

const alglib::boolean_2d_array& alglib::boolean_2d_array::operator=(const alglib::boolean_2d_array &rhs)
{
    return static_cast<const boolean_2d_array&>(assign(rhs));
}

const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const
{
    return ptr->ptr.pp_bool[i][j];
}

ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j)
{
    return ptr->ptr.pp_bool[i][j];
}

const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.pp_bool[i];
}

ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i)
{
    return ptr->ptr.pp_bool[i];
}

void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent )
{
    ae_int_t i, j;

    // setlength(), handle possible exception-free errors
    setlength(irows, icols);
    if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
        return;

    // copy
    for(i=0; i<irows; i++)
        for(j=0; j<icols; j++)
            ptr->ptr.pp_bool[i][j] = pContent[i*icols+j];
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::boolean_2d_array::boolean_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_BOOL)
{
}

std::string alglib::boolean_2d_array::tostring() const
{
    std::string result;
    ae_int_t i;
    if( isempty() )
        return "[[]]";
    result = "[";
    for(i=0; i<rows(); i++)
    {
        if( i!=0 )
            result += ",";
        result += arraytostring(&operator()(i,0), cols());
    }
    result += "]";
    return result;
}
#endif

alglib::integer_2d_array::integer_2d_array():ae_matrix_wrapper(alglib_impl::DT_INT)
{
}

alglib::integer_2d_array::integer_2d_array(const alglib::integer_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_INT)
{
}

alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_INT)
{
}

alglib::integer_2d_array::~integer_2d_array()
{
}

const alglib::integer_2d_array& alglib::integer_2d_array::operator=(const alglib::integer_2d_array &rhs)
{
    return static_cast<const integer_2d_array&>(assign(rhs));
}

const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const
{
    return ptr->ptr.pp_int[i][j];
}

alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j)
{
    return ptr->ptr.pp_int[i][j];
}

const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.pp_int[i];
}

alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i)
{
    return ptr->ptr.pp_int[i];
}

void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent )
{
    ae_int_t i, j;

    // setlength(), handle possible exception-free errors
    setlength(irows, icols);
    if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
        return;

    // copy
    for(i=0; i<irows; i++)
        for(j=0; j<icols; j++)
            ptr->ptr.pp_int[i][j] = pContent[i*icols+j];
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::integer_2d_array::integer_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_INT)
{
}

std::string alglib::integer_2d_array::tostring() const
{
    std::string result;
    ae_int_t i;
    if( isempty() )
        return "[[]]";
    result = "[";
    for(i=0; i<rows(); i++)
    {
        if( i!=0 )
            result += ",";
        result += arraytostring(&operator()(i,0), cols());
    }
    result += "]";
    return result;
}
#endif

alglib::real_2d_array::real_2d_array():ae_matrix_wrapper(alglib_impl::DT_REAL)
{
}

alglib::real_2d_array::real_2d_array(const alglib::real_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_REAL)
{
}

alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_REAL)
{
}

alglib::real_2d_array::~real_2d_array()
{
}

const alglib::real_2d_array& alglib::real_2d_array::operator=(const alglib::real_2d_array &rhs)
{
    return static_cast<const real_2d_array&>(assign(rhs));
}

const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const
{
    return ptr->ptr.pp_double[i][j];
}

double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j)
{
    return ptr->ptr.pp_double[i][j];
}

const double* alglib::real_2d_array::operator[](ae_int_t i) const
{
    return ptr->ptr.pp_double[i];
}

double* alglib::real_2d_array::operator[](ae_int_t i)
{
    return ptr->ptr.pp_double[i];
}

void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent )
{
    ae_int_t i, j;

    // setlength(), handle possible exception-free errors
    setlength(irows, icols);
    if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
        return;

    // copy
    for(i=0; i<irows; i++)
        for(j=0; j<icols; j++)
            ptr->ptr.pp_double[i][j] = pContent[i*icols+j];
}

void alglib::real_2d_array::attach_to_ptr(ae_int_t irows, ae_int_t icols, double *pContent )
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::x_matrix x;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
    alglib_impl::ae_assert(irows>0&&icols>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
    x.rows = irows;
    x.cols = icols;
    x.stride = icols;
    x.datatype = alglib_impl::DT_REAL;
    x.owner = alglib_impl::OWN_CALLER;
    x.last_action = alglib_impl::ACT_UNCHANGED;
    x.x_ptr.p_ptr = pContent;
    attach_to(&x, &_state);
    ae_state_clear(&_state);
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::real_2d_array::real_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_REAL)
{
}

std::string alglib::real_2d_array::tostring(int dps) const
{
    std::string result;
    ae_int_t i;
    if( isempty() )
        return "[[]]";
    result = "[";
    for(i=0; i<rows(); i++)
    {
        if( i!=0 )
            result += ",";
        result += arraytostring(&operator()(i,0), cols(), dps);
    }
    result += "]";
    return result;
}
#endif

alglib::complex_2d_array::complex_2d_array():ae_matrix_wrapper(alglib_impl::DT_COMPLEX)
{
}

alglib::complex_2d_array::complex_2d_array(const alglib::complex_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_COMPLEX)
{
}

alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_COMPLEX)
{
}

alglib::complex_2d_array::~complex_2d_array()
{
}

const alglib::complex_2d_array& alglib::complex_2d_array::operator=(const alglib::complex_2d_array &rhs)
{
    return static_cast<const complex_2d_array&>(assign(rhs));
}

const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const
{
    return *((const alglib::complex*)(ptr->ptr.pp_complex[i]+j));
}

alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j)
{
    return *((alglib::complex*)(ptr->ptr.pp_complex[i]+j));
}

const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const
{
    return (const alglib::complex*)(ptr->ptr.pp_complex[i]);
}

alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i)
{
    return (alglib::complex*)(ptr->ptr.pp_complex[i]);
}

void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent )
{
    ae_int_t i, j;

    // setlength(), handle possible exception-free errors
    setlength(irows, icols);
    if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
        return;

    // copy
    for(i=0; i<irows; i++)
        for(j=0; j<icols; j++)
        {
            ptr->ptr.pp_complex[i][j].x = pContent[i*icols+j].x;
            ptr->ptr.pp_complex[i][j].y = pContent[i*icols+j].y;
        }
}

#if !defined(AE_NO_EXCEPTIONS)
alglib::complex_2d_array::complex_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_COMPLEX)
{
}

std::string alglib::complex_2d_array::tostring(int dps) const
{
    std::string result;
    ae_int_t i;
    if( isempty() )
        return "[[]]";
    result = "[";
    for(i=0; i<rows(); i++)
    {
        if( i!=0 )
            result += ",";
        result += arraytostring(&operator()(i,0), cols(), dps);
    }
    result += "]";
    return result;
}
#endif

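/*
 * Illustrative note (not part of the original sources): the 2D setcontent()
 * methods above read pContent in row-major order, i.e. element (i,j) comes
 * from pContent[i*icols+j], and they return silently when the preceding
 * setlength() failed in the exception-free (AE_NO_EXCEPTIONS) build.
 */
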
/********************************************************************
Internal functions
********************************************************************/
double alglib::get_aenv_nan()
{
    double r;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    r = _alglib_env_state.v_nan;
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return r;
}

double alglib::get_aenv_posinf()
{
    double r;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    r = _alglib_env_state.v_posinf;
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return r;
}

double alglib::get_aenv_neginf()
{
    double r;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    r = _alglib_env_state.v_neginf;
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return r;
}

alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2)
{
    int c1, c2;

    //
    // handle special cases
    //
    if(s1==NULL && s2!=NULL)
        return -1;
    if(s1!=NULL && s2==NULL)
        return +1;
    if(s1==NULL && s2==NULL)
        return 0;

    //
    // compare
    //
    for (;;)
    {
        c1 = *s1;
        c2 = *s2;
        s1++;
        s2++;
        if( c1==0 )
            return c2==0 ? 0 : -1;
        if( c2==0 )
            return c1==0 ? 0 : +1;
        c1 = tolower(c1);
        c2 = tolower(c2);
        if( c1<c2 )
            return -1;
        if( c1>c2 )
            return +1;
    }
}

#if !defined(AE_NO_EXCEPTIONS)
//
// This function filters out all spaces from the string.
// It returns string allocated with ae_malloc().
// On allocation failure returns NULL.
//
char* alglib::filter_spaces(const char *s)
{
    size_t i, n;
    char *r;
    char *r0;
    n = strlen(s);
    r = (char*)alglib_impl::ae_malloc(n+1,NULL);
    if( r==NULL )
        return r;
    for(i=0,r0=r; i<=n; i++,s++)
        if( !isspace(*s) )
        {
            *r0 = *s;
            r0++;
        }
    return r;
}

void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec)
{
    //
    // parse beginning of the string.
    // try to handle "[]" string
    //
    p_vec->clear();
    if( *src!='[' )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
    src++;
    if( *src==']' )
        return;
    p_vec->push_back(src);
    for(;;)
    {
        if( *src==0 )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
        if( *src==']' )
        {
            if( src[1]==0 || !match_head_only)
                return;
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
        }
        if( *src==',' )
        {
            p_vec->push_back(src+1);
            src++;
            continue;
        }
        src++;
    }
}

void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat)
{
    p_mat->clear();

    //
    // Try to handle "[[]]" string
    //
    if( strcmp(src, "[[]]")==0 )
        return;

    //
    // Parse non-empty string
    //
    if( *src!='[' )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
    src++;
    for(;;)
    {
        p_mat->push_back(std::vector<const char*>());
        str_vector_create(src, false, &p_mat->back());
        if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
        src = strchr(src, ']');
        if( src==NULL )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
        src++;
        if( *src==',' )
        {
            src++;
            continue;
        }
        if( *src==']' )
            break;
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
    }
    src++;
    if( *src!=0 )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
}

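/*
 * Illustrative note (not part of the original sources): str_matrix_create()
 * expects a row-major bracketed initializer such as "[[1,2],[3,4]]"; every
 * row must contain the same number of elements, and "[[]]" denotes an empty
 * matrix.
 */
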
ae_bool alglib::parse_bool_delim(const char *s, const char *delim)
{
    const char *p;
    char buf[8];

    // try to parse false
    p = "false";
    memset(buf, 0, sizeof(buf));
    strncpy(buf, s, strlen(p));
    if( my_stricmp(buf, p)==0 )
    {
        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return ae_false;
    }

    // try to parse true
    p = "true";
    memset(buf, 0, sizeof(buf));
    strncpy(buf, s, strlen(p));
    if( my_stricmp(buf, p)==0 )
    {
        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return ae_true;
    }

    // error
    _ALGLIB_CPP_EXCEPTION("Cannot parse value");
}

alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim)
{
    const char *p;
    long long_val;
    volatile ae_int_t ae_val;

    p = s;

    //
    // check string structure:
    // * leading sign
    // * at least one digit
    // * delimiter
    //
    if( *s=='-' || *s=='+' )
        s++;
    if( *s==0 || strchr("1234567890",*s)==NULL)
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    while( *s!=0 && strchr("1234567890",*s)!=NULL )
        s++;
    if( *s==0 || strchr(delim,*s)==NULL )
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");

    // convert and ensure that value fits into ae_int_t
    s = p;
    long_val = atol(s);
    ae_val = long_val;
    if( ae_val!=long_val )
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    return ae_val;
}

bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s)
|
||
|
{
|
||
|
const char *p;
|
||
|
char *t;
|
||
|
bool has_digits;
|
||
|
char buf[64];
|
||
|
int isign;
|
||
|
lconv *loc;
|
||
|
|
||
|
p = s;
|
||
|
|
||
|
//
|
||
|
// check string structure and decide what to do
|
||
|
//
|
||
|
isign = 1;
|
||
|
if( *s=='-' || *s=='+' )
|
||
|
{
|
||
|
isign = *s=='-' ? -1 : +1;
|
||
|
s++;
|
||
|
}
|
||
|
memset(buf, 0, sizeof(buf));
|
||
|
strncpy(buf, s, 3);
|
||
|
if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 )
|
||
|
{
|
||
|
//
|
||
|
// [sign] [ddd] [.] [ddd] [e|E[sign]ddd]
|
||
|
//
|
||
|
has_digits = false;
|
||
|
if( *s!=0 && strchr("1234567890",*s)!=NULL )
|
||
|
{
|
||
|
has_digits = true;
|
||
|
while( *s!=0 && strchr("1234567890",*s)!=NULL )
|
||
|
s++;
|
||
|
}
|
||
|
if( *s=='.' )
|
||
|
s++;
|
||
|
if( *s!=0 && strchr("1234567890",*s)!=NULL )
|
||
|
{
|
||
|
has_digits = true;
|
||
|
while( *s!=0 && strchr("1234567890",*s)!=NULL )
|
||
|
s++;
|
||
|
}
|
||
|
if (!has_digits )
|
||
|
return false;
|
||
|
if( *s=='e' || *s=='E' )
|
||
|
{
|
||
|
s++;
|
||
|
if( *s=='-' || *s=='+' )
|
||
|
s++;
|
||
|
if( *s==0 || strchr("1234567890",*s)==NULL )
|
||
|
return false;
|
||
|
while( *s!=0 && strchr("1234567890",*s)!=NULL )
|
||
|
s++;
|
||
|
}
|
||
|
if( *s==0 || strchr(delim,*s)==NULL )
|
||
|
return false;
|
||
|
*new_s = s;
|
||
|
|
||
|
//
|
||
|
// finite value conversion
|
||
|
//
|
||
|
if( *new_s-p>=(int)sizeof(buf) )
|
||
|
return false;
|
||
|
strncpy(buf, p, (size_t)(*new_s-p));
|
||
|
buf[*new_s-p] = 0;
|
||
|
loc = localeconv();
|
||
|
t = strchr(buf,'.');
|
||
|
if( t!=NULL )
|
||
|
*t = *loc->decimal_point;
|
||
|
*result = atof(buf);
|
||
|
return true;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
//
|
||
|
// check delimiter and update *new_s
|
||
|
//
|
||
|
s += 3;
|
||
|
if( *s==0 || strchr(delim,*s)==NULL )
|
||
|
return false;
|
||
|
*new_s = s;
|
||
|
|
||
|
//
|
||
|
// NAN, INF conversion
|
||
|
//
|
||
|
if( my_stricmp(buf,"nan")==0 )
|
||
|
*result = fp_nan;
|
||
|
if( my_stricmp(buf,"inf")==0 )
|
||
|
*result = isign>0 ? fp_posinf : fp_neginf;
|
||
|
return true;
|
||
|
}
|
||
|
}
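/*
 * The finite-value branch of _parse_real_delim() has to cope with the fact
 * that atof() honors the current C locale: in, say, a German locale the
 * decimal separator is ',' rather than '.'. The parser therefore rewrites
 * the '.' in its private copy of the token to localeconv()->decimal_point
 * before calling atof(). A self-contained sketch of that trick
 * (illustrative only):
 *
 *     #include <clocale>
 *     #include <cstdlib>
 *     #include <cstring>
 *
 *     static double atof_localized(const char *token)
 *     {
 *         char buf[64];
 *         strncpy(buf, token, sizeof(buf)-1);
 *         buf[sizeof(buf)-1] = 0;
 *         char *dot = strchr(buf, '.');
 *         if( dot!=NULL )
 *             *dot = *localeconv()->decimal_point;  // e.g. ',' in de_DE
 *         return atof(buf);
 *     }
 */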
|
||
|
|
||
|
double alglib::parse_real_delim(const char *s, const char *delim)
{
    double result;
    const char *new_s;
    if( !_parse_real_delim(s, delim, &result, &new_s) )
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    return result;
}

alglib::complex alglib::parse_complex_delim(const char *s, const char *delim)
{
    double d_result;
    const char *new_s;
    alglib::complex c_result;

    // parse as real value
    if( _parse_real_delim(s, delim, &d_result, &new_s) )
        return d_result;

    // parse as "a+bi" or "a-bi"
    if( _parse_real_delim(s, "+-", &c_result.x, &new_s) )
    {
        s = new_s;
        if( !_parse_real_delim(s, "i", &c_result.y, &new_s) )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        s = new_s+1;
        if( *s==0 || strchr(delim,*s)==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return c_result;
    }

    // parse as complex value "bi+a" or "bi-a"
    if( _parse_real_delim(s, "i", &c_result.y, &new_s) )
    {
        s = new_s+1;
        if( *s==0 )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        if( strchr(delim,*s)!=NULL )
        {
            c_result.x = 0;
            return c_result;
        }
        if( strchr("+-",*s)!=NULL )
        {
            if( !_parse_real_delim(s, delim, &c_result.x, &new_s) )
                _ALGLIB_CPP_EXCEPTION("Cannot parse value");
            return c_result;
        }
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    }

    // error
    _ALGLIB_CPP_EXCEPTION("Cannot parse value");
}

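/*
 * Summary of the literal forms accepted by parse_complex_delim() above;
 * every value must be followed by one of the characters in delim
 * (here shown with ',' as the trailing delimiter):
 *
 *     "1.5,"      ->  1.5 + 0.0i     (plain real)
 *     "1.5+2i,"   ->  1.5 + 2.0i     ("a+bi")
 *     "1.5-2i,"   ->  1.5 - 2.0i     ("a-bi")
 *     "2i,"       ->  0.0 + 2.0i     (pure imaginary)
 *     "2i-1.5,"   -> -1.5 + 2.0i     ("bi-a")
 *
 * These are the internal helpers used when vectors/matrices are built from
 * string initializers such as "[1.5+2i,3]".
 */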
std::string alglib::arraytostring(const bool *ptr, ae_int_t n)
|
||
|
{
|
||
|
std::string result;
|
||
|
ae_int_t i;
|
||
|
result = "[";
|
||
|
for(i=0; i<n; i++)
|
||
|
{
|
||
|
if( i!=0 )
|
||
|
result += ",";
|
||
|
result += ptr[i] ? "true" : "false";
|
||
|
}
|
||
|
result += "]";
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n)
|
||
|
{
|
||
|
std::string result;
|
||
|
ae_int_t i;
|
||
|
char buf[64];
|
||
|
result = "[";
|
||
|
for(i=0; i<n; i++)
|
||
|
{
|
||
|
if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) )
|
||
|
_ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
|
||
|
result += buf;
|
||
|
}
|
||
|
result += "]";
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
std::string alglib::arraytostring(const double *ptr, ae_int_t n, int _dps)
|
||
|
{
|
||
|
std::string result;
|
||
|
ae_int_t i;
|
||
|
char buf[64];
|
||
|
char mask1[64];
|
||
|
char mask2[64];
|
||
|
int dps = _dps>=0 ? _dps : -_dps;
|
||
|
result = "[";
|
||
|
if( sprintf(mask1, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask1) )
|
||
|
_ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
|
||
|
if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) )
|
||
|
_ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
|
||
|
for(i=0; i<n; i++)
|
||
|
{
|
||
|
buf[0] = 0;
|
||
|
if( fp_isfinite(ptr[i]) )
|
||
|
{
|
||
|
if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) )
|
||
|
_ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
|
||
|
}
|
||
|
else if( fp_isnan(ptr[i]) )
|
||
|
strcpy(buf, i==0 ? "NAN" : ",NAN");
|
||
|
else if( fp_isposinf(ptr[i]) )
|
||
|
strcpy(buf, i==0 ? "+INF" : ",+INF");
|
||
|
else if( fp_isneginf(ptr[i]) )
|
||
|
strcpy(buf, i==0 ? "-INF" : ",-INF");
|
||
|
result += buf;
|
||
|
}
|
||
|
result += "]";
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps)
|
||
|
{
|
||
|
std::string result;
|
||
|
ae_int_t i;
|
||
|
result = "[";
|
||
|
for(i=0; i<n; i++)
|
||
|
{
|
||
|
if( i!=0 )
|
||
|
result += ",";
|
||
|
result += ptr[i].tostring(dps);
|
||
|
}
|
||
|
result += "]";
|
||
|
return result;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
standard functions
|
||
|
********************************************************************/
|
||
|
int alglib::sign(double x)
|
||
|
{
|
||
|
if( x>0 ) return 1;
|
||
|
if( x<0 ) return -1;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
double alglib::randomreal()
|
||
|
{
|
||
|
int i1 = rand();
|
||
|
int i2 = rand();
|
||
|
double mx = (double)(RAND_MAX)+1.0;
|
||
|
volatile double tmp0 = i2/mx;
|
||
|
volatile double tmp1 = i1+tmp0;
|
||
|
return tmp1/mx;
|
||
|
}
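/*
 * randomreal() combines two rand() calls to get roughly twice the
 * resolution of a single call: with mx = RAND_MAX+1 the result is
 *
 *     (i1 + i2/mx)/mx  =  i1/mx + i2/mx^2,    0 <= result < 1
 *
 * i.e. i1 supplies the high-order "digits" and i2 the low-order ones; the
 * volatile temporaries force the intermediate results to be materialized
 * instead of being folded by the compiler.
 */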
|
||
|
|
||
|
alglib::ae_int_t alglib::randominteger(alglib::ae_int_t maxv)
|
||
|
{
|
||
|
return ((alglib::ae_int_t)rand())%maxv;
|
||
|
}
|
||
|
|
||
|
int alglib::round(double x)
|
||
|
{ return int(floor(x+0.5)); }
|
||
|
|
||
|
int alglib::trunc(double x)
|
||
|
{ return int(x>0 ? floor(x) : ceil(x)); }
|
||
|
|
||
|
int alglib::ifloor(double x)
|
||
|
{ return int(floor(x)); }
|
||
|
|
||
|
int alglib::iceil(double x)
|
||
|
{ return int(ceil(x)); }
|
||
|
|
||
|
double alglib::pi()
|
||
|
{ return 3.14159265358979323846; }
|
||
|
|
||
|
double alglib::sqr(double x)
|
||
|
{ return x*x; }
|
||
|
|
||
|
int alglib::maxint(int m1, int m2)
|
||
|
{
|
||
|
return m1>m2 ? m1 : m2;
|
||
|
}
|
||
|
|
||
|
int alglib::minint(int m1, int m2)
|
||
|
{
|
||
|
return m1>m2 ? m2 : m1;
|
||
|
}
|
||
|
|
||
|
double alglib::maxreal(double m1, double m2)
|
||
|
{
|
||
|
return m1>m2 ? m1 : m2;
|
||
|
}
|
||
|
|
||
|
double alglib::minreal(double m1, double m2)
|
||
|
{
|
||
|
return m1>m2 ? m2 : m1;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_eq(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
volatile double x = v1;
|
||
|
volatile double y = v2;
|
||
|
return x==y;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_neq(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
return !fp_eq(v1,v2);
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_less(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
volatile double x = v1;
|
||
|
volatile double y = v2;
|
||
|
return x<y;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_less_eq(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
volatile double x = v1;
|
||
|
volatile double y = v2;
|
||
|
return x<=y;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_greater(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
volatile double x = v1;
|
||
|
volatile double y = v2;
|
||
|
return x>y;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_greater_eq(double v1, double v2)
|
||
|
{
|
||
|
// IEEE-strict floating point comparison
|
||
|
volatile double x = v1;
|
||
|
volatile double y = v2;
|
||
|
return x>=y;
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_isnan(double x)
|
||
|
{
|
||
|
return alglib_impl::ae_isnan_stateless(x,endianness);
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_isposinf(double x)
|
||
|
{
|
||
|
return alglib_impl::ae_isposinf_stateless(x,endianness);
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_isneginf(double x)
|
||
|
{
|
||
|
return alglib_impl::ae_isneginf_stateless(x,endianness);
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_isinf(double x)
|
||
|
{
|
||
|
return alglib_impl::ae_isinf_stateless(x,endianness);
|
||
|
}
|
||
|
|
||
|
bool alglib::fp_isfinite(double x)
|
||
|
{
|
||
|
return alglib_impl::ae_isfinite_stateless(x,endianness);
|
||
|
}
|
||
|
|
||
|
/********************************************************************
|
||
|
CSV functions
|
||
|
********************************************************************/
|
||
|
#if !defined(AE_NO_EXCEPTIONS)
|
||
|
void alglib::read_csv(const char *filename, char separator, int flags, alglib::real_2d_array &out)
|
||
|
{
|
||
|
int flag;
|
||
|
|
||
|
//
|
||
|
// Parameters
|
||
|
//
|
||
|
bool skip_first_row = (flags&CSV_SKIP_HEADERS)!=0;
|
||
|
|
||
|
//
|
||
|
// Prepare empty output array
|
||
|
//
|
||
|
out.setlength(0,0);
|
||
|
|
||
|
//
|
||
|
// Open file, determine size, read contents
|
||
|
//
|
||
|
FILE *f_in = fopen(filename, "rb");
|
||
|
if( f_in==NULL )
|
||
|
_ALGLIB_CPP_EXCEPTION("read_csv: unable to open input file");
|
||
|
flag = fseek(f_in, 0, SEEK_END);
|
||
|
AE_CRITICAL_ASSERT(flag==0);
|
||
|
long int _filesize = ftell(f_in);
|
||
|
AE_CRITICAL_ASSERT(_filesize>=0);
|
||
|
if( _filesize==0 )
|
||
|
{
|
||
|
// empty file, return empty array, success
|
||
|
fclose(f_in);
|
||
|
return;
|
||
|
}
|
||
|
size_t filesize = _filesize;
|
||
|
std::vector<char> v_buf;
|
||
|
v_buf.resize(filesize+2, 0);
|
||
|
char *p_buf = &v_buf[0];
|
||
|
flag = fseek(f_in, 0, SEEK_SET);
|
||
|
AE_CRITICAL_ASSERT(flag==0);
|
||
|
size_t bytes_read = fread ((void*)p_buf, 1, filesize, f_in);
|
||
|
AE_CRITICAL_ASSERT(bytes_read==filesize);
|
||
|
fclose(f_in);
|
||
|
|
||
|
//
|
||
|
// Normalize file contents:
|
||
|
// * replace 0x0 by spaces
|
||
|
// * remove trailing spaces and newlines
|
||
|
// * append trailing '\n' and '\0' characters
|
||
|
// Return if file contains only spaces/newlines.
|
||
|
//
|
||
|
for(size_t i=0; i<filesize; i++)
|
||
|
if( p_buf[i]==0 )
|
||
|
p_buf[i] = ' ';
|
||
|
for(; filesize>0; )
|
||
|
{
|
||
|
char c = p_buf[filesize-1];
|
||
|
if( c==' ' || c=='\t' || c=='\n' || c=='\r' )
|
||
|
{
|
||
|
filesize--;
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
if( filesize==0 )
|
||
|
return;
|
||
|
p_buf[filesize+0] = '\n';
|
||
|
p_buf[filesize+1] = '\0';
|
||
|
filesize+=2;
|
||
|
|
||
|
//
|
||
|
// Scan dataset.
|
||
|
//
|
||
|
size_t rows_count = 0, cols_count = 0, max_length = 0;
|
||
|
std::vector<size_t> offsets, lengths;
|
||
|
for(size_t row_start=0; p_buf[row_start]!=0x0; )
|
||
|
{
|
||
|
// determine row length
|
||
|
size_t row_length;
|
||
|
for(row_length=0; p_buf[row_start+row_length]!='\n'; row_length++);
|
||
|
|
||
|
// determine cols count, perform integrity check
|
||
|
size_t cur_cols_cnt=1;
|
||
|
for(size_t idx=0; idx<row_length; idx++)
|
||
|
if( p_buf[row_start+idx]==separator )
|
||
|
cur_cols_cnt++;
|
||
|
if( cols_count>0 && cols_count!=cur_cols_cnt )
|
||
|
_ALGLIB_CPP_EXCEPTION("read_csv: non-rectangular contents, rows have different sizes");
|
||
|
cols_count = cur_cols_cnt;
|
||
|
|
||
|
// store offsets and lengths of the fields
|
||
|
size_t cur_offs = 0;
|
||
|
for(size_t idx=0; idx<row_length+1; idx++)
|
||
|
if( p_buf[row_start+idx]==separator || p_buf[row_start+idx]=='\n' )
|
||
|
{
|
||
|
offsets.push_back(row_start+cur_offs);
|
||
|
lengths.push_back(idx-cur_offs);
|
||
|
max_length = idx-cur_offs>max_length ? idx-cur_offs : max_length;
|
||
|
cur_offs = idx+1;
|
||
|
}
|
||
|
|
||
|
// advance row start
|
||
|
rows_count++;
|
||
|
row_start = row_start+row_length+1;
|
||
|
}
|
||
|
AE_CRITICAL_ASSERT(rows_count>=1);
|
||
|
AE_CRITICAL_ASSERT(cols_count>=1);
|
||
|
AE_CRITICAL_ASSERT(cols_count*rows_count==offsets.size());
|
||
|
AE_CRITICAL_ASSERT(cols_count*rows_count==lengths.size());
|
||
|
if( rows_count==1 && skip_first_row ) // empty output, return
|
||
|
return;
|
||
|
|
||
|
//
|
||
|
// Convert
|
||
|
//
|
||
|
size_t row0 = skip_first_row ? 1 : 0;
|
||
|
size_t row1 = rows_count;
|
||
|
lconv *loc = localeconv();
|
||
|
out.setlength(row1-row0, cols_count);
|
||
|
for(size_t ridx=row0; ridx<row1; ridx++)
|
||
|
for(size_t cidx=0; cidx<cols_count; cidx++)
|
||
|
{
|
||
|
char *p_field = p_buf+offsets[ridx*cols_count+cidx];
|
||
|
size_t field_len = lengths[ridx*cols_count+cidx];
|
||
|
for(size_t idx=0; idx<field_len; idx++)
|
||
|
if( p_field[idx]=='.' || p_field[idx]==',' )
|
||
|
p_field[idx] = *loc->decimal_point;
|
||
|
out[ridx-row0][cidx] = atof(p_field);
|
||
|
}
|
||
|
}
|
||
|
#endif
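/*
 * Typical use of read_csv() (a sketch only; the file name and the printed
 * messages are illustrative, CSV_SKIP_HEADERS is the flag tested above and
 * declared in ap.h):
 *
 *     #include "ap.h"
 *     #include <iostream>
 *
 *     void load_example()
 *     {
 *         alglib::real_2d_array a;
 *         try
 *         {
 *             // comma-separated file, first row treated as a header and skipped
 *             alglib::read_csv("data.csv", ',', alglib::CSV_SKIP_HEADERS, a);
 *             std::cout << a.rows() << "x" << a.cols() << " values loaded\n";
 *         }
 *         catch(alglib::ap_error &e)
 *         {
 *             std::cout << "read_csv failed: " << e.msg << "\n";
 *         }
 *     }
 */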
|
||
|
|
||
|
|
||
|
|
||
|
/********************************************************************
Trace functions
********************************************************************/
void alglib::trace_file(std::string tags, std::string filename)
{
    alglib_impl::ae_trace_file(tags.c_str(), filename.c_str());
}

void alglib::trace_disable()
{
    alglib_impl::ae_trace_disable();
}


/////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// THIS SECTION CONTAINS OPTIMIZED LINEAR ALGEBRA CODE
|
||
|
// IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
|
||
|
//
|
||
|
/////////////////////////////////////////////////////////////////////////
|
||
|
namespace alglib_impl
|
||
|
{
|
||
|
#define alglib_simd_alignment 16
|
||
|
|
||
|
#define alglib_r_block 32
|
||
|
#define alglib_half_r_block 16
|
||
|
#define alglib_twice_r_block 64
|
||
|
|
||
|
#define alglib_c_block 16
|
||
|
#define alglib_half_c_block 8
|
||
|
#define alglib_twice_c_block 32
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine calculates fast 32x32 real matrix-vector product:
|
||
|
|
||
|
y := beta*y + alpha*A*x
|
||
|
|
||
|
using either generic C code or native optimizations (if available)
|
||
|
|
||
|
IMPORTANT:
|
||
|
* A must be stored in row-major order,
|
||
|
stride is alglib_r_block,
|
||
|
aligned on alglib_simd_alignment boundary
|
||
|
* X must be aligned on alglib_simd_alignment boundary
|
||
|
* Y may be non-aligned
|
||
|
********************************************************************/
|
||
|
void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
|
||
|
{
|
||
|
ae_int_t i, k;
|
||
|
const double *pa0, *pa1, *pb;
|
||
|
|
||
|
pa0 = a;
|
||
|
pa1 = a+alglib_r_block;
|
||
|
pb = x;
|
||
|
for(i=0; i<16; i++)
|
||
|
{
|
||
|
double v0 = 0, v1 = 0;
|
||
|
for(k=0; k<4; k++)
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
v1 += pa1[0]*pb[0];
|
||
|
v0 += pa0[1]*pb[1];
|
||
|
v1 += pa1[1]*pb[1];
|
||
|
v0 += pa0[2]*pb[2];
|
||
|
v1 += pa1[2]*pb[2];
|
||
|
v0 += pa0[3]*pb[3];
|
||
|
v1 += pa1[3]*pb[3];
|
||
|
v0 += pa0[4]*pb[4];
|
||
|
v1 += pa1[4]*pb[4];
|
||
|
v0 += pa0[5]*pb[5];
|
||
|
v1 += pa1[5]*pb[5];
|
||
|
v0 += pa0[6]*pb[6];
|
||
|
v1 += pa1[6]*pb[6];
|
||
|
v0 += pa0[7]*pb[7];
|
||
|
v1 += pa1[7]*pb[7];
|
||
|
pa0 += 8;
|
||
|
pa1 += 8;
|
||
|
pb += 8;
|
||
|
}
|
||
|
y[0] = beta*y[0]+alpha*v0;
|
||
|
y[stride] = beta*y[stride]+alpha*v1;
|
||
|
|
||
|
/*
|
||
|
* now we've processed rows I and I+1,
|
||
|
* pa0 and pa1 are pointing to rows I+1 and I+2.
|
||
|
* move to I+2 and I+3.
|
||
|
*/
|
||
|
pa0 += alglib_r_block;
|
||
|
pa1 += alglib_r_block;
|
||
|
pb = x;
|
||
|
y+=2*stride;
|
||
|
}
|
||
|
}
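/*
 * Reference semantics of the kernel above (a plain sketch, without the
 * manual unrolling): A is a 32x32 block stored row by row with row stride
 * alglib_r_block, so element (i,k) lives at a[i*alglib_r_block+k], while y
 * uses the caller-supplied stride:
 *
 *     // y := beta*y + alpha*A*x for the 32x32 case
 *     static void mv32_reference(const double *a, const double *x, double *y,
 *                                ae_int_t stride, double alpha, double beta)
 *     {
 *         for(ae_int_t i=0; i<32; i++)
 *         {
 *             double v = 0;
 *             for(ae_int_t k=0; k<32; k++)
 *                 v += a[i*alglib_r_block+k]*x[k];
 *             y[i*stride] = beta*y[i*stride]+alpha*v;
 *         }
 *     }
 */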
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
This function calculates MxN real matrix-vector product:
|
||
|
|
||
|
y := beta*y + alpha*A*x
|
||
|
|
||
|
using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
|
||
|
|
||
|
If beta is zero, we do not use previous values of y (they are overwritten
|
||
|
by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
|
||
|
product is calculated (only beta is updated); however, this update is not
|
||
|
efficient and this function should NOT be used for multiplication of
|
||
|
vector and scalar.
|
||
|
|
||
|
IMPORTANT:
|
||
|
* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
|
||
|
* A must be stored in row-major order with stride equal to alglib_r_block
|
||
|
*************************************************************************/
|
||
|
void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
|
||
|
{
|
||
|
/*
|
||
|
* Handle special cases:
|
||
|
* - alpha is zero or n is zero
|
||
|
* - m is zero
|
||
|
*/
|
||
|
if( m==0 )
|
||
|
return;
|
||
|
if( alpha==0.0 || n==0 )
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( beta==0.0 )
|
||
|
{
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
*y = 0.0;
|
||
|
y += stride;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
*y *= beta;
|
||
|
y += stride;
|
||
|
}
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle general case: nonzero alpha, n and m
|
||
|
*
|
||
|
*/
|
||
|
if( m==32 && n==32 )
|
||
|
{
|
||
|
/*
|
||
|
* 32x32 case, maybe we have something better than the general implementation
|
||
|
*/
|
||
|
_ialglib_mv_32(a, x, y, stride, alpha, beta);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
ae_int_t i, k, m2, n8, n2, ntrail2;
|
||
|
const double *pa0, *pa1, *pb;
|
||
|
|
||
|
/*
|
||
|
* First M/2 rows of A are processed in pairs.
|
||
|
* optimized code is used.
|
||
|
*/
|
||
|
m2 = m/2;
|
||
|
n8 = n/8;
|
||
|
ntrail2 = (n-8*n8)/2;
|
||
|
for(i=0; i<m2; i++)
|
||
|
{
|
||
|
double v0 = 0, v1 = 0;
|
||
|
|
||
|
/*
|
||
|
* 'a' points to the part of the matrix which
|
||
|
* is not processed yet
|
||
|
*/
|
||
|
pb = x;
|
||
|
pa0 = a;
|
||
|
pa1 = a+alglib_r_block;
|
||
|
a += alglib_twice_r_block;
|
||
|
|
||
|
/*
|
||
|
* 8 elements per iteration
|
||
|
*/
|
||
|
for(k=0; k<n8; k++)
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
v1 += pa1[0]*pb[0];
|
||
|
v0 += pa0[1]*pb[1];
|
||
|
v1 += pa1[1]*pb[1];
|
||
|
v0 += pa0[2]*pb[2];
|
||
|
v1 += pa1[2]*pb[2];
|
||
|
v0 += pa0[3]*pb[3];
|
||
|
v1 += pa1[3]*pb[3];
|
||
|
v0 += pa0[4]*pb[4];
|
||
|
v1 += pa1[4]*pb[4];
|
||
|
v0 += pa0[5]*pb[5];
|
||
|
v1 += pa1[5]*pb[5];
|
||
|
v0 += pa0[6]*pb[6];
|
||
|
v1 += pa1[6]*pb[6];
|
||
|
v0 += pa0[7]*pb[7];
|
||
|
v1 += pa1[7]*pb[7];
|
||
|
pa0 += 8;
|
||
|
pa1 += 8;
|
||
|
pb += 8;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* 2 elements per iteration
|
||
|
*/
|
||
|
for(k=0; k<ntrail2; k++)
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
v1 += pa1[0]*pb[0];
|
||
|
v0 += pa0[1]*pb[1];
|
||
|
v1 += pa1[1]*pb[1];
|
||
|
pa0 += 2;
|
||
|
pa1 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* last element, if needed
|
||
|
*/
|
||
|
if( n%2!=0 )
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
v1 += pa1[0]*pb[0];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* final update
|
||
|
*/
|
||
|
if( beta!=0 )
|
||
|
{
|
||
|
y[0] = beta*y[0]+alpha*v0;
|
||
|
y[stride] = beta*y[stride]+alpha*v1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
y[0] = alpha*v0;
|
||
|
y[stride] = alpha*v1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* move to the next pair of elements
|
||
|
*/
|
||
|
y+=2*stride;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Last (odd) row is processed with less optimized code.
|
||
|
*/
|
||
|
if( m%2!=0 )
|
||
|
{
|
||
|
double v0 = 0;
|
||
|
|
||
|
/*
|
||
|
* 'a' points to the part of the matrix which
|
||
|
* is not processed yet
|
||
|
*/
|
||
|
pb = x;
|
||
|
pa0 = a;
|
||
|
|
||
|
/*
|
||
|
* 2 elements per iteration
|
||
|
*/
|
||
|
n2 = n/2;
|
||
|
for(k=0; k<n2; k++)
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0]+pa0[1]*pb[1];
|
||
|
pa0 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* last element, if needed
|
||
|
*/
|
||
|
if( n%2!=0 )
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
|
||
|
/*
|
||
|
* final update
|
||
|
*/
|
||
|
if( beta!=0 )
|
||
|
y[0] = beta*y[0]+alpha*v0;
|
||
|
else
|
||
|
y[0] = alpha*v0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
This function calculates MxN real matrix-vector product:
|
||
|
|
||
|
y := beta*y + alpha*A*x
|
||
|
|
||
|
using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
|
||
|
|
||
|
If beta is zero, we do not use previous values of y (they are overwritten
|
||
|
by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
|
||
|
product is calculated (only beta is updated); however, this update is not
|
||
|
efficient and this function should NOT be used for multiplication of
|
||
|
vector and scalar.
|
||
|
|
||
|
IMPORTANT:
|
||
|
* 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
|
||
|
* A must be stored in row-major order with stride equal to alglib_r_block
|
||
|
* y may be non-aligned
|
||
|
* both A and x must have same offset with respect to 16-byte boundary:
|
||
|
either both are aligned, or both are aligned with offset 8. Function
|
||
|
will crash your system if you try to call it with misaligned or
|
||
|
incorrectly aligned data.
|
||
|
|
||
|
This function supports SSE2; it can be used when:
|
||
|
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
|
||
|
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
|
||
|
|
||
|
If (1) is failed, this function will be undefined. If (2) is failed, call
|
||
|
to this function will probably crash your system.
|
||
|
|
||
|
If you want to know whether it is safe to call it, you should check
|
||
|
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
|
||
|
and will do its work.
|
||
|
*************************************************************************/
|
||
|
#if defined(AE_HAS_SSE2_INTRINSICS)
|
||
|
void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
|
||
|
{
|
||
|
ae_int_t i, k, n2;
|
||
|
ae_int_t mb3, mtail, nhead, nb8, nb2, ntail;
|
||
|
const double *pa0, *pa1, *pa2, *pb;
|
||
|
__m128d v0, v1, v2, va0, va1, va2, vx, vtmp;
|
||
|
|
||
|
/*
|
||
|
* Handle special cases:
|
||
|
* - alpha is zero or n is zero
|
||
|
* - m is zero
|
||
|
*/
|
||
|
if( m==0 )
|
||
|
return;
|
||
|
if( alpha==0.0 || n==0 )
|
||
|
{
|
||
|
if( beta==0.0 )
|
||
|
{
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
*y = 0.0;
|
||
|
y += stride;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
*y *= beta;
|
||
|
y += stride;
|
||
|
}
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle general case: nonzero alpha, n and m
|
||
|
*
|
||
|
* We divide problem as follows...
|
||
|
*
|
||
|
* Rows M are divided into:
|
||
|
* - mb3 blocks, each 3xN
|
||
|
* - mtail blocks, each 1xN
|
||
|
*
|
||
|
* Within a row, elements are divided into:
|
||
|
* - nhead 1x1 blocks (used to align the rest, either 0 or 1)
|
||
|
* - nb8 1x8 blocks, aligned to 16-byte boundary
|
||
|
* - nb2 1x2 blocks, aligned to 16-byte boundary
|
||
|
* - ntail 1x1 blocks, aligned too (although we don't rely on it)
|
||
|
*
|
||
|
*/
|
||
|
n2 = n/2;
|
||
|
mb3 = m/3;
|
||
|
mtail = m%3;
|
||
|
nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1;
|
||
|
nb8 = (n-nhead)/8;
|
||
|
nb2 = (n-nhead-8*nb8)/2;
|
||
|
ntail = n-nhead-8*nb8-2*nb2;
|
||
|
for(i=0; i<mb3; i++)
|
||
|
{
|
||
|
double row0, row1, row2;
|
||
|
row0 = 0;
|
||
|
row1 = 0;
|
||
|
row2 = 0;
|
||
|
pb = x;
|
||
|
pa0 = a;
|
||
|
pa1 = a+alglib_r_block;
|
||
|
pa2 = a+alglib_twice_r_block;
|
||
|
a += 3*alglib_r_block;
|
||
|
if( nhead==1 )
|
||
|
{
|
||
|
vx = _mm_load_sd(pb);
|
||
|
v0 = _mm_load_sd(pa0);
|
||
|
v1 = _mm_load_sd(pa1);
|
||
|
v2 = _mm_load_sd(pa2);
|
||
|
|
||
|
v0 = _mm_mul_sd(v0,vx);
|
||
|
v1 = _mm_mul_sd(v1,vx);
|
||
|
v2 = _mm_mul_sd(v2,vx);
|
||
|
|
||
|
pa0++;
|
||
|
pa1++;
|
||
|
pa2++;
|
||
|
pb++;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
v0 = _mm_setzero_pd();
|
||
|
v1 = _mm_setzero_pd();
|
||
|
v2 = _mm_setzero_pd();
|
||
|
}
|
||
|
for(k=0; k<nb8; k++)
|
||
|
{
|
||
|
/*
|
||
|
* this code is a shuffle of simultaneous dot product.
|
||
|
* see below for commented unshuffled original version.
|
||
|
*/
|
||
|
vx = _mm_load_pd(pb);
|
||
|
va0 = _mm_load_pd(pa0);
|
||
|
va1 = _mm_load_pd(pa1);
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va2 = _mm_load_pd(pa2);
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va0 = _mm_load_pd(pa0+2);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
va1 = _mm_load_pd(pa1+2);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
vx = _mm_load_pd(pb+2);
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va2 = _mm_load_pd(pa2+2);
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va0 = _mm_load_pd(pa0+4);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
va1 = _mm_load_pd(pa1+4);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
vx = _mm_load_pd(pb+4);
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va2 = _mm_load_pd(pa2+4);
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va0 = _mm_load_pd(pa0+6);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
va1 = _mm_load_pd(pa1+6);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
vx = _mm_load_pd(pb+6);
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
va2 = _mm_load_pd(pa2+6);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
|
||
|
pa0 += 8;
|
||
|
pa1 += 8;
|
||
|
pa2 += 8;
|
||
|
pb += 8;
|
||
|
|
||
|
/*
|
||
|
this is unshuffled version of code above
|
||
|
|
||
|
vx = _mm_load_pd(pb);
|
||
|
va0 = _mm_load_pd(pa0);
|
||
|
va1 = _mm_load_pd(pa1);
|
||
|
va2 = _mm_load_pd(pa2);
|
||
|
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
|
||
|
vx = _mm_load_pd(pb+2);
|
||
|
va0 = _mm_load_pd(pa0+2);
|
||
|
va1 = _mm_load_pd(pa1+2);
|
||
|
va2 = _mm_load_pd(pa2+2);
|
||
|
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
|
||
|
vx = _mm_load_pd(pb+4);
|
||
|
va0 = _mm_load_pd(pa0+4);
|
||
|
va1 = _mm_load_pd(pa1+4);
|
||
|
va2 = _mm_load_pd(pa2+4);
|
||
|
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
|
||
|
vx = _mm_load_pd(pb+6);
|
||
|
va0 = _mm_load_pd(pa0+6);
|
||
|
va1 = _mm_load_pd(pa1+6);
|
||
|
va2 = _mm_load_pd(pa2+6);
|
||
|
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
*/
|
||
|
}
|
||
|
for(k=0; k<nb2; k++)
|
||
|
{
|
||
|
vx = _mm_load_pd(pb);
|
||
|
va0 = _mm_load_pd(pa0);
|
||
|
va1 = _mm_load_pd(pa1);
|
||
|
va2 = _mm_load_pd(pa2);
|
||
|
|
||
|
va0 = _mm_mul_pd(va0,vx);
|
||
|
v0 = _mm_add_pd(va0,v0);
|
||
|
va1 = _mm_mul_pd(va1,vx);
|
||
|
v1 = _mm_add_pd(va1,v1);
|
||
|
va2 = _mm_mul_pd(va2,vx);
|
||
|
v2 = _mm_add_pd(va2,v2);
|
||
|
|
||
|
pa0 += 2;
|
||
|
pa1 += 2;
|
||
|
pa2 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
for(k=0; k<ntail; k++)
|
||
|
{
|
||
|
vx = _mm_load1_pd(pb);
|
||
|
va0 = _mm_load1_pd(pa0);
|
||
|
va1 = _mm_load1_pd(pa1);
|
||
|
va2 = _mm_load1_pd(pa2);
|
||
|
|
||
|
va0 = _mm_mul_sd(va0,vx);
|
||
|
v0 = _mm_add_sd(v0,va0);
|
||
|
va1 = _mm_mul_sd(va1,vx);
|
||
|
v1 = _mm_add_sd(v1,va1);
|
||
|
va2 = _mm_mul_sd(va2,vx);
|
||
|
v2 = _mm_add_sd(v2,va2);
|
||
|
}
|
||
|
vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1));
|
||
|
_mm_storel_pd(&row0, vtmp);
|
||
|
_mm_storeh_pd(&row1, vtmp);
|
||
|
v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2);
|
||
|
_mm_storel_pd(&row2, v2);
|
||
|
if( beta!=0 )
|
||
|
{
|
||
|
y[0] = beta*y[0]+alpha*row0;
|
||
|
y[stride] = beta*y[stride]+alpha*row1;
|
||
|
y[2*stride] = beta*y[2*stride]+alpha*row2;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
y[0] = alpha*row0;
|
||
|
y[stride] = alpha*row1;
|
||
|
y[2*stride] = alpha*row2;
|
||
|
}
|
||
|
y+=3*stride;
|
||
|
}
|
||
|
for(i=0; i<mtail; i++)
|
||
|
{
|
||
|
double row0;
|
||
|
row0 = 0;
|
||
|
pb = x;
|
||
|
pa0 = a;
|
||
|
a += alglib_r_block;
|
||
|
for(k=0; k<n2; k++)
|
||
|
{
|
||
|
row0 += pb[0]*pa0[0]+pb[1]*pa0[1];
|
||
|
pa0 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
if( n%2 )
|
||
|
row0 += pb[0]*pa0[0];
|
||
|
if( beta!=0 )
|
||
|
y[0] = beta*y[0]+alpha*row0;
|
||
|
else
|
||
|
y[0] = alpha*row0;
|
||
|
y+=stride;
|
||
|
}
|
||
|
}
|
||
|
#endif
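/*
 * The SSE2 kernels in this file are guarded twice, as the comments above
 * describe: AE_HAS_SSE2_INTRINSICS decides at compile time whether the
 * function exists at all, and ae_cpuid() decides at run time whether it may
 * be called. Callers therefore select the kernel through a function
 * pointer, roughly like this (sketch of the pattern used by the GEMM/TRSM
 * drivers further below):
 *
 *     void (*rmv)(ae_int_t, ae_int_t, const double*, const double*,
 *                 double*, ae_int_t, double, double) = &_ialglib_rmv;
 *     #ifdef AE_HAS_SSE2_INTRINSICS
 *     if( ae_cpuid() & CPU_SSE2 )
 *         rmv = &_ialglib_rmv_sse2;      // safe: CPU support verified at run time
 *     #endif
 *     // ... later: rmv(n, k, b, abuf, crow, 1, alpha, beta);
 */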
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
This subroutine calculates fast MxN complex matrix-vector product:
|
||
|
|
||
|
y := beta*y + alpha*A*x
|
||
|
|
||
|
using generic C code, where A, x, y, alpha and beta are complex.
|
||
|
|
||
|
If beta is zero, we do not use previous values of y (they are overwritten
|
||
|
by alpha*A*x without ever being read). However, when alpha is zero, we
|
||
|
still calculate A*x and multiply it by alpha (this distinction can be
|
||
|
important when A or x contain infinities/NANs).
|
||
|
|
||
|
IMPORTANT:
|
||
|
* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
|
||
|
* A must be stored in row-major order, as sequence of double precision
|
||
|
pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
|
||
|
in doubles).
|
||
|
* Y may be referenced by cy (pointer to ae_complex) or
|
||
|
dy (pointer to array of double precision pair) depending on what type of
|
||
|
output you wish. Pass pointer to Y as one of these parameters,
|
||
|
AND SET OTHER PARAMETER TO NULL.
|
||
|
* both A and x must be aligned; y may be non-aligned.
|
||
|
*************************************************************************/
|
||
|
void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
|
||
|
{
|
||
|
ae_int_t i, j;
|
||
|
const double *pa, *parow, *pb;
|
||
|
|
||
|
parow = a;
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
double v0 = 0, v1 = 0;
|
||
|
pa = parow;
|
||
|
pb = x;
|
||
|
for(j=0; j<n; j++)
|
||
|
{
|
||
|
v0 += pa[0]*pb[0];
|
||
|
v1 += pa[0]*pb[1];
|
||
|
v0 -= pa[1]*pb[1];
|
||
|
v1 += pa[1]*pb[0];
|
||
|
|
||
|
pa += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
if( cy!=NULL )
|
||
|
{
|
||
|
double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1);
|
||
|
double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0);
|
||
|
cy->x = tx;
|
||
|
cy->y = ty;
|
||
|
cy+=stride;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1);
|
||
|
double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0);
|
||
|
dy[0] = tx;
|
||
|
dy[1] = ty;
|
||
|
dy += 2*stride;
|
||
|
}
|
||
|
parow += 2*alglib_c_block;
|
||
|
}
|
||
|
}
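/*
 * Storage convention used above: a complex number occupies two adjacent
 * doubles (re, im), so one complex multiply-accumulate on the interleaved
 * arrays expands to four real operations. A scalar sketch of the inner
 * loop body:
 *
 *     // acc += a*b, with a = (pa[0],pa[1]) and b = (pb[0],pb[1])
 *     v_re += pa[0]*pb[0] - pa[1]*pb[1];   // ar*br - ai*bi
 *     v_im += pa[0]*pb[1] + pa[1]*pb[0];   // ar*bi + ai*br
 */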
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
This subroutine calculates fast MxN complex matrix-vector product:
|
||
|
|
||
|
y := beta*y + alpha*A*x
|
||
|
|
||
|
using generic C code, where A, x, y, alpha and beta are complex.
|
||
|
|
||
|
If beta is zero, we do not use previous values of y (they are overwritten
|
||
|
by alpha*A*x without ever being read). However, when alpha is zero, we
|
||
|
still calculate A*x and multiply it by alpha (this distinction can be
|
||
|
important when A or x contain infinities/NANs).
|
||
|
|
||
|
IMPORTANT:
|
||
|
* 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
|
||
|
* A must be stored in row-major order, as sequence of double precision
|
||
|
pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
|
||
|
in doubles).
|
||
|
* Y may be referenced by cy (pointer to ae_complex) or
|
||
|
dy (pointer to array of double precision pair) depending on what type of
|
||
|
output you wish. Pass pointer to Y as one of these parameters,
|
||
|
AND SET OTHER PARAMETER TO NULL.
|
||
|
* both A and x must be aligned; y may be non-aligned.
|
||
|
|
||
|
This function supports SSE2; it can be used when:
|
||
|
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
|
||
|
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
|
||
|
|
||
|
If (1) is failed, this function will be undefined. If (2) is failed, call
|
||
|
to this function will probably crash your system.
|
||
|
|
||
|
If you want to know whether it is safe to call it, you should check
|
||
|
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
|
||
|
and will do its work.
|
||
|
*************************************************************************/
|
||
|
#if defined(AE_HAS_SSE2_INTRINSICS)
|
||
|
void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
|
||
|
{
|
||
|
ae_int_t i, j, m2;
|
||
|
const double *pa0, *pa1, *parow, *pb;
|
||
|
__m128d vbeta, vbetax, vbetay;
|
||
|
__m128d valpha, valphax, valphay;
|
||
|
|
||
|
m2 = m/2;
|
||
|
parow = a;
|
||
|
if( cy!=NULL )
|
||
|
{
|
||
|
dy = (double*)cy;
|
||
|
cy = NULL;
|
||
|
}
|
||
|
vbeta = _mm_loadh_pd(_mm_load_sd(&beta.x),&beta.y);
|
||
|
vbetax = _mm_unpacklo_pd(vbeta,vbeta);
|
||
|
vbetay = _mm_unpackhi_pd(vbeta,vbeta);
|
||
|
valpha = _mm_loadh_pd(_mm_load_sd(&alpha.x),&alpha.y);
|
||
|
valphax = _mm_unpacklo_pd(valpha,valpha);
|
||
|
valphay = _mm_unpackhi_pd(valpha,valpha);
|
||
|
for(i=0; i<m2; i++)
|
||
|
{
|
||
|
__m128d vx, vy, vt0, vt1, vt2, vt3, vt4, vt5, vrx, vry, vtx, vty;
|
||
|
pa0 = parow;
|
||
|
pa1 = parow+2*alglib_c_block;
|
||
|
pb = x;
|
||
|
vx = _mm_setzero_pd();
|
||
|
vy = _mm_setzero_pd();
|
||
|
for(j=0; j<n; j++)
|
||
|
{
|
||
|
vt0 = _mm_load1_pd(pb);
|
||
|
vt1 = _mm_load1_pd(pb+1);
|
||
|
vt2 = _mm_load_pd(pa0);
|
||
|
vt3 = _mm_load_pd(pa1);
|
||
|
vt5 = _mm_unpacklo_pd(vt2,vt3);
|
||
|
vt4 = _mm_unpackhi_pd(vt2,vt3);
|
||
|
vt2 = vt5;
|
||
|
vt3 = vt4;
|
||
|
|
||
|
vt2 = _mm_mul_pd(vt2,vt0);
|
||
|
vx = _mm_add_pd(vx,vt2);
|
||
|
vt3 = _mm_mul_pd(vt3,vt1);
|
||
|
vx = _mm_sub_pd(vx,vt3);
|
||
|
vt4 = _mm_mul_pd(vt4,vt0);
|
||
|
vy = _mm_add_pd(vy,vt4);
|
||
|
vt5 = _mm_mul_pd(vt5,vt1);
|
||
|
vy = _mm_add_pd(vy,vt5);
|
||
|
|
||
|
pa0 += 2;
|
||
|
pa1 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
if( beta.x==0.0 && beta.y==0.0 )
|
||
|
{
|
||
|
vrx = _mm_setzero_pd();
|
||
|
vry = _mm_setzero_pd();
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
vtx = _mm_loadh_pd(_mm_load_sd(dy+0),dy+2*stride+0);
|
||
|
vty = _mm_loadh_pd(_mm_load_sd(dy+1),dy+2*stride+1);
|
||
|
vrx = _mm_sub_pd(_mm_mul_pd(vbetax,vtx),_mm_mul_pd(vbetay,vty));
|
||
|
vry = _mm_add_pd(_mm_mul_pd(vbetax,vty),_mm_mul_pd(vbetay,vtx));
|
||
|
}
|
||
|
vtx = _mm_sub_pd(_mm_mul_pd(valphax,vx),_mm_mul_pd(valphay,vy));
|
||
|
vty = _mm_add_pd(_mm_mul_pd(valphax,vy),_mm_mul_pd(valphay,vx));
|
||
|
vrx = _mm_add_pd(vrx,vtx);
|
||
|
vry = _mm_add_pd(vry,vty);
|
||
|
_mm_storel_pd(dy+0, vrx);
|
||
|
_mm_storeh_pd(dy+2*stride+0, vrx);
|
||
|
_mm_storel_pd(dy+1, vry);
|
||
|
_mm_storeh_pd(dy+2*stride+1, vry);
|
||
|
dy += 4*stride;
|
||
|
parow += 4*alglib_c_block;
|
||
|
}
|
||
|
if( m%2 )
|
||
|
{
|
||
|
double v0 = 0, v1 = 0;
|
||
|
double tx, ty;
|
||
|
pa0 = parow;
|
||
|
pb = x;
|
||
|
for(j=0; j<n; j++)
|
||
|
{
|
||
|
v0 += pa0[0]*pb[0];
|
||
|
v1 += pa0[0]*pb[1];
|
||
|
v0 -= pa0[1]*pb[1];
|
||
|
v1 += pa0[1]*pb[0];
|
||
|
|
||
|
pa0 += 2;
|
||
|
pb += 2;
|
||
|
}
|
||
|
if( beta.x==0.0 && beta.y==0.0 )
|
||
|
{
|
||
|
tx = 0.0;
|
||
|
ty = 0.0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
tx = beta.x*dy[0]-beta.y*dy[1];
|
||
|
ty = beta.x*dy[1]+beta.y*dy[0];
|
||
|
}
|
||
|
tx += alpha.x*v0-alpha.y*v1;
|
||
|
ty += alpha.x*v1+alpha.y*v0;
|
||
|
dy[0] = tx;
|
||
|
dy[1] = ty;
|
||
|
dy += 2*stride;
|
||
|
parow += 2*alglib_c_block;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine sets vector to zero
|
||
|
********************************************************************/
|
||
|
void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride==1 )
|
||
|
{
|
||
|
for(i=0; i<n; i++,p++)
|
||
|
*p = 0.0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++,p+=stride)
|
||
|
*p = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine sets vector to zero
|
||
|
********************************************************************/
|
||
|
void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
if( stride==1 )
|
||
|
{
|
||
|
for(i=0; i<n; i++,p++)
|
||
|
{
|
||
|
p->x = 0.0;
|
||
|
p->y = 0.0;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++,p+=stride)
|
||
|
{
|
||
|
p->x = 0.0;
|
||
|
p->y = 0.0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies unaligned real vector
|
||
|
********************************************************************/
|
||
|
void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb)
|
||
|
{
|
||
|
ae_int_t i, n2;
|
||
|
if( stridea==1 && strideb==1 )
|
||
|
{
|
||
|
n2 = n/2;
|
||
|
for(i=n2; i!=0; i--, a+=2, b+=2)
|
||
|
{
|
||
|
b[0] = a[0];
|
||
|
b[1] = a[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
b[0] = a[0];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++,a+=stridea,b+=strideb)
|
||
|
*b = *a;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies unaligned complex vector
|
||
|
(passed as ae_complex*)
|
||
|
|
||
|
1. strideb is stride measured in complex numbers, not doubles
|
||
|
2. conj may be "N" (no conj.) or "C" (conj.)
|
||
|
********************************************************************/
|
||
|
void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
|
||
|
/*
|
||
|
* more general case
|
||
|
*/
|
||
|
if( conj[0]=='N' || conj[0]=='n' )
|
||
|
{
|
||
|
for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
|
||
|
{
|
||
|
b[0] = a->x;
|
||
|
b[1] = a->y;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
|
||
|
{
|
||
|
b[0] = a->x;
|
||
|
b[1] = -a->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies unaligned complex vector (passed as double*)
|
||
|
|
||
|
1. strideb is stride measured in complex numbers, not doubles
|
||
|
2. conj may be "N" (no conj.) or "C" (conj.)
|
||
|
********************************************************************/
|
||
|
void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
|
||
|
{
|
||
|
ae_int_t i;
|
||
|
|
||
|
/*
|
||
|
* more general case
|
||
|
*/
|
||
|
if( conj[0]=='N' || conj[0]=='n' )
|
||
|
{
|
||
|
for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
|
||
|
{
|
||
|
b[0] = a[0];
|
||
|
b[1] = a[1];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
|
||
|
{
|
||
|
b[0] = a[0];
|
||
|
b[1] = -a[1];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage
|
||
|
|
||
|
A:
|
||
|
* MxN
|
||
|
* non-aligned
|
||
|
* non-contigous
|
||
|
* may be transformed during copying (as prescribed by op)
|
||
|
|
||
|
B:
|
||
|
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
|
||
|
* aligned
|
||
|
* stride is alglib_r_block
|
||
|
|
||
|
Transformation types:
|
||
|
* 0 - no transform
|
||
|
* 1 - transposition
|
||
|
********************************************************************/
|
||
|
void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
|
||
|
{
|
||
|
ae_int_t i, j, n2;
|
||
|
const double *psrc;
|
||
|
double *pdst;
|
||
|
if( op==0 )
|
||
|
{
|
||
|
n2 = n/2;
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a)
|
||
|
{
|
||
|
for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
|
||
|
{
|
||
|
pdst[0] = psrc[0];
|
||
|
pdst[1] = psrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
pdst[0] = psrc[0];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
n2 = n/2;
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a)
|
||
|
{
|
||
|
for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2)
|
||
|
{
|
||
|
pdst[0] = psrc[0];
|
||
|
pdst[alglib_r_block] = psrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
pdst[0] = psrc[0];
|
||
|
}
|
||
|
}
|
||
|
}
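/*
 * Reference semantics of _ialglib_mcopyblock (a sketch without the
 * 2-element unrolling): the destination always has row stride
 * alglib_r_block, the source has the caller-supplied stride, and op==1
 * transposes during the copy:
 *
 *     for(ae_int_t i=0; i<m; i++)
 *         for(ae_int_t j=0; j<n; j++)
 *         {
 *             double v = a[i*stride+j];
 *             if( op==0 )
 *                 b[i*alglib_r_block+j] = v;   // plain copy
 *             else
 *                 b[j*alglib_r_block+i] = v;   // transposed copy
 *         }
 */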
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage
|
||
|
|
||
|
A:
|
||
|
* MxN
|
||
|
* non-aligned
|
||
|
* non-contigous
|
||
|
* may be transformed during copying (as prescribed by op)
|
||
|
|
||
|
B:
|
||
|
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
|
||
|
* aligned
|
||
|
* stride is alglib_r_block
|
||
|
|
||
|
Transformation types:
|
||
|
* 0 - no transform
|
||
|
* 1 - transposition
|
||
|
|
||
|
This function supports SSE2; it can be used when:
|
||
|
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
|
||
|
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
|
||
|
|
||
|
If (1) is failed, this function will be undefined. If (2) is failed, call
|
||
|
to this function will probably crash your system.
|
||
|
|
||
|
If you want to know whether it is safe to call it, you should check
|
||
|
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
|
||
|
and will do its work.
|
||
|
********************************************************************/
|
||
|
#if defined(AE_HAS_SSE2_INTRINSICS)
|
||
|
void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
|
||
|
{
|
||
|
ae_int_t i, j, mb2;
|
||
|
const double *psrc0, *psrc1;
|
||
|
double *pdst;
|
||
|
if( op==0 )
|
||
|
{
|
||
|
ae_int_t nb8, ntail;
|
||
|
nb8 = n/8;
|
||
|
ntail = n-8*nb8;
|
||
|
for(i=0,psrc0=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc0=a)
|
||
|
{
|
||
|
pdst=b;
|
||
|
for(j=0; j<nb8; j++)
|
||
|
{
|
||
|
__m128d v0, v1;
|
||
|
v0 = _mm_loadu_pd(psrc0);
|
||
|
_mm_store_pd(pdst, v0);
|
||
|
v1 = _mm_loadu_pd(psrc0+2);
|
||
|
_mm_store_pd(pdst+2, v1);
|
||
|
v1 = _mm_loadu_pd(psrc0+4);
|
||
|
_mm_store_pd(pdst+4, v1);
|
||
|
v1 = _mm_loadu_pd(psrc0+6);
|
||
|
_mm_store_pd(pdst+6, v1);
|
||
|
pdst+=8;
|
||
|
psrc0+=8;
|
||
|
}
|
||
|
for(j=0; j<ntail; j++)
|
||
|
pdst[j] = psrc0[j];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
const double *arow0, *arow1;
|
||
|
double *bcol0, *bcol1, *pdst0, *pdst1;
|
||
|
ae_int_t nb4, ntail, n2;
|
||
|
|
||
|
n2 = n/2;
|
||
|
mb2 = m/2;
|
||
|
nb4 = n/4;
|
||
|
ntail = n-4*nb4;
|
||
|
|
||
|
arow0 = a;
|
||
|
arow1 = a+stride;
|
||
|
bcol0 = b;
|
||
|
bcol1 = b+1;
|
||
|
for(i=0; i<mb2; i++)
|
||
|
{
|
||
|
psrc0 = arow0;
|
||
|
psrc1 = arow1;
|
||
|
pdst0 = bcol0;
|
||
|
pdst1 = bcol1;
|
||
|
for(j=0; j<nb4; j++)
|
||
|
{
|
||
|
__m128d v0, v1, v2, v3;
|
||
|
v0 = _mm_loadu_pd(psrc0);
|
||
|
v1 = _mm_loadu_pd(psrc1);
|
||
|
v2 = _mm_loadu_pd(psrc0+2);
|
||
|
v3 = _mm_loadu_pd(psrc1+2);
|
||
|
_mm_store_pd(pdst0, _mm_unpacklo_pd(v0,v1));
|
||
|
_mm_store_pd(pdst0+alglib_r_block, _mm_unpackhi_pd(v0,v1));
|
||
|
_mm_store_pd(pdst0+2*alglib_r_block, _mm_unpacklo_pd(v2,v3));
|
||
|
_mm_store_pd(pdst0+3*alglib_r_block, _mm_unpackhi_pd(v2,v3));
|
||
|
|
||
|
pdst0 += 4*alglib_r_block;
|
||
|
pdst1 += 4*alglib_r_block;
|
||
|
psrc0 += 4;
|
||
|
psrc1 += 4;
|
||
|
}
|
||
|
for(j=0; j<ntail; j++)
|
||
|
{
|
||
|
pdst0[0] = psrc0[0];
|
||
|
pdst1[0] = psrc1[0];
|
||
|
pdst0 += alglib_r_block;
|
||
|
pdst1 += alglib_r_block;
|
||
|
psrc0 += 1;
|
||
|
psrc1 += 1;
|
||
|
}
|
||
|
arow0 += 2*stride;
|
||
|
arow1 += 2*stride;
|
||
|
bcol0 += 2;
|
||
|
bcol1 += 2;
|
||
|
}
|
||
|
if( m%2 )
|
||
|
{
|
||
|
psrc0 = arow0;
|
||
|
pdst0 = bcol0;
|
||
|
for(j=0; j<n2; j++)
|
||
|
{
|
||
|
pdst0[0] = psrc0[0];
|
||
|
pdst0[alglib_r_block] = psrc0[1];
|
||
|
pdst0 += alglib_twice_r_block;
|
||
|
psrc0 += 2;
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
pdst0[0] = psrc0[0];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies matrix from aligned contiguous storage to non-
aligned non-contiguous storage
|
||
|
|
||
|
A:
|
||
|
* MxN
|
||
|
* aligned
|
||
|
* contigous
|
||
|
* stride is alglib_r_block
|
||
|
* may be transformed during copying (as prescribed by op)
|
||
|
|
||
|
B:
|
||
|
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
|
||
|
* non-aligned, non-contigous
|
||
|
|
||
|
Transformation types:
|
||
|
* 0 - no transform
|
||
|
* 1 - transposition
|
||
|
********************************************************************/
|
||
|
void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride)
|
||
|
{
|
||
|
ae_int_t i, j, n2;
|
||
|
const double *psrc;
|
||
|
double *pdst;
|
||
|
if( op==0 )
|
||
|
{
|
||
|
n2 = n/2;
|
||
|
for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a)
|
||
|
{
|
||
|
for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
|
||
|
{
|
||
|
pdst[0] = psrc[0];
|
||
|
pdst[1] = psrc[1];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
pdst[0] = psrc[0];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
n2 = n/2;
|
||
|
for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a)
|
||
|
{
|
||
|
for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block)
|
||
|
{
|
||
|
pdst[0] = psrc[0];
|
||
|
pdst[1] = psrc[alglib_r_block];
|
||
|
}
|
||
|
if( n%2!=0 )
|
||
|
pdst[0] = psrc[0];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage
|
||
|
|
||
|
A:
|
||
|
* MxN
|
||
|
* non-aligned
|
||
|
* non-contigous
|
||
|
* may be transformed during copying (as prescribed by op)
|
||
|
* pointer to ae_complex is passed
|
||
|
|
||
|
B:
|
||
|
* 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used)
|
||
|
* aligned
|
||
|
* stride is alglib_c_block
|
||
|
* pointer to double is passed
|
||
|
|
||
|
Transformation types:
|
||
|
* 0 - no transform
|
||
|
* 1 - transposition
|
||
|
* 2 - conjugate transposition
|
||
|
* 3 - conjugate, but no transposition
|
||
|
********************************************************************/
|
||
|
void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b)
|
||
|
{
|
||
|
ae_int_t i, j;
|
||
|
const ae_complex *psrc;
|
||
|
double *pdst;
|
||
|
if( op==0 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
|
||
|
{
|
||
|
pdst[0] = psrc->x;
|
||
|
pdst[1] = psrc->y;
|
||
|
}
|
||
|
}
|
||
|
if( op==1 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
|
||
|
{
|
||
|
pdst[0] = psrc->x;
|
||
|
pdst[1] = psrc->y;
|
||
|
}
|
||
|
}
|
||
|
if( op==2 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
|
||
|
{
|
||
|
pdst[0] = psrc->x;
|
||
|
pdst[1] = -psrc->y;
|
||
|
}
|
||
|
}
|
||
|
if( op==3 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
|
||
|
{
|
||
|
pdst[0] = psrc->x;
|
||
|
pdst[1] = -psrc->y;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
This subroutine copies matrix from aligned contiguous storage to
non-aligned non-contiguous storage
|
||
|
|
||
|
A:
|
||
|
* 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used)
|
||
|
* aligned
|
||
|
* stride is alglib_c_block
|
||
|
* pointer to double is passed
|
||
|
* may be transformed during copying (as prescribed by op)
|
||
|
|
||
|
B:
|
||
|
* MxN
|
||
|
* non-aligned
|
||
|
* non-contigous
|
||
|
* pointer to ae_complex is passed
|
||
|
|
||
|
Transformation types:
|
||
|
* 0 - no transform
|
||
|
* 1 - transposition
|
||
|
* 2 - conjugate transposition
|
||
|
* 3 - conjugate, but no transposition
|
||
|
********************************************************************/
|
||
|
void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride)
|
||
|
{
|
||
|
ae_int_t i, j;
|
||
|
const double *psrc;
|
||
|
ae_complex *pdst;
|
||
|
if( op==0 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
|
||
|
{
|
||
|
pdst->x = psrc[0];
|
||
|
pdst->y = psrc[1];
|
||
|
}
|
||
|
}
|
||
|
if( op==1 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
|
||
|
{
|
||
|
pdst->x = psrc[0];
|
||
|
pdst->y = psrc[1];
|
||
|
}
|
||
|
}
|
||
|
if( op==2 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
|
||
|
{
|
||
|
pdst->x = psrc[0];
|
||
|
pdst->y = -psrc[1];
|
||
|
}
|
||
|
}
|
||
|
if( op==3 )
|
||
|
{
|
||
|
for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
|
||
|
for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
|
||
|
{
|
||
|
pdst->x = psrc[0];
|
||
|
pdst->y = -psrc[1];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
Real GEMM kernel
|
||
|
********************************************************************/
|
||
|
ae_bool _ialglib_rmatrixgemm(ae_int_t m,
|
||
|
ae_int_t n,
|
||
|
ae_int_t k,
|
||
|
double alpha,
|
||
|
double *_a,
|
||
|
ae_int_t _a_stride,
|
||
|
ae_int_t optypea,
|
||
|
double *_b,
|
||
|
ae_int_t _b_stride,
|
||
|
ae_int_t optypeb,
|
||
|
double beta,
|
||
|
double *_c,
|
||
|
ae_int_t _c_stride)
|
||
|
{
|
||
|
int i;
|
||
|
double *crow;
|
||
|
double _abuf[alglib_r_block+alglib_simd_alignment];
|
||
|
double _bbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
|
||
|
double * const abuf = (double * ) ae_align(_abuf,alglib_simd_alignment);
|
||
|
double * const b = (double * ) ae_align(_bbuf,alglib_simd_alignment);
|
||
|
void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
|
||
|
void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
|
||
|
|
||
|
if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 )
|
||
|
return ae_false;
|
||
|
|
||
|
/*
|
||
|
* Check for SSE2 support
|
||
|
*/
|
||
|
#ifdef AE_HAS_SSE2_INTRINSICS
|
||
|
if( ae_cpuid() & CPU_SSE2 )
|
||
|
{
|
||
|
rmv = &_ialglib_rmv_sse2;
|
||
|
mcopyblock = &_ialglib_mcopyblock_sse2;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* copy b
|
||
|
*/
|
||
|
if( optypeb==0 )
|
||
|
mcopyblock(k, n, _b, 1, _b_stride, b);
|
||
|
else
|
||
|
mcopyblock(n, k, _b, 0, _b_stride, b);
|
||
|
|
||
|
/*
|
||
|
* multiply B by A (from the right, by rows)
|
||
|
* and store result in C
|
||
|
*/
|
||
|
crow = _c;
|
||
|
if( optypea==0 )
|
||
|
{
|
||
|
const double *arow = _a;
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
_ialglib_vcopy(k, arow, 1, abuf, 1);
|
||
|
if( beta==0 )
|
||
|
_ialglib_vzero(n, crow, 1);
|
||
|
rmv(n, k, b, abuf, crow, 1, alpha, beta);
|
||
|
crow += _c_stride;
|
||
|
arow += _a_stride;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
const double *acol = _a;
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
_ialglib_vcopy(k, acol, _a_stride, abuf, 1);
|
||
|
if( beta==0 )
|
||
|
_ialglib_vzero(n, crow, 1);
|
||
|
rmv(n, k, b, abuf, crow, 1, alpha, beta);
|
||
|
crow += _c_stride;
|
||
|
acol++;
|
||
|
}
|
||
|
}
|
||
|
return ae_true;
|
||
|
}
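/*
 * Structure of the kernel above, written out in plain form: op(B) is copied
 * once into an aligned block buffer in transposed order, and then every row
 * of C is produced by one matrix-vector call, i.e. for i = 0..m-1
 *
 *     C[i,0..n-1] := beta*C[i,0..n-1] + alpha * op(B)^T * op(A)[i,0..k-1]
 *
 * which is just the row-wise reading of C := alpha*op(A)*op(B) + beta*C.
 * When beta==0 the row of C is cleared first, so stale (possibly
 * uninitialized) contents are never used.
 */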
|
||
|
|
||
|
|
||
|
/********************************************************************
|
||
|
Complex GEMM kernel
|
||
|
********************************************************************/
|
||
|
ae_bool _ialglib_cmatrixgemm(ae_int_t m,
|
||
|
ae_int_t n,
|
||
|
ae_int_t k,
|
||
|
ae_complex alpha,
|
||
|
ae_complex *_a,
|
||
|
ae_int_t _a_stride,
|
||
|
ae_int_t optypea,
|
||
|
ae_complex *_b,
|
||
|
ae_int_t _b_stride,
|
||
|
ae_int_t optypeb,
|
||
|
ae_complex beta,
|
||
|
ae_complex *_c,
|
||
|
ae_int_t _c_stride)
|
||
|
{
|
||
|
const ae_complex *arow;
|
||
|
ae_complex *crow;
|
||
|
ae_int_t i;
|
||
|
double _loc_abuf[2*alglib_c_block+alglib_simd_alignment];
|
||
|
double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
|
||
|
double * const abuf = (double *)ae_align(_loc_abuf,alglib_simd_alignment);
|
||
|
double * const b = (double *)ae_align(_loc_b, alglib_simd_alignment);
|
||
|
ae_int_t brows;
|
||
|
ae_int_t bcols;
|
||
|
void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
|
||
|
|
||
|
if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
|
||
|
return ae_false;
|
||
|
|
||
|
/*
|
||
|
* Check for SSE2 support
|
||
|
*/
|
||
|
#ifdef AE_HAS_SSE2_INTRINSICS
|
||
|
if( ae_cpuid() & CPU_SSE2 )
|
||
|
{
|
||
|
cmv = &_ialglib_cmv_sse2;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* copy b
|
||
|
*/
|
||
|
brows = optypeb==0 ? k : n;
|
||
|
bcols = optypeb==0 ? n : k;
|
||
|
if( optypeb==0 )
|
||
|
_ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b);
|
||
|
if( optypeb==1 )
|
||
|
_ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b);
|
||
|
if( optypeb==2 )
|
||
|
_ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b);
|
||
|
|
||
|
/*
|
||
|
* multiply B by A (from the right, by rows)
|
||
|
* and store result in C
|
||
|
*/
|
||
|
arow = _a;
|
||
|
crow = _c;
|
||
|
for(i=0; i<m; i++)
|
||
|
{
|
||
|
if( optypea==0 )
|
||
|
{
|
||
|
_ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj");
|
||
|
arow += _a_stride;
|
||
|
}
|
||
|
else if( optypea==1 )
|
||
|
{
|
||
|
_ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj");
|
||
|
arow++;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
_ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj");
|
||
|
arow++;
|
||
|
}
|
||
|
if( beta.x==0 && beta.y==0 )
|
||
|
_ialglib_vzero_complex(n, crow, 1);
|
||
|
cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta);
|
||
|
crow += _c_stride;
|
||
|
}
|
||
|
return ae_true;
|
||
|
}


/********************************************************************
complex TRSM kernel
********************************************************************/
ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_complex *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     */
    double *pdiag;
    ae_int_t i;
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double*)ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double*)ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double*)ae_align(_loc_tmpbuf, alglib_simd_alignment);
    ae_bool uppera;
    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;

    if( m>alglib_c_block || n>alglib_c_block )
        return ae_false;

    /*
     * Check for SSE2 support
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        cmv = &_ialglib_cmv_sse2;
    }
#endif

    /*
     * Prepare
     */
    _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf);
    _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve Y*A^-1=X where A is upper or lower triangular
     */
    if( uppera )
    {
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
            cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
    }
    else
    {
        for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
            cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
    }
    return ae_true;
}


/********************************************************************
real TRSM kernel
********************************************************************/
ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     double *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     */
    double *pdiag;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf, alglib_simd_alignment);
    ae_bool uppera;
    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;

    if( m>alglib_r_block || n>alglib_r_block )
        return ae_false;

    /*
     * Check for SSE2 support
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        rmv = &_ialglib_rmv_sse2;
        mcopyblock = &_ialglib_mcopyblock_sse2;
    }
#endif

    /*
     * Prepare
     */
    mcopyblock(n, n, _a, optype, _a_stride, abuf);
    mcopyblock(m, n, _x, 0, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
            *pdiag = 1.0;
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve Y*A^-1=X where A is upper or lower triangular
     */
    if( uppera )
    {
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1);
            rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
    }
    else
    {
        for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1);
            rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
    }
    return ae_true;
}
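
/*
 * Illustrative sketch (not part of the kernel above): for the upper triangular
 * branch the right-TRSM kernel implements the column-by-column substitution
 *
 *     X[:,i] = ( X[:,i] - sum_{j<i} X[:,j]*A[j,i] ) / A[i,i]
 *
 * which is exactly what the rmv() call expresses, with alpha=-1/A[i,i] applied
 * to the partial sums over the already-solved columns and beta=1/A[i,i]
 * applied to the current column of X.
 */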


/********************************************************************
complex TRSM kernel
********************************************************************/
ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_complex *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     */
    double *pdiag, *arow;
    ae_int_t i;
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf, alglib_simd_alignment);
    ae_bool uppera;
    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;

    if( m>alglib_c_block || n>alglib_c_block )
        return ae_false;

    /*
     * Check for SSE2 support
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        cmv = &_ialglib_cmv_sse2;
    }
#endif

    /*
     * Prepare
     * Transpose X (so we may use mv, which calculates A*x, but not x*A)
     */
    _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf);
    _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve A^-1*Y^T=X^T where A is upper or lower triangular
     */
    if( uppera )
    {
        for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
            cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
    }
    else
    {
        for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj");
            cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
    }
    return ae_true;
}


/********************************************************************
real TRSM kernel
********************************************************************/
ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     double *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     */
    double *pdiag, *arow;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf, alglib_simd_alignment);
    ae_bool uppera;
    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;

    if( m>alglib_r_block || n>alglib_r_block )
        return ae_false;

    /*
     * Check for SSE2 support
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        rmv = &_ialglib_rmv_sse2;
        mcopyblock = &_ialglib_mcopyblock_sse2;
    }
#endif

    /*
     * Prepare
     * Transpose X (so we may use mv, which calculates A*x, but not x*A)
     */
    mcopyblock(m, m, _a, optype, _a_stride, abuf);
    mcopyblock(m, n, _x, 1, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1)
            *pdiag = 1.0;
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve A^-1*Y^T=X^T where A is upper or lower triangular
     */
    if( uppera )
    {
        for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1);
            rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
    }
    else
    {
        for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            _ialglib_vcopy(i, arow, 1, tmpbuf, 1);
            rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
    }
    return ae_true;
}


/********************************************************************
complex SYRK kernel
********************************************************************/
ae_bool _ialglib_cmatrixherk(ae_int_t n,
     ae_int_t k,
     double alpha,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_int_t optypea,
     double beta,
     ae_complex *_c,
     ae_int_t _c_stride,
     ae_bool isupper)
{
    /*
     * local buffers
     */
    double *arow, *crow;
    ae_complex c_alpha, c_beta;
    ae_int_t i;
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf, alglib_simd_alignment);

    if( n>alglib_c_block || k>alglib_c_block )
        return ae_false;
    if( n==0 )
        return ae_true;

    /*
     * copy A and C, task is transformed to "A*A^H"-form.
     * if beta==0, then C is filled by zeros (and not referenced)
     *
     * alpha==0 or k==0 are correctly processed (A is not referenced)
     */
    c_alpha.x = alpha;
    c_alpha.y = 0;
    c_beta.x = beta;
    c_beta.y = 0;
    if( alpha==0 )
        k = 0;
    if( k>0 )
    {
        if( optypea==0 )
            _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf);
        else
            _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf);
    }
    _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf);
    if( beta==0 )
    {
        for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
            if( isupper )
                _ialglib_vzero(2*(n-i), crow+2*i, 1);
            else
                _ialglib_vzero(2*(i+1), crow, 1);
    }

    /*
     * update C
     */
    if( isupper )
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
            _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta);
        }
    }
    else
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
            _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta);
        }
    }

    /*
     * copy back
     */
    _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride);

    return ae_true;
}


/********************************************************************
real SYRK kernel
********************************************************************/
ae_bool _ialglib_rmatrixsyrk(ae_int_t n,
     ae_int_t k,
     double alpha,
     double *_a,
     ae_int_t _a_stride,
     ae_int_t optypea,
     double beta,
     double *_c,
     ae_int_t _c_stride,
     ae_bool isupper)
{
    /*
     * local buffers
     */
    double *arow, *crow;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);

    if( n>alglib_r_block || k>alglib_r_block )
        return ae_false;
    if( n==0 )
        return ae_true;

    /*
     * copy A and C, task is transformed to "A*A^T"-form.
     * if beta==0, then C is filled by zeros (and not referenced)
     *
     * alpha==0 or k==0 are correctly processed (A is not referenced)
     */
    if( alpha==0 )
        k = 0;
    if( k>0 )
    {
        if( optypea==0 )
            _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf);
        else
            _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf);
    }
    _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf);
    if( beta==0 )
    {
        for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block)
            if( isupper )
                _ialglib_vzero(n-i, crow+i, 1);
            else
                _ialglib_vzero(i+1, crow, 1);
    }

    /*
     * update C
     */
    if( isupper )
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
        {
            _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta);
        }
    }
    else
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
        {
            _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta);
        }
    }

    /*
     * copy back
     */
    _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride);

    return ae_true;
}
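
/*
 * Illustrative sketch (not part of the kernel above, flat array names are
 * assumed): with optypea==0 the SYRK kernel computes, for the referenced
 * triangle only,
 *
 *     C := alpha*A*A^T + beta*C
 *
 * A scalar reference for the upper triangle would be:
 *
 *     for(i=0; i<n; i++)
 *         for(j=i; j<n; j++)
 *         {
 *             double v = 0;
 *             for(t=0; t<k; t++)
 *                 v += a[i*a_stride+t]*a[j*a_stride+t];
 *             c[i*c_stride+j] = alpha*v + (beta==0 ? 0.0 : beta*c[i*c_stride+j]);
 *         }
 */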


/********************************************************************
complex rank-1 kernel
********************************************************************/
ae_bool _ialglib_cmatrixrank1(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_complex *_u,
     ae_complex *_v)
{
    /*
     * Locals
     */
    ae_complex *arow, *pu, *pv, *vtmp, *dst;
    ae_int_t n2 = n/2;
    ae_int_t i, j;

    /*
     * Quick exit
     */
    if( m<=0 || n<=0 )
        return ae_false;

    /*
     * update pairs of rows
     */
    arow = _a;
    pu = _u;
    vtmp = _v;
    for(i=0; i<m; i++, arow+=_a_stride, pu++)
    {
        /*
         * update by two
         */
        for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
        {
            double ux = pu[0].x;
            double uy = pu[0].y;
            double v0x = pv[0].x;
            double v0y = pv[0].y;
            double v1x = pv[1].x;
            double v1y = pv[1].y;
            dst[0].x += ux*v0x-uy*v0y;
            dst[0].y += ux*v0y+uy*v0x;
            dst[1].x += ux*v1x-uy*v1y;
            dst[1].y += ux*v1y+uy*v1x;
        }

        /*
         * final update
         */
        if( n%2!=0 )
        {
            double ux = pu[0].x;
            double uy = pu[0].y;
            double vx = pv[0].x;
            double vy = pv[0].y;
            dst[0].x += ux*vx-uy*vy;
            dst[0].y += ux*vy+uy*vx;
        }
    }
    return ae_true;
}


/********************************************************************
real rank-1 kernel
deprecated version
********************************************************************/
ae_bool _ialglib_rmatrixrank1(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     double *_u,
     double *_v)
{
    /*
     * Locals
     */
    double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
    ae_int_t m2 = m/2;
    ae_int_t n2 = n/2;
    ae_int_t stride = _a_stride;
    ae_int_t stride2 = 2*_a_stride;
    ae_int_t i, j;

    /*
     * Quick exit
     */
    if( m<=0 || n<=0 )
        return ae_false;

    /*
     * update pairs of rows
     */
    arow0 = _a;
    arow1 = arow0+stride;
    pu = _u;
    vtmp = _v;
    for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
    {
        /*
         * update by two
         */
        for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
        {
            dst0[0] += pu[0]*pv[0];
            dst0[1] += pu[0]*pv[1];
            dst1[0] += pu[1]*pv[0];
            dst1[1] += pu[1]*pv[1];
        }

        /*
         * final update
         */
        if( n%2!=0 )
        {
            dst0[0] += pu[0]*pv[0];
            dst1[0] += pu[1]*pv[0];
        }
    }

    /*
     * update last row
     */
    if( m%2!=0 )
    {
        /*
         * update by two
         */
        for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
        {
            dst0[0] += pu[0]*pv[0];
            dst0[1] += pu[0]*pv[1];
        }

        /*
         * final update
         */
        if( n%2!=0 )
            dst0[0] += pu[0]*pv[0];
    }
    return ae_true;
}


/********************************************************************
real rank-1 kernel
deprecated version
********************************************************************/
ae_bool _ialglib_rmatrixger(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     double alpha,
     double *_u,
     double *_v)
{
    /*
     * Locals
     */
    double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
    ae_int_t m2 = m/2;
    ae_int_t n2 = n/2;
    ae_int_t stride = _a_stride;
    ae_int_t stride2 = 2*_a_stride;
    ae_int_t i, j;

    /*
     * Quick exit
     */
    if( m<=0 || n<=0 || alpha==0.0 )
        return ae_false;

    /*
     * update pairs of rows
     */
    arow0 = _a;
    arow1 = arow0+stride;
    pu = _u;
    vtmp = _v;
    for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
    {
        double au0 = alpha*pu[0];
        double au1 = alpha*pu[1];

        /*
         * update by two
         */
        for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
        {
            dst0[0] += au0*pv[0];
            dst0[1] += au0*pv[1];
            dst1[0] += au1*pv[0];
            dst1[1] += au1*pv[1];
        }

        /*
         * final update
         */
        if( n%2!=0 )
        {
            dst0[0] += au0*pv[0];
            dst1[0] += au1*pv[0];
        }
    }

    /*
     * update last row
     */
    if( m%2!=0 )
    {
        double au0 = alpha*pu[0];

        /*
         * update by two
         */
        for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
        {
            dst0[0] += au0*pv[0];
            dst0[1] += au0*pv[1];
        }

        /*
         * final update
         */
        if( n%2!=0 )
            dst0[0] += au0*pv[0];
    }
    return ae_true;
}
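
/*
 * Illustrative sketch (not part of the kernel above): _ialglib_rmatrixger
 * performs the classical GER update
 *
 *     A[i][j] += alpha*u[i]*v[j],   0<=i<m, 0<=j<n
 *
 * The 2x2 blocking above only reorders these scalar updates for better
 * locality; it does not change the result.
 */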

/********************************************************************
Interface functions for efficient kernels
********************************************************************/
ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     double alpha,
     ae_matrix *_a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     ae_matrix *_b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     double beta,
     ae_matrix *_c,
     ae_int_t ic,
     ae_int_t jc)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( alpha==0.0 || k==0 || n==0 || m==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride);
}

ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     ae_complex alpha,
     ae_matrix *_a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     ae_matrix *_b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     ae_complex beta,
     ae_matrix *_c,
     ae_int_t ic,
     ae_int_t jc)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( (alpha.x==0.0 && alpha.y==0) || k==0 || n==0 || m==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride);
}

ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_matrix *x,
     ae_int_t i2,
     ae_int_t j2)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( m==0 || n==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
}

ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_matrix *x,
     ae_int_t i2,
     ae_int_t j2)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( m==0 || n==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
}

ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_matrix *x,
     ae_int_t i2,
     ae_int_t j2)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( m==0 || n==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
}

ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_matrix *x,
     ae_int_t i2,
     ae_int_t j2)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( m==0 || n==0 )
        return ae_false;

    /* handle with optimized ALGLIB kernel */
    return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
}

ae_bool _ialglib_i_cmatrixherkf(ae_int_t n,
     ae_int_t k,
     double alpha,
     ae_matrix *a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     ae_matrix *c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( alpha==0.0 || k==0 || n==0 )
        return ae_false;

    /* ALGLIB kernel */
    return _ialglib_cmatrixherk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper);
}

ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
     ae_int_t k,
     double alpha,
     ae_matrix *a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     ae_matrix *c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper)
{
    /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
    if( alpha==0.0 || k==0 || n==0 )
        return ae_false;

    /* ALGLIB kernel */
    return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper);
}

ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t ia,
     ae_int_t ja,
     ae_vector *u,
     ae_int_t uoffs,
     ae_vector *v,
     ae_int_t voffs)
{
    return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]);
}

ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t ia,
     ae_int_t ja,
     ae_vector *u,
     ae_int_t uoffs,
     ae_vector *v,
     ae_int_t voffs)
{
    return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
}

ae_bool _ialglib_i_rmatrixgerf(ae_int_t m,
     ae_int_t n,
     ae_matrix *a,
     ae_int_t ia,
     ae_int_t ja,
     double alpha,
     ae_vector *u,
     ae_int_t uoffs,
     ae_vector *v,
     ae_int_t voffs)
{
    return _ialglib_rmatrixger(m, n, &a->ptr.pp_double[ia][ja], a->stride, alpha, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
}
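
/*
 * Illustrative note (an assumption based on the degenerate-case checks above,
 * not a statement of the official API contract): these _ialglib_i_* wrappers
 * return ae_true only when the optimized kernel fully handled the operation;
 * ae_false tells the caller to fall back to generic ALGLIB code, e.g.
 *
 *     if( !_ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea,
 *                                  beta, c, ic, jc, isupper) )
 *     {
 *         // generic reference implementation runs here
 *     }
 */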




/********************************************************************
This function reads rectangular matrix A given by two column pointers
col0 and col1 and stride src_stride and moves it into contiguous row-
by-row storage given by dst.

It can handle following special cases:
* col1==NULL        in this case second column of A is filled by zeros
********************************************************************/
void _ialglib_pack_n2(
     double *col0,
     double *col1,
     ae_int_t n,
     ae_int_t src_stride,
     double *dst)
{
    ae_int_t n2, j, stride2;

    /*
     * handle special case
     */
    if( col1==NULL )
    {
        for(j=0; j<n; j++)
        {
            dst[0] = *col0;
            dst[1] = 0.0;
            col0 += src_stride;
            dst += 2;
        }
        return;
    }

    /*
     * handle general case
     */
    n2 = n/2;
    stride2 = src_stride*2;
    for(j=0; j<n2; j++)
    {
        dst[0] = *col0;
        dst[1] = *col1;
        dst[2] = col0[src_stride];
        dst[3] = col1[src_stride];
        col0 += stride2;
        col1 += stride2;
        dst += 4;
    }
    if( n%2 )
    {
        dst[0] = *col0;
        dst[1] = *col1;
    }
}
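
/*
 * Illustrative sketch (not part of the function above): _ialglib_pack_n2
 * gathers two strided columns into one contiguous Kx2 row-major block.
 * For n==3 it produces
 *
 *     dst = { col0[0],            col1[0],
 *             col0[src_stride],   col1[src_stride],
 *             col0[2*src_stride], col1[2*src_stride] };
 *
 * This is the packed layout consumed by the mm22/mm22x2 micro-kernels below.
 */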

/*************************************************************************
This function reads rectangular matrix A given by two column pointers col0
and col1 and stride src_stride and moves it into contiguous row-by-row
storage given by dst.

dst must be aligned, col0 and col1 may be non-aligned.

It can handle following special cases:
* col1==NULL        in this case second column of A is filled by zeros
* src_stride==1     efficient SSE-based code is used
* col1-col0==1      efficient SSE-based code is used

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If you want to know whether it is safe to call it, you should check
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
and will do its work.
*************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_pack_n2_sse2(
     double *col0,
     double *col1,
     ae_int_t n,
     ae_int_t src_stride,
     double *dst)
{
    ae_int_t n2, j, stride2;

    /*
     * handle special case: col1==NULL
     */
    if( col1==NULL )
    {
        for(j=0; j<n; j++)
        {
            dst[0] = *col0;
            dst[1] = 0.0;
            col0 += src_stride;
            dst += 2;
        }
        return;
    }

    /*
     * handle unit stride
     */
    if( src_stride==1 )
    {
        __m128d v0, v1;
        n2 = n/2;
        for(j=0; j<n2; j++)
        {
            v0 = _mm_loadu_pd(col0);
            col0 += 2;
            v1 = _mm_loadu_pd(col1);
            col1 += 2;
            _mm_store_pd(dst, _mm_unpacklo_pd(v0,v1));
            _mm_store_pd(dst+2,_mm_unpackhi_pd(v0,v1));
            dst += 4;
        }
        if( n%2 )
        {
            dst[0] = *col0;
            dst[1] = *col1;
        }
        return;
    }

    /*
     * handle col1-col0==1
     */
    if( col1-col0==1 )
    {
        __m128d v0, v1;
        n2 = n/2;
        stride2 = 2*src_stride;
        for(j=0; j<n2; j++)
        {
            v0 = _mm_loadu_pd(col0);
            v1 = _mm_loadu_pd(col0+src_stride);
            _mm_store_pd(dst, v0);
            _mm_store_pd(dst+2,v1);
            col0 += stride2;
            dst += 4;
        }
        if( n%2 )
        {
            dst[0] = col0[0];
            dst[1] = col0[1];
        }
        return;
    }

    /*
     * handle general case
     */
    n2 = n/2;
    stride2 = src_stride*2;
    for(j=0; j<n2; j++)
    {
        dst[0] = *col0;
        dst[1] = *col1;
        dst[2] = col0[src_stride];
        dst[3] = col1[src_stride];
        col0 += stride2;
        col1 += stride2;
        dst += 4;
    }
    if( n%2 )
    {
        dst[0] = *col0;
        dst[1] = *col1;
    }
}
#endif


/********************************************************************
This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
matrices stored in contiguous row-by-row storage, R is 2x2 matrix
stored in non-contiguous row-by-row storage.

A and B must be aligned; R may be non-aligned.

If beta is zero, contents of R is ignored (not multiplied by zero -
just ignored).

However, when alpha is zero, we still calculate A'*B, which is
multiplied by zero afterwards.

Function accepts additional parameter store_mode:
* if 0, full R is stored
* if 1, only first row of R is stored
* if 2, only first column of R is stored
* if 3, only top left element of R is stored
********************************************************************/
void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
{
    double v00, v01, v10, v11;
    ae_int_t t;
    v00 = 0.0;
    v01 = 0.0;
    v10 = 0.0;
    v11 = 0.0;
    for(t=0; t<k; t++)
    {
        v00 += a[0]*b[0];
        v01 += a[0]*b[1];
        v10 += a[1]*b[0];
        v11 += a[1]*b[1];
        a+=2;
        b+=2;
    }
    if( store_mode==0 )
    {
        if( beta==0 )
        {
            r[0] = alpha*v00;
            r[1] = alpha*v01;
            r[stride+0] = alpha*v10;
            r[stride+1] = alpha*v11;
        }
        else
        {
            r[0] = beta*r[0] + alpha*v00;
            r[1] = beta*r[1] + alpha*v01;
            r[stride+0] = beta*r[stride+0] + alpha*v10;
            r[stride+1] = beta*r[stride+1] + alpha*v11;
        }
        return;
    }
    if( store_mode==1 )
    {
        if( beta==0 )
        {
            r[0] = alpha*v00;
            r[1] = alpha*v01;
        }
        else
        {
            r[0] = beta*r[0] + alpha*v00;
            r[1] = beta*r[1] + alpha*v01;
        }
        return;
    }
    if( store_mode==2 )
    {
        if( beta==0 )
        {
            r[0] = alpha*v00;
            r[stride+0] = alpha*v10;
        }
        else
        {
            r[0] = beta*r[0] + alpha*v00;
            r[stride+0] = beta*r[stride+0] + alpha*v10;
        }
        return;
    }
    if( store_mode==3 )
    {
        if( beta==0 )
        {
            r[0] = alpha*v00;
        }
        else
        {
            r[0] = beta*r[0] + alpha*v00;
        }
        return;
    }
}
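
/*
 * Illustrative sketch (not part of the function above): with store_mode==0 and
 * nonzero beta, _ialglib_mm22 computes the 2x2 update
 *
 *     R[0][0] = beta*R[0][0] + alpha*sum_t A[t][0]*B[t][0]
 *     R[0][1] = beta*R[0][1] + alpha*sum_t A[t][0]*B[t][1]
 *     R[1][0] = beta*R[1][0] + alpha*sum_t A[t][1]*B[t][0]
 *     R[1][1] = beta*R[1][1] + alpha*sum_t A[t][1]*B[t][1]
 *
 * where A and B are the packed Kx2 blocks produced by the pack_n2 helpers and
 * the two rows of R are stride elements apart.
 */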


/********************************************************************
This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
matrices stored in contiguous row-by-row storage, R is 2x2 matrix
stored in non-contiguous row-by-row storage.

A and B must be aligned; R may be non-aligned.

If beta is zero, contents of R is ignored (not multiplied by zero -
just ignored).

However, when alpha is zero, we still calculate A'*B, which is
multiplied by zero afterwards.

Function accepts additional parameter store_mode:
* if 0, full R is stored
* if 1, only first row of R is stored
* if 2, only first column of R is stored
* if 3, only top left element of R is stored

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If (1) fails, this function will still be defined and callable, but it
will do nothing. If (2) fails, a call to this function will probably
crash your system.

If you want to know whether it is safe to call it, you should check
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
and will do its work.
********************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
{
    /*
     * We calculate product of two Kx2 matrices (result is 2x2).
     * VD and VE store the result as follows:
     *
     *        [ VD[0]  VE[0] ]
     * A'*B = [              ]
     *        [ VE[1]  VD[1] ]
     *
     */
    __m128d va, vb, vd, ve, vt, r0, r1, valpha, vbeta;
    ae_int_t t, k2;

    /*
     * calculate product
     */
    k2 = k/2;
    vd = _mm_setzero_pd();
    ve = _mm_setzero_pd();
    for(t=0; t<k2; t++)
    {
        vb = _mm_load_pd(b);
        va = _mm_load_pd(a);
        vt = vb;
        vb = _mm_mul_pd(va,vb);
        vt = _mm_shuffle_pd(vt, vt, 1);
        vd = _mm_add_pd(vb,vd);
        vt = _mm_mul_pd(va,vt);
        vb = _mm_load_pd(b+2);
        ve = _mm_add_pd(vt,ve);
        va = _mm_load_pd(a+2);
        vt = vb;
        vb = _mm_mul_pd(va,vb);
        vt = _mm_shuffle_pd(vt, vt, 1);
        vd = _mm_add_pd(vb,vd);
        vt = _mm_mul_pd(va,vt);
        ve = _mm_add_pd(vt,ve);
        a+=4;
        b+=4;
    }
    if( k%2 )
    {
        va = _mm_load_pd(a);
        vb = _mm_load_pd(b);
        vt = _mm_shuffle_pd(vb, vb, 1);
        vd = _mm_add_pd(_mm_mul_pd(va,vb),vd);
        ve = _mm_add_pd(_mm_mul_pd(va,vt),ve);
    }

    /*
     * r0 is first row of alpha*A'*B, r1 is second row
     */
    valpha = _mm_load1_pd(&alpha);
    r0 = _mm_mul_pd(_mm_unpacklo_pd(vd,ve),valpha);
    r1 = _mm_mul_pd(_mm_unpackhi_pd(ve,vd),valpha);

    /*
     * store
     */
    if( store_mode==0 )
    {
        if( beta==0 )
        {
            _mm_storeu_pd(r,r0);
            _mm_storeu_pd(r+stride,r1);
        }
        else
        {
            vbeta = _mm_load1_pd(&beta);
            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r0));
            _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r1));
        }
        return;
    }
    if( store_mode==1 )
    {
        if( beta==0 )
            _mm_storeu_pd(r,r0);
        else
            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),r0));
        return;
    }
    if( store_mode==2 )
    {
        double buf[4];
        _mm_storeu_pd(buf,r0);
        _mm_storeu_pd(buf+2,r1);
        if( beta==0 )
        {
            r[0] = buf[0];
            r[stride+0] = buf[2];
        }
        else
        {
            r[0] = beta*r[0] + buf[0];
            r[stride+0] = beta*r[stride+0] + buf[2];
        }
        return;
    }
    if( store_mode==3 )
    {
        double buf[2];
        _mm_storeu_pd(buf,r0);
        if( beta==0 )
            r[0] = buf[0];
        else
            r[0] = beta*r[0] + buf[0];
        return;
    }
}
#endif


/*************************************************************************
This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1
are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix
stored in non-contiguous row-by-row storage.

A, B0 and B1 must be aligned; R may be non-aligned.

Note that B0 and B1 are two separate matrices stored in different
locations.

If beta is zero, contents of R is ignored (not multiplied by zero - just
ignored).

However, when alpha is zero, we still calculate the MM product, which is
multiplied by zero afterwards.

Unlike mm22 functions, this function does NOT support partial output of R
- we always store full 2x4 matrix.
*************************************************************************/
void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
{
    _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0);
    _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0);
}

/*************************************************************************
This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1
are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix
stored in non-contiguous row-by-row storage.

A, B0 and B1 must be aligned; R may be non-aligned.

Note that B0 and B1 are two separate matrices stored in different
locations.

If beta is zero, contents of R is ignored (not multiplied by zero - just
ignored).

However, when alpha is zero, we still calculate the MM product, which is
multiplied by zero afterwards.

Unlike mm22 functions, this function does NOT support partial output of R
- we always store full 2x4 matrix.

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If (1) fails, this function will still be defined and callable, but it
will do nothing. If (2) fails, a call to this function will probably
crash your system.

If you want to know whether it is safe to call it, you should check
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
and will do its work.
*************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
{
    /*
     * We calculate product of Kx2 matrices (result is 2x4).
     * V0, V1, V2, V3 store the result as follows:
     *
     *     [ V0[0]  V1[1]  V2[0]  V3[1] ]
     * R = [                            ]
     *     [ V1[0]  V0[1]  V3[0]  V2[1] ]
     *
     * VA0 stores current 1x2 block of A, VA1 stores shuffle of VA0,
     * VB0 and VB1 are used to store two copies of 1x2 block of B0 or B1
     * (both vars store same data - either B0 or B1). Results from multiplication
     * by VA0/VA1 are stored in VB0/VB1 too.
     *
     */
    __m128d v0, v1, v2, v3, va0, va1, vb0, vb1;
    __m128d r00, r01, r10, r11, valpha, vbeta;
    ae_int_t t;

    v0 = _mm_setzero_pd();
    v1 = _mm_setzero_pd();
    v2 = _mm_setzero_pd();
    v3 = _mm_setzero_pd();
    for(t=0; t<k; t++)
    {
        va0 = _mm_load_pd(a);
        vb0 = _mm_load_pd(b0);
        va1 = _mm_load_pd(a);

        vb0 = _mm_mul_pd(va0,vb0);
        vb1 = _mm_load_pd(b0);
        v0 = _mm_add_pd(v0,vb0);
        vb1 = _mm_mul_pd(va1,vb1);
        vb0 = _mm_load_pd(b1);
        v1 = _mm_add_pd(v1,vb1);

        vb0 = _mm_mul_pd(va0,vb0);
        vb1 = _mm_load_pd(b1);
        v2 = _mm_add_pd(v2,vb0);
        vb1 = _mm_mul_pd(va1,vb1);
        v3 = _mm_add_pd(v3,vb1);

        a+=2;
        b0+=2;
        b1+=2;
    }

    /*
     * shuffle V1 and V3 (conversion to more convenient storage format):
     *
     *     [ V0[0]  V1[0]  V2[0]  V3[0] ]
     * R = [                            ]
     *     [ V1[1]  V0[1]  V3[1]  V2[1] ]
     *
     * unpack results to
     *
     * [ r00 r01 ]
     * [ r10 r11 ]
     *
     */
    valpha = _mm_load1_pd(&alpha);
    v1 = _mm_shuffle_pd(v1, v1, 1);
    v3 = _mm_shuffle_pd(v3, v3, 1);
    r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha);
    r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha);
    r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha);
    r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha);

    /*
     * store
     */
    if( beta==0 )
    {
        _mm_storeu_pd(r,r00);
        _mm_storeu_pd(r+2,r01);
        _mm_storeu_pd(r+stride,r10);
        _mm_storeu_pd(r+stride+2,r11);
    }
    else
    {
        vbeta = _mm_load1_pd(&beta);
        _mm_storeu_pd(r, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00));
        _mm_storeu_pd(r+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01));
        _mm_storeu_pd(r+stride, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10));
        _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11));
    }
}
#endif

}


/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS PARALLEL SUBROUTINES
//
/////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{


}