/*************************************************************************
ALGLIB 3.16.0 (source code generated 2019-12-19)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "stdafx.h"
#include "dataanalysis.h"

// disable some irrelevant warnings
#if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
#pragma warning(disable:4100)
#pragma warning(disable:4127)
#pragma warning(disable:4611)
#pragma warning(disable:4702)
#pragma warning(disable:4996)
#endif
/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
//
/////////////////////////////////////////////////////////////////////////
namespace alglib
{

#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Principal components analysis

This function builds orthogonal basis where first axis corresponds to
direction with maximum variance, second axis maximizes variance in the
subspace orthogonal to first axis and so on.

This function builds FULL basis, i.e. returns N vectors corresponding to
ALL directions, no matter how informative. If you need just a few (say,
10 or 50) of the most important directions, you may find it faster to use
one of the reduced versions:
* pcatruncatedsubspace() - for subspace iteration based method

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   dataset, array[0..NPoints-1,0..NVars-1].
                    matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if SVD subroutine hasn't converged
                    * -1, if wrong parameters have been passed (NPoints<0,
                          NVars<1)
                    *  1, if task is solved
    S2          -   array[0..NVars-1]. variance values corresponding
                    to basis vectors.
    V           -   array[0..NVars-1,0..NVars-1]
                    matrix, whose columns store basis vectors.

  -- ALGLIB --
     Copyright 25.08.2008 by Bochkanov Sergey
*************************************************************************/
void pcabuildbasis(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcabuildbasis(const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
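
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A minimal call sequence for pcabuildbasis(), assuming only the public C++
interface declared in dataanalysis.h; the data below is illustrative.

    real_2d_array x = "[[1.0,1.0],[2.0,2.1],[3.0,2.9],[4.0,4.2],[5.0,5.0]]";
    ae_int_t info;
    real_1d_array s2;   // variance captured by each basis vector
    real_2d_array v;    // columns store the basis vectors
    pcabuildbasis(x, 5, 2, info, s2, v);
    // on success info==1, s2 is sorted in decreasing order, and v is 2x2
*************************************************************************/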

/*************************************************************************
Principal components analysis

This function performs truncated PCA, i.e. returns just a few most important
directions.

Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required. Thus, if you need full basis,
it is better to use pcabuildbasis() function.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   dataset, array[0..NPoints-1,0..NVars-1].
                    matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NNeeded     -   number of requested components, in [1,NVars] range;
                    this function is efficient only for NNeeded<<NVars.
    Eps         -   desired precision of vectors returned; underlying
                    solver will stop iterations as soon as absolute error
                    in corresponding singular values reduces to roughly
                    eps*MAX(lambda[]), with lambda[] being array of
                    eigenvalues.
                    Zero value means that algorithm performs number of
                    iterations specified by maxits parameter, without
                    paying attention to precision.
    MaxIts      -   number of iterations performed by subspace iteration
                    method. Zero value means that no limit on iteration
                    count is placed (eps-based stopping condition is used).

OUTPUT PARAMETERS:
    S2          -   array[NNeeded]. Variance values corresponding
                    to basis vectors.
    V           -   array[NVars,NNeeded]
                    matrix, whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in small eps being selected as
stopping condition. Exact value of automatically selected eps is
version-dependent.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspace(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcatruncatedsubspace(const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), npoints, nvars, nneeded, eps, maxits, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
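
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A sketch of a truncated run that extracts only the single most important
direction; eps=0 together with maxits=0 lets the solver pick a small,
version-dependent stopping tolerance, as described above.

    real_2d_array x = "[[1.0,1.0],[2.0,2.1],[3.0,2.9],[4.0,4.2]]";
    real_1d_array s2;   // array[1]: variance of the leading direction
    real_2d_array v;    // array[2,1]: the leading basis vector
    pcatruncatedsubspace(x, 4, 2, 1, 0.0, 0, s2, v);
*************************************************************************/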

/*************************************************************************
Sparse truncated principal components analysis

This function performs sparse truncated PCA, i.e. returns just a few most
important principal components for a sparse input X.

Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   sparse dataset, sparse npoints*nvars matrix. It is
                    recommended to use CRS sparse storage format; non-CRS
                    input will be internally converted to CRS.
                    Matrix contains ONLY INDEPENDENT VARIABLES, and must
                    be EXACTLY npoints*nvars.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NNeeded     -   number of requested components, in [1,NVars] range;
                    this function is efficient only for NNeeded<<NVars.
    Eps         -   desired precision of vectors returned; underlying
                    solver will stop iterations as soon as absolute error
                    in corresponding singular values reduces to roughly
                    eps*MAX(lambda[]), with lambda[] being array of
                    eigenvalues.
                    Zero value means that algorithm performs number of
                    iterations specified by maxits parameter, without
                    paying attention to precision.
    MaxIts      -   number of iterations performed by subspace iteration
                    method. Zero value means that no limit on iteration
                    count is placed (eps-based stopping condition is used).

OUTPUT PARAMETERS:
    S2          -   array[NNeeded]. Variance values corresponding
                    to basis vectors.
    V           -   array[NVars,NNeeded]
                    matrix, whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in small eps being selected as
a stopping condition. Exact value of automatically selected eps is
version-dependent.

NOTE: zero MaxIts is silently replaced by some reasonable value which
prevents eternal loops (possible when inputs are degenerate and too
stringent stopping criteria are specified). In current version it
is 50+2*NVars.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspacesparse(const sparsematrix &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcatruncatedsubspacesparse(const_cast<alglib_impl::sparsematrix*>(x.c_ptr()), npoints, nvars, nneeded, eps, maxits, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
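
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A sketch of the sparse variant. It assumes the sparsecreate()/sparseset()/
sparseconverttocrs() helpers from the ALGLIB linear algebra package;
conversion to CRS is optional but avoids an internal copy.

    sparsematrix x;
    sparsecreate(4, 3, x);          // 4 points, 3 independent variables
    sparseset(x, 0, 0, 1.0);
    sparseset(x, 1, 1, 2.0);
    sparseset(x, 2, 2, 3.0);
    sparseset(x, 3, 0, 4.0);
    sparseconverttocrs(x);          // recommended storage format
    real_1d_array s2;
    real_2d_array v;
    pcatruncatedsubspacesparse(x, 4, 3, 1, 0.0, 0, s2, v);
*************************************************************************/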
#endif

#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Optimal binary classification

Algorithm finds optimal (=with minimal cross-entropy) binary partition.
Internal subroutine.

INPUT PARAMETERS:
    A       -   array[0..N-1], variable
    C       -   array[0..N-1], class numbers (0 or 1).
    N       -   array size

OUTPUT PARAMETERS:
    Info    -   completion code:
                * -3, all values of A[] are the same (partition is impossible)
                * -2, one of C[] is incorrect (<0, >1)
                * -1, incorrect parameters were passed (N<=0).
                *  1, OK
    Threshold-  partition boundary. Left part contains values which are
                strictly less than Threshold. Right part contains values
                which are greater than or equal to Threshold.
    PAL, PBL-   probabilities P(0|v<Threshold) and P(1|v<Threshold)
    PAR, PBR-   probabilities P(0|v>=Threshold) and P(1|v>=Threshold)
    CVE     -   cross-validation estimate of cross-entropy

  -- ALGLIB --
     Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2(const real_1d_array &a, const integer_1d_array &c, const ae_int_t n, ae_int_t &info, double &threshold, double &pal, double &pbl, double &par, double &pbr, double &cve, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dsoptimalsplit2(const_cast<alglib_impl::ae_vector*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(c.c_ptr()), n, &info, &threshold, &pal, &pbl, &par, &pbr, &cve, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
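
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A sketch of an optimal binary split on a toy attribute/label pair; the
data is illustrative.

    real_1d_array a = "[0.1, 0.3, 0.4, 0.8, 0.9]";
    integer_1d_array c = "[0, 0, 0, 1, 1]";
    ae_int_t info;
    double threshold, pal, pbl, par, pbr, cve;
    dsoptimalsplit2(a, c, 5, info, threshold, pal, pbl, par, pbr, cve);
    // info==1 on success; here the threshold lands between 0.4 and 0.8,
    // so values v<threshold go left and v>=threshold go right
*************************************************************************/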

/*************************************************************************
Optimal partition, internal subroutine. Fast version.

Accepts:
    A       array[0..N-1]       array of attributes
    C       array[0..N-1]       array of class labels
    TiesBuf array[0..N]         temporaries (ties)
    CntBuf  array[0..2*NC-1]    temporaries (counts)
    Alpha                       centering factor (0<=alpha<=1, recommended value - 0.05)
    BufR    array[0..N-1]       temporaries
    BufI    array[0..N-1]       temporaries

Output:
    Info    error code (">0"=OK, "<0"=bad)
    RMS     training set RMS error
    CVRMS   leave-one-out RMS error

Note:
    content of all arrays is changed by subroutine;
    it doesn't allocate temporaries.

  -- ALGLIB --
     Copyright 11.12.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2fast(real_1d_array &a, integer_1d_array &c, integer_1d_array &tiesbuf, integer_1d_array &cntbuf, real_1d_array &bufr, integer_1d_array &bufi, const ae_int_t n, const ae_int_t nc, const double alpha, ae_int_t &info, double &threshold, double &rms, double &cvrms, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dsoptimalsplit2fast(const_cast<alglib_impl::ae_vector*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(tiesbuf.c_ptr()), const_cast<alglib_impl::ae_vector*>(cntbuf.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufi.c_ptr()), n, nc, alpha, &info, &threshold, &rms, &cvrms, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Model's errors:
    * RelCLSError   -   fraction of misclassified cases.
    * AvgCE         -   average cross-entropy
    * RMSError      -   root-mean-square error
    * AvgError      -   average error
    * AvgRelError   -   average relative error

NOTE 1: RelCLSError/AvgCE are zero on regression problems.

NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
        errors in prediction of posterior probabilities
*************************************************************************/
_modelerrors_owner::_modelerrors_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_modelerrors_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::modelerrors*)alglib_impl::ae_malloc(sizeof(alglib_impl::modelerrors), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_modelerrors_owner::_modelerrors_owner(const _modelerrors_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_modelerrors_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: modelerrors copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::modelerrors*)alglib_impl::ae_malloc(sizeof(alglib_impl::modelerrors), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init_copy(p_struct, const_cast<alglib_impl::modelerrors*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_modelerrors_owner& _modelerrors_owner::operator=(const _modelerrors_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: modelerrors assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: modelerrors assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_modelerrors_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init_copy(p_struct, const_cast<alglib_impl::modelerrors*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_modelerrors_owner::~_modelerrors_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_modelerrors_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::modelerrors* _modelerrors_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::modelerrors* _modelerrors_owner::c_ptr() const
{
    return const_cast<alglib_impl::modelerrors*>(p_struct);
}

modelerrors::modelerrors() : _modelerrors_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

modelerrors::modelerrors(const modelerrors &rhs):_modelerrors_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

modelerrors& modelerrors::operator=(const modelerrors &rhs)
{
    if( this==&rhs )
        return *this;
    _modelerrors_owner::operator=(rhs);
    return *this;
}

modelerrors::~modelerrors()
{
}

/*************************************************************************

*************************************************************************/
_multilayerperceptron_owner::_multilayerperceptron_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_multilayerperceptron_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::multilayerperceptron*)alglib_impl::ae_malloc(sizeof(alglib_impl::multilayerperceptron), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_multilayerperceptron_owner::_multilayerperceptron_owner(const _multilayerperceptron_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_multilayerperceptron_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: multilayerperceptron copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::multilayerperceptron*)alglib_impl::ae_malloc(sizeof(alglib_impl::multilayerperceptron), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init_copy(p_struct, const_cast<alglib_impl::multilayerperceptron*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_multilayerperceptron_owner& _multilayerperceptron_owner::operator=(const _multilayerperceptron_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: multilayerperceptron assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: multilayerperceptron assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_multilayerperceptron_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init_copy(p_struct, const_cast<alglib_impl::multilayerperceptron*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_multilayerperceptron_owner::~_multilayerperceptron_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_multilayerperceptron_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::multilayerperceptron* _multilayerperceptron_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::multilayerperceptron* _multilayerperceptron_owner::c_ptr() const
{
    return const_cast<alglib_impl::multilayerperceptron*>(p_struct);
}

multilayerperceptron::multilayerperceptron() : _multilayerperceptron_owner()
{
}

multilayerperceptron::multilayerperceptron(const multilayerperceptron &rhs):_multilayerperceptron_owner(rhs)
{
}

multilayerperceptron& multilayerperceptron::operator=(const multilayerperceptron &rhs)
{
    if( this==&rhs )
        return *this;
    _multilayerperceptron_owner::operator=(rhs);
    return *this;
}

multilayerperceptron::~multilayerperceptron()
{
}

/*************************************************************************
This function serializes data structure to string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although serializer uses spaces and CR+LF as separators, you can
  replace any separator character by arbitrary combination of spaces,
  tabs, Windows or Unix newlines. It allows flexible reformatting of
  the string in case you want to include it into a text or XML file.
  But you should not insert separators into the middle of the "words",
  nor should you change the case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
  and big endian machines, and so on. You can serialize structure on
  32-bit machine and unserialize it on 64-bit one (or vice versa), or
  serialize it on SPARC and unserialize on x86. You can also
  serialize it in C++ version of ALGLIB and unserialize in C# one,
  and vice versa.
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpalloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::mlpserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
This function unserializes data structure from string.
*************************************************************************/
void mlpunserialize(const std::string &s_in, multilayerperceptron &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::mlpunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
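
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A string-based round trip: the serialized text is portable across
platforms and language bindings, so net2 below becomes an exact copy of
net. mlpcreate1() is declared later in this file.

    multilayerperceptron net, net2;
    mlpcreate1(2, 5, 1, net);
    std::string s;
    mlpserialize(net, s);       // portable text representation
    mlpunserialize(s, net2);    // reconstruct an identical network
*************************************************************************/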

/*************************************************************************
This function serializes data structure to C++ stream.

Data stream generated by this function is same as string representation
generated by string version of serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.

We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpalloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::mlpserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
This function unserializes data structure from stream.
*************************************************************************/
void mlpunserialize(const std::istream &s_in, multilayerperceptron &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::mlpunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with linear output layer. Network weights are filled with small
random values.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate0(nin, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreate0, but with one hidden layer (NHid neurons) with
non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate1(nin, nhid, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons)
with non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate2(nin, nhid1, nhid2, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
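
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A sketch of the MLPCreate family: one hidden layer of 10 neurons between
3 inputs and 2 linear outputs. mlpprocess() (declared elsewhere in this
interface) runs a forward pass.

    multilayerperceptron net;
    mlpcreate1(3, 10, 2, net);
    real_1d_array x = "[1.0, 2.0, 3.0]";
    real_1d_array y;
    mlpprocess(net, x, y);      // y receives the 2 network outputs
*************************************************************************/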

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers with non-linear output layer. Network weights are filled with small
random values.

Activation function of the output layer takes values:

    (B, +INF), if D>=0

or

    (-INF, B), if D<0.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb0(nin, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
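
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

With B=0 and D=1 (D>=0) the output activation is bounded below, so every
network output lies in (0, +INF):

    multilayerperceptron net;
    mlpcreateb0(2, 1, 0.0, 1.0, net);
*************************************************************************/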

/*************************************************************************
Same as MLPCreateB0 but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb1(nin, nhid, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateB0 but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb2(nin, nhid1, nhid2, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers with non-linear output layer. Network weights are filled with small
random values. Activation function of the output layer takes values [A,B].

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater0(nin, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
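
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

With A=-1 and B=+1 the output activation is confined to the range [-1,+1]:

    multilayerperceptron net;
    mlpcreater0(2, 1, -1.0, 1.0, net);
*************************************************************************/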

/*************************************************************************
Same as MLPCreateR0, but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater1(nin, nhid, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateR0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater2(nin, nhid1, nhid2, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Creates classifier network with NIn inputs and NOut possible classes.
Network contains no hidden layers and linear output layer with SOFTMAX-
normalization (so outputs sum up to 1.0 and converge to posterior
probabilities).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec0(nin, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
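
/*************************************************************************
EXAMPLE (editorial sketch, not part of the generated ALGLIB sources):

A classifier sketch: 4 inputs, 3 classes. Because of the SOFTMAX output
layer the three outputs are non-negative and sum to 1.0, so they can be
read as posterior probabilities. mlpprocess() is declared elsewhere in
this interface.

    multilayerperceptron net;
    mlpcreatec0(4, 3, net);
    real_1d_array x = "[1.0, 0.0, 2.0, 1.0]";
    real_1d_array y;
    mlpprocess(net, x, y);      // y[0]+y[1]+y[2]==1.0
*************************************************************************/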

/*************************************************************************
Same as MLPCreateC0, but with one non-linear hidden layer.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec1(nin, nhid, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateC0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec2(nin, nhid1, nhid2, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Copying of neural network

INPUT PARAMETERS:
    Network1 -   original

OUTPUT PARAMETERS:
    Network2 -   copy

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcopy(const multilayerperceptron &network1, multilayerperceptron &network2, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcopy(const_cast<alglib_impl::multilayerperceptron*>(network1.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network2.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function copies tunable parameters (weights/means/sigmas) from one
network to another with the same architecture. It performs some rudimentary
checks that architectures are the same, and throws an exception if the
check fails.

It is intended for fast copying of states between two networks which are
known to have the same geometry.

INPUT PARAMETERS:
    Network1 -   source, must be correctly initialized
    Network2 -   target, must have same architecture

OUTPUT PARAMETERS:
    Network2 -   network state is copied from source to target

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpcopytunableparameters(const multilayerperceptron &network1, const multilayerperceptron &network2, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcopytunableparameters(const_cast<alglib_impl::multilayerperceptron*>(network1.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network2.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Randomization of neural network weights

  -- ALGLIB --
     Copyright 06.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlprandomize(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlprandomize(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Randomization of neural network weights and standardizer

  -- ALGLIB --
     Copyright 10.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlprandomizefull(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlprandomizefull(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Internal subroutine.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpinitpreprocessor(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpinitpreprocessor(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Returns information about initialized network: number of inputs, outputs,
weights.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpproperties(const multilayerperceptron &network, ae_int_t &nin, ae_int_t &nout, ae_int_t &wcount, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpproperties(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &nin, &nout, &wcount, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Returns number of inputs.

  -- ALGLIB --
     Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetinputscount(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpgetinputscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
Returns number of outputs.

  -- ALGLIB --
     Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetoutputscount(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpgetoutputscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
Returns number of weights.

  -- ALGLIB --
     Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetweightscount(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpgetweightscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
Tells whether network is SOFTMAX-normalized (i.e. classifier) or not.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
bool mlpissoftmax(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::mlpissoftmax(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<bool*>(&result));
}

/*************************************************************************
This function returns total number of layers (including input, hidden and
output layers).

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetlayerscount(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpgetlayerscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
This function returns size of K-th layer.

K=0 corresponds to input layer, K=CNT-1 corresponds to output layer.

Size of the output layer is always equal to the number of outputs,
although for a softmax-normalized network the last neuron has no
connections - it is just zero.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetlayersize(const multilayerperceptron &network, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpgetlayersize(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}
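
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It shows mlpgetlayerscount() and
 * mlpgetlayersize() walking the layer structure; mlpcreate1() is assumed
 * here for illustration only.
 */
#if 0
static void example_layer_walk()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(3, 4, 2, net);      // layers: 3 (input), 4 (hidden), 2 (output)
    alglib::ae_int_t total = 0;
    for(alglib::ae_int_t k=0; k<alglib::mlpgetlayerscount(net); k++)
        total += alglib::mlpgetlayersize(net, k);   // k==0 is input, k==cnt-1 is output
}
#endif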

/*************************************************************************
This function returns offset/scaling coefficients for I-th input of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   input index

OUTPUT PARAMETERS:
    Mean        -   mean term
    Sigma       -   sigma term, guaranteed to be nonzero.

I-th input is passed through linear transformation
    IN[i] = (IN[i]-Mean)/Sigma
before feeding to the network

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetinputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgetinputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, &mean, &sigma, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function returns offset/scaling coefficients for I-th output of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   output index

OUTPUT PARAMETERS:
    Mean        -   mean term
    Sigma       -   sigma term, guaranteed to be nonzero.

I-th output is passed through linear transformation
    OUT[i] = OUT[i]*Sigma+Mean
before returning it to user. For a SOFTMAX-normalized network we return
(Mean,Sigma)=(0.0,1.0).

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetoutputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgetoutputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, &mean, &sigma, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
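
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It reads the standardization coefficients of
 * the first input and first output; the network construction via
 * mlpcreate1() is assumed for illustration.
 */
#if 0
static void example_get_scaling()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    double mean, sigma;
    alglib::mlpgetinputscaling(net, 0, mean, sigma);    // IN[0] = (IN[0]-mean)/sigma on entry
    alglib::mlpgetoutputscaling(net, 0, mean, sigma);   // OUT[0] = OUT[0]*sigma+mean on exit
}
#endif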

/*************************************************************************
This function returns information about Ith neuron of Kth layer

INPUT PARAMETERS:
    Network     -   network
    K           -   layer index
    I           -   neuron index (within layer)

OUTPUT PARAMETERS:
    FKind       -   activation function type (used by MLPActivationFunction())
                    this value is zero for input or linear neurons
    Threshold   -   also called offset, bias
                    zero for input neurons

NOTE: this function throws an exception if the layer or neuron with the
given index does not exist.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, ae_int_t &fkind, double &threshold, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgetneuroninfo(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, i, &fkind, &threshold, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function returns information about connection from I0-th neuron of
K0-th layer to I1-th neuron of K1-th layer.

INPUT PARAMETERS:
    Network     -   network
    K0          -   layer index
    I0          -   neuron index (within layer)
    K1          -   layer index
    I1          -   neuron index (within layer)

RESULT:
    connection weight (zero for non-existent connections)

This function:
1. throws an exception if the layer or neuron with the given index does
   not exist.
2. returns zero if the neurons exist, but there is no connection between
   them

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
double mlpgetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpgetweight(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k0, i0, k1, i1, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
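
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It reads a single connection weight;
 * mlpcreate1() is assumed for illustration.
 */
#if 0
static void example_get_weight()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    // weight of the connection from neuron 0 of layer 0 to neuron 0 of
    // layer 1; zero is returned when the neurons exist but are not connected
    double w = alglib::mlpgetweight(net, 0, 0, 1, 0);
}
#endif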

/*************************************************************************
This function sets offset/scaling coefficients for I-th input of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   input index
    Mean        -   mean term
    Sigma       -   sigma term (if zero, will be replaced by 1.0)

NOTE: I-th input is passed through linear transformation
    IN[i] = (IN[i]-Mean)/Sigma
before feeding to the network. This function sets Mean and Sigma.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetinputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetinputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, mean, sigma, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
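
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It standardizes input 0 with sample statistics
 * computed by the caller; the data values, the network construction and
 * the availability of sqrt() from <cmath> are assumptions.
 */
#if 0
static void example_set_input_scaling()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(1, 5, 1, net);
    double data[4] = {1.0, 2.0, 3.0, 4.0};
    double mean = 0, var = 0;
    for(int j=0; j<4; j++)
        mean += data[j]/4;                          // sample mean
    for(int j=0; j<4; j++)
        var += (data[j]-mean)*(data[j]-mean)/4;     // sample variance
    alglib::mlpsetinputscaling(net, 0, mean, sqrt(var));
}
#endif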

/*************************************************************************
This function sets offset/scaling coefficients for I-th output of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   output index
    Mean        -   mean term
    Sigma       -   sigma term (if zero, will be replaced by 1.0)

NOTE: I-th output is passed through linear transformation
    OUT[i] = OUT[i]*Sigma+Mean
before returning it to user. This function sets Sigma/Mean. For a
SOFTMAX-normalized network you can not set (Sigma,Mean) to anything other
than (0.0,1.0) - this function will throw an exception.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetoutputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetoutputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, mean, sigma, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function modifies information about Ith neuron of Kth layer

INPUT PARAMETERS:
    Network     -   network
    K           -   layer index
    I           -   neuron index (within layer)
    FKind       -   activation function type (used by MLPActivationFunction())
                    this value must be zero for input neurons
                    (you can not set activation function for input neurons)
    Threshold   -   also called offset, bias
                    this value must be zero for input neurons
                    (you can not set threshold for input neurons)

NOTES:
1. this function throws an exception if the layer or neuron with the
   given index does not exist.
2. this function also throws an exception when you try to set a non-linear
   activation function for input neurons (any kind of network) or for
   output neurons of a classifier network.
3. this function throws an exception when you try to set a non-zero
   threshold for input neurons (any kind of network).

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, const ae_int_t fkind, const double threshold, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetneuroninfo(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, i, fkind, threshold, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function modifies information about connection from I0-th neuron of
K0-th layer to I1-th neuron of K1-th layer.

INPUT PARAMETERS:
    Network     -   network
    K0          -   layer index
    I0          -   neuron index (within layer)
    K1          -   layer index
    I1          -   neuron index (within layer)
    W           -   connection weight (must be zero for non-existent
                    connections)

This function:
1. throws an exception if the layer or neuron with the given index does
   not exist.
2. throws an exception if you try to set a non-zero weight for a
   non-existent connection

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const double w, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetweight(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k0, i0, k1, i1, w, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
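
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It shows a read-modify-write round trip on a
 * single connection using mlpgetweight()/mlpsetweight(); mlpcreate1() is
 * assumed for illustration.
 */
#if 0
static void example_set_weight()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    double w = alglib::mlpgetweight(net, 0, 0, 1, 0);
    alglib::mlpsetweight(net, 0, 0, 1, 0, 0.5*w);   // damp this connection by half
}
#endif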

/*************************************************************************
Neural network activation function

INPUT PARAMETERS:
    NET         -   neuron input
    K           -   function index (zero for linear function)

OUTPUT PARAMETERS:
    F           -   function
    DF          -   its derivative
    D2F         -   its second derivative

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpactivationfunction(const double net, const ae_int_t k, double &f, double &df, double &d2f, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpactivationfunction(net, k, &f, &df, &d2f, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
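
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It cross-checks the derivative reported by
 * mlpactivationfunction() against a finite difference. K=1 is assumed to
 * select one of the built-in nonlinear activations (K=0 is linear); the
 * exact mapping of K values is an assumption - consult the manual.
 */
#if 0
static void example_activation()
{
    double f1, df1, d2f1, f2, df2, d2f2;
    const double h = 1.0e-6;
    alglib::mlpactivationfunction(0.3,   1, f1, df1, d2f1);
    alglib::mlpactivationfunction(0.3+h, 1, f2, df2, d2f2);
    // (f2-f1)/h should agree with df1 up to O(h) for the smooth activations
}
#endif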

/*************************************************************************
Processing

INPUT PARAMETERS:
    Network -   neural network
    X       -   input vector,  array[0..NIn-1].

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

See also MLPProcessI

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpprocess(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpprocess(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
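
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It feeds one input vector through the network;
 * mlpcreate1() and the input values are assumptions for illustration. Y is
 * preallocated to NOut so no reallocation is required.
 */
#if 0
static void example_process()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_1d_array x = "[0.1, 0.7]";
    alglib::real_1d_array y;
    y.setlength(1);                    // NOut==1
    alglib::mlpprocess(net, x, y);     // y[0] now holds the regression estimate
}
#endif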

/*************************************************************************
'interactive' variant of MLPProcess for languages like Python which
support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the
interpreter

This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.

  -- ALGLIB --
     Copyright 21.09.2010 by Bochkanov Sergey
*************************************************************************/
void mlpprocessi(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpprocessi(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Error of the neural network on dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlperror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
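
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It evaluates mlperror() on a tiny regression
 * dataset in the documented format; the network and the data values are
 * assumptions for illustration.
 */
#if 0
static void example_mlperror()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    // regression dataset: rows of (x0, x1, target), NPoints=2
    alglib::real_2d_array xy = "[[0.0,0.0,0.0],[1.0,1.0,2.0]]";
    double e = alglib::mlperror(net, xy, 2);   // 0.5 * sum of squared residuals
}
#endif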

/*************************************************************************
Error of the neural network on dataset given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws an exception when an incorrect
                    dataset is passed. The sparse matrix must use CRS
                    format for storage.
    NPoints     -   points count, >=0

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Natural error function for neural network, internal subroutine.

NOTE: this function is single-threaded. Unlike other error functions, it
receives no speed-up from being executed in SMP mode.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlperrorn(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperrorn(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Classification error of the neural network on dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    classification error (number of misclassified cases)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mlpclserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
Relative classification error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Percent of incorrectly classified cases. Works both for classifier
    networks and general purpose networks used as classifiers.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 25.12.2008 by Bochkanov Sergey
*************************************************************************/
double mlprelclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlprelclserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
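
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It evaluates the absolute and relative
 * classification errors on a tiny dataset in the documented classification
 * format; mlpcreatec1() (the classifier constructor from the same public
 * interface) and the data values are assumptions for illustration.
 */
#if 0
static void example_classification_error()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreatec1(2, 5, 3, net);   // classifier: 2 inputs, 3 classes, SOFTMAX outputs
    // first NIn columns are inputs, last column is the class label 0..NClasses-1
    alglib::real_2d_array xy = "[[0.1,0.2,0],[0.9,0.8,2]]";
    alglib::ae_int_t nmiss = alglib::mlpclserror(net, xy, 2);   // misclassified count
    double share = alglib::mlprelclserror(net, xy, 2);          // same quantity, relative form
}
#endif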

/*************************************************************************
Relative classification error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. The sparse matrix must use CRS
                    format for storage.
    NPoints     -   points count, >=0.

RESULT:
    Percent of incorrectly classified cases. Works both for classifier
    networks and general purpose networks used as classifiers.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlprelclserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlprelclserrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 08.01.2009 by Bochkanov Sergey
*************************************************************************/
double mlpavgce(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgce(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set given by
sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws an exception when an incorrect
                    dataset is passed. The sparse matrix must use CRS
                    format for storage.
    NPoints     -   points count, >=0.

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgcesparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgcesparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
RMS error on the given test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Root mean square error. Its meaning for regression task is obvious.
    As for classification task, RMS error means error when estimating
    posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlprmserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlprmserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
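
/*
 * Editor's note - a minimal sketch, not part of the ALGLIB distribution
 * and disabled with #if 0. It compares two error metrics on the same
 * dataset; RMS weights large residuals more heavily than the average
 * absolute error. The network and data are assumptions for illustration.
 */
#if 0
static void example_error_metrics()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_2d_array xy = "[[0.0,0.0,0.0],[1.0,1.0,2.0]]";
    double rms = alglib::mlprmserror(net, xy, 2);   // root mean square error
    double avg = alglib::mlpavgerror(net, xy, 2);   // average absolute error
}
#endif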

/*************************************************************************
RMS error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws an exception when an incorrect
                    dataset is passed. The sparse matrix must use CRS
                    format for storage.
    NPoints     -   points count, >=0.

RESULT:
    Root mean square error. Its meaning for regression task is obvious.
    As for classification task, RMS error means error when estimating
    posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlprmserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlprmserrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average absolute error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgerror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average absolute error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws an exception when an incorrect
                    dataset is passed. The sparse matrix must use CRS
                    format for storage.
    NPoints     -   points count, >=0.

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores the class number
  (from 0 to NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgerrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average relative error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    NPoints -   points count.

RESULT:
Its meaning for the regression task is obvious. For the classification
task it means the average relative error when estimating the posterior
probability of belonging to the correct class.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgrelerror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
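
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

A minimal caller-side illustration of the dense dataset formats described
above. The 2-3-1 regression architecture and the literal data values are
assumptions made for this example only.

    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 3, 1, net);   // regression net: NIn=2, NOut=1

    // regression dataset: NPoints x (NIn+NOut) = 2 x 3
    alglib::real_2d_array xy = "[[0.1,0.2,0.5],[0.3,0.4,0.9]]";
    double avgrel = alglib::mlpavgrelerror(net, xy, 2);

    // for a classifier with NClasses=3, the last column would instead
    // hold a class index 0..2, e.g. "[[0.1,0.2,0],[0.3,0.4,2]]"
*************************************************************************/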

/*************************************************************************
Average relative error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are correct)
                and throws an exception when an incorrect dataset is
                passed. The sparse matrix must use the CRS format for
                storage.
    NPoints -   points count, >=0.

RESULT:
Its meaning for the regression task is obvious. For the classification
task it means the average relative error when estimating the posterior
probability of belonging to the correct class.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpavgrelerrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Gradient calculation

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    X       -   input vector, length of array must be at least NIn
    DesiredY-   desired outputs, length of array must be at least NOut
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgrad(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgrad(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(desiredy.c_ptr()), &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
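
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

A minimal, hypothetical single-sample gradient query. Grad is passed in
empty and is reallocated to length WCount by the call; in a training loop
you would keep and reuse it, as recommended above.

    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 3, 1, net);
    alglib::real_1d_array x        = "[0.1, 0.2]";
    alglib::real_1d_array desiredy = "[0.5]";
    alglib::real_1d_array grad;          // reused across calls
    double e;
    alglib::mlpgrad(net, x, desiredy, e, grad);
    // e    = SUM(sqr(y[i]-desiredy[i])/2, i)
    // grad has length mlpgetweightscount(net)
*************************************************************************/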

/*************************************************************************
Gradient calculation (natural error function is used)

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    X       -   input vector, length of array must be at least NIn
    DesiredY-   desired outputs, length of array must be at least NOut
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, sum-of-squares for regression networks,
                cross-entropy for classification networks.
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradn(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradn(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(desiredy.c_ptr()), &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Batch gradient calculation for a set of inputs/outputs

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in dense format; one sample = one row:
                * first NIn columns contain inputs,
                * for a regression problem, next NOut columns store
                  desired outputs,
                * for a classification problem, next column (just one!)
                  stores class number.
    SSize   -   number of samples (rows) in XY
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
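
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

A hypothetical epoch loop showing why reusing a preallocated Grad array
matters: only the first mlpgradbatch() call allocates, subsequent calls
reuse the same buffer. `net` is assumed to be the 2-3-1 network from the
sketch after mlpavgrelerror().

    alglib::real_2d_array xy = "[[0.1,0.2,0.5],[0.3,0.4,0.9]]";
    alglib::real_1d_array grad;
    double e;
    for(int epoch=0; epoch<100; epoch++)
    {
        alglib::mlpgradbatch(net, xy, 2, e, grad);   // grad reused
        // ... apply a gradient step to the weights here ...
    }
*************************************************************************/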

/*************************************************************************
Batch gradient calculation for a set of inputs/outputs given by sparse
matrices

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in sparse format; one sample = one row:
                * MATRIX MUST BE STORED IN CRS FORMAT
                * first NIn columns contain inputs,
                * for a regression problem, next NOut columns store
                  desired outputs,
                * for a classification problem, next column (just one!)
                  stores class number.
    SSize   -   number of samples (rows) in XY
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradbatchsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
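
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

The sparse variant requires a CRS-format matrix. A typical (assumed)
pattern: build the matrix in hash-table format with sparseset(), then
convert it with sparseconverttocrs() before passing it here. `net` is the
2-3-1 network assumed in the earlier sketches.

    alglib::sparsematrix xy;
    alglib::sparsecreate(2, 3, xy);        // NPoints=2, NIn+NOut=3
    alglib::sparseset(xy, 0, 0, 0.1);      // row 0: input ...
    alglib::sparseset(xy, 0, 2, 0.5);      // ... and output
    alglib::sparseset(xy, 1, 1, 0.4);
    alglib::sparseset(xy, 1, 2, 0.9);
    alglib::sparseconverttocrs(xy);        // mandatory before the call
    double e;
    alglib::real_1d_array grad;
    alglib::mlpgradbatchsparse(net, xy, 2, e, grad);
*************************************************************************/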

/*************************************************************************
Batch gradient calculation for a subset of dataset

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in dense format; one sample = one row:
                * first NIn columns contain inputs,
                * for a regression problem, next NOut columns store
                  desired outputs,
                * for a classification problem, next column (just one!)
                  stores class number.
    SetSize -   real size of XY, SetSize>=0;
    Idx     -   subset of SubsetSize elements, array[SubsetSize]:
                * Idx[I] stores row index in the original dataset which
                  is given by XY. Gradient is calculated with respect to
                  rows whose indexes are stored in Idx[].
                * Idx[] must store correct indexes; this function throws
                  an exception when an incorrect index (less than 0 or
                  larger than rows(XY)) is given.
                * Idx[] may store indexes in any order and even with
                  repetitions.
    SubsetSize- number of elements in Idx[] array:
                * positive value means that subset given by Idx[] is
                  processed
                * zero value results in zero gradient
                * negative value means that full dataset is processed
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradbatchsubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(idx.c_ptr()), subsetsize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
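
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

Idx[] makes mini-batch-style processing possible without copying rows.
The index values and the 6-row dataset are illustrative; repetitions and
arbitrary order are allowed, as noted above.

    alglib::integer_1d_array idx = "[0, 2, 2, 5]";  // rows 0,2,2,5 of xy
    double e;
    alglib::real_1d_array grad;
    alglib::mlpgradbatchsubset(net, xy, 6, idx, 4, e, grad);
    // SubsetSize=-1 would process all 6 rows and ignore idx
*************************************************************************/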

/*************************************************************************
Batch gradient calculation for a set of inputs/outputs for a subset of
dataset given by set of indexes.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in sparse format; one sample = one row:
                * MATRIX MUST BE STORED IN CRS FORMAT
                * first NIn columns contain inputs,
                * for a regression problem, next NOut columns store
                  desired outputs,
                * for a classification problem, next column (just one!)
                  stores class number.
    SetSize -   real size of XY, SetSize>=0;
    Idx     -   subset of SubsetSize elements, array[SubsetSize]:
                * Idx[I] stores row index in the original dataset which
                  is given by XY. Gradient is calculated with respect to
                  rows whose indexes are stored in Idx[].
                * Idx[] must store correct indexes; this function throws
                  an exception when an incorrect index (less than 0 or
                  larger than rows(XY)) is given.
                * Idx[] may store indexes in any order and even with
                  repetitions.
    SubsetSize- number of elements in Idx[] array:
                * positive value means that subset given by Idx[] is
                  processed
                * zero value results in zero gradient
                * negative value means that full dataset is processed
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

NOTE: when SubsetSize<0, the full dataset is processed, as if
      MLPGradBatchSparse() was called.

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradbatchsparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(idx.c_ptr()), subsetsize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Batch gradient calculation for a set of inputs/outputs
(natural error function is used)

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   set of inputs/outputs; one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SSize   -   number of samples (rows) in XY
    Grad    -   possibly preallocated array. If the size of the array is
                smaller than WCount, it will be reallocated. It is
                recommended to reuse a previously allocated array to
                reduce allocation overhead.

OUTPUT PARAMETERS:
    E       -   error function, sum-of-squares for regression networks,
                cross-entropy for classification networks.
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradnbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpgradnbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Batch Hessian calculation (natural error function) using R-algorithm.
Internal subroutine.

  -- ALGLIB --
     Copyright 26.01.2008 by Bochkanov Sergey.

     Hessian calculation based on R-algorithm described in
     "Fast Exact Multiplication by the Hessian",
     B. A. Pearlmutter,
     Neural Computation, 1994.
*************************************************************************/
void mlphessiannbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlphessiannbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Batch Hessian calculation using R-algorithm.
Internal subroutine.

  -- ALGLIB --
     Copyright 26.01.2008 by Bochkanov Sergey.

     Hessian calculation based on R-algorithm described in
     "Fast Exact Multiplication by the Hessian",
     B. A. Pearlmutter,
     Neural Computation, 1994.
*************************************************************************/
void mlphessianbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlphessianbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Calculation of all types of errors on a subset of the dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset; one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SetSize -   real size of XY, SetSize>=0;
    Subset  -   subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

OUTPUT PARAMETERS:
    Rep     -   report which contains all types of errors.

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
void mlpallerrorssubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpallerrorssubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, const_cast<alglib_impl::modelerrors*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
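
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

The modelerrors report bundles all error metrics in one pass. `net` and
`xy` are assumed from the earlier sketches; the field names below follow
the public modelerrors structure.

    alglib::modelerrors rep;
    alglib::integer_1d_array subset;
    subset.setlength(1);                   // ignored when SubsetSize<0
    subset[0] = 0;
    alglib::mlpallerrorssubset(net, xy, 2, subset, -1, rep);  // full set
    // rep.rmserror    - root-mean-square error
    // rep.avgerror    - average error
    // rep.avgrelerror - average relative error
    // rep.avgce       - cross-entropy (classifiers)
    // rep.relclserror - relative classification error
*************************************************************************/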

/*************************************************************************
Calculation of all types of errors on a subset of the dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset given by sparse matrix;
                one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SetSize -   real size of XY, SetSize>=0;
    Subset  -   subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

OUTPUT PARAMETERS:
    Rep     -   report which contains all types of errors.

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
void mlpallerrorssparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpallerrorssparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, const_cast<alglib_impl::modelerrors*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Error of the neural network on a subset of the dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    SetSize -   real size of XY, SetSize>=0;
    Subset  -   subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperrorsubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Error of the neural network on a subset of a sparse dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are correct)
                and throws an exception when an incorrect dataset is
                passed. The sparse matrix must use the CRS format for
                storage.
    SetSize -   real size of XY, SetSize>=0;
                it is used when SubsetSize<0;
    Subset  -   subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperrorsparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
#endif

#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Multiclass Fisher LDA

The subroutine finds coefficients of a linear combination which optimally
separates the training set into classes.

COMMERCIAL EDITION OF ALGLIB:

! The commercial version of ALGLIB includes two important improvements
! of this function, which can be used from C++ and C#:
! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
! * multithreading support
!
! Intel MKL gives an approximately constant (with respect to the number
! of worker threads) acceleration factor which depends on the CPU being
! used, the problem size and the "baseline" ALGLIB edition which is used
! for comparison. Best results are achieved for high-dimensional problems
! (NVars is at least 256).
!
! Multithreading is used to accelerate the initial phase of LDA, which
! includes calculation of products of large matrices. Again, for best
! efficiency the problem must be high-dimensional.
!
! Generally, commercial ALGLIB is several times faster than the open-
! source generic C edition, and many times faster than the open-source
! C# edition.
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    XY      -   training set, array[0..NPoints-1,0..NVars].
                First NVars columns store values of independent
                variables, next column stores number of class (from 0
                to NClasses-1) which dataset element belongs to.
                Fractional values are rounded to the nearest integer.
    NPoints -   training set size, NPoints>=0
    NVars   -   number of independent variables, NVars>=1
    NClasses -  number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info    -   return code:
                * -4, if internal EVD subroutine hasn't converged
                * -2, if there is a point with class number
                      outside of [0..NClasses-1].
                * -1, if incorrect parameters were passed (NPoints<0,
                      NVars<1, NClasses<2)
                *  1, if task has been solved
                *  2, if there was multicollinearity in the training
                      set, but the task has been solved.
    W       -   linear combination coefficients, array[0..NVars-1]

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherlda(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_1d_array &w, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::fisherlda(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
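
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

A tiny two-class LDA run on assumed data: 4 points, 2 variables, class
label in the last column. W receives the direction that best separates
the classes; projecting a point onto it is a dot product.

    alglib::real_2d_array xy =
        "[[1.0,1.1,0],[1.2,0.9,0],[3.0,3.1,1],[3.2,2.9,1]]";
    alglib::ae_int_t info;
    alglib::real_1d_array w;
    alglib::fisherlda(xy, 4, 2, 2, info, w);
    if( info>0 )
    {
        double proj = 1.0*w[0] + 1.1*w[1];   // projection of point 0
    }
*************************************************************************/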

/*************************************************************************
N-dimensional multiclass Fisher LDA

The subroutine finds coefficients of linear combinations which optimally
separate the training set into classes. It returns an N-dimensional basis
whose vectors are sorted by quality of training set separation (in
descending order).

! COMMERCIAL EDITION OF ALGLIB:
!
! The Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    XY      -   training set, array[0..NPoints-1,0..NVars].
                First NVars columns store values of independent
                variables, next column stores number of class (from 0
                to NClasses-1) which dataset element belongs to.
                Fractional values are rounded to the nearest integer.
    NPoints -   training set size, NPoints>=0
    NVars   -   number of independent variables, NVars>=1
    NClasses -  number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info    -   return code:
                * -4, if internal EVD subroutine hasn't converged
                * -2, if there is a point with class number
                      outside of [0..NClasses-1].
                * -1, if incorrect parameters were passed (NPoints<0,
                      NVars<1, NClasses<2)
                *  1, if task has been solved
                *  2, if there was multicollinearity in the training
                      set, but the task has been solved.
    W       -   basis, array[0..NVars-1,0..NVars-1]
                columns of the matrix store basis vectors, sorted by
                quality of training set separation (in descending order)

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherldan(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_2d_array &w, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::fisherldan(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::ae_matrix*>(w.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
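
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

fisherldan() returns a whole basis; taking its first column reproduces
what fisherlda() returns as W. The dataset is assumed to be the one from
the previous sketch.

    alglib::ae_int_t info;
    alglib::real_2d_array w;
    alglib::fisherldan(xy, 4, 2, 2, info, w);
    // w[0][0], w[1][0] - best separating direction (first column)
    // w[0][1], w[1][1] - second-best direction, etc.
*************************************************************************/
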
#endif

#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This object stores the state of the SSA model.

You should use ALGLIB functions to work with this object.
*************************************************************************/
_ssamodel_owner::_ssamodel_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_ssamodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::ssamodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::ssamodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
    alglib_impl::_ssamodel_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_ssamodel_owner::_ssamodel_owner(const _ssamodel_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_ssamodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ssamodel copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::ssamodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::ssamodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
    alglib_impl::_ssamodel_init_copy(p_struct, const_cast<alglib_impl::ssamodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_ssamodel_owner& _ssamodel_owner::operator=(const _ssamodel_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: ssamodel assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ssamodel assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_ssamodel_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
    alglib_impl::_ssamodel_init_copy(p_struct, const_cast<alglib_impl::ssamodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_ssamodel_owner::~_ssamodel_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_ssamodel_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::ssamodel* _ssamodel_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::ssamodel* _ssamodel_owner::c_ptr() const
{
    return const_cast<alglib_impl::ssamodel*>(p_struct);
}

ssamodel::ssamodel() : _ssamodel_owner()
{
}

ssamodel::ssamodel(const ssamodel &rhs):_ssamodel_owner(rhs)
{
}

ssamodel& ssamodel::operator=(const ssamodel &rhs)
{
    if( this==&rhs )
        return *this;
    _ssamodel_owner::operator=(rhs);
    return *this;
}

ssamodel::~ssamodel()
{
}

/*************************************************************************
This function creates an SSA model object. Right after creation the model
is in "dummy" mode - you can add data, but analysis/prediction will
return just zeros (it assumes that the basis is empty).

HOW TO USE SSA MODEL:

1. create model with ssacreate()
2. add data with one/many ssaaddsequence() calls
3. choose SSA algorithm with one of ssasetalgo...() functions:
   * ssasetalgotopkdirect() for direct one-run analysis
   * ssasetalgotopkrealtime() for algorithm optimized for many subsequent
     runs with warm-start capabilities
   * ssasetalgoprecomputed() for user-supplied basis
4. set window width with ssasetwindow()
5. perform one of the analysis-related activities:
   a) call ssagetbasis() to get basis
   b) call ssaanalyzelast(), ssaanalyzesequence() or ssaanalyzelastwindow()
      to perform analysis (trend/noise separation)
   c) call one of the forecasting functions (ssaforecastlast() or
      ssaforecastsequence()) to perform prediction; alternatively, you can
      extract linear recurrence coefficients with ssagetlrr().
   SSA analysis will be performed during the first call to an analysis-
   related function. The SSA model is smart enough to track all changes
   in the dataset and model settings, to cache the previously computed
   basis and to re-evaluate the basis only when necessary.

Additionally, if your setting involves a constant stream of incoming
data, you can quickly update an already calculated model with one of the
incremental append-and-update functions: ssaappendpointandupdate() or
ssaappendsequenceandupdate().

NOTE: steps (2), (3), (4) can be performed in arbitrary order.

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S       -   structure which stores model state

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacreate(ssamodel &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssacreate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
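
/*************************************************************************
EDITORIAL NOTE - usage sketch (not part of the original ALGLIB sources):

The workflow from the comment above, condensed. The sequence data, window
width 4 and top-2 basis size are illustrative assumptions; the function
names are the ones listed in steps 1-5.

    alglib::ssamodel s;
    alglib::ssacreate(s);
    alglib::real_1d_array x = "[1.0,1.1,1.2,1.0,1.1,1.2,1.0,1.1]";
    alglib::ssaaddsequence(s, x);          // step 2: add data
    alglib::ssasetwindow(s, 4);            // step 4: window width
    alglib::ssasetalgotopkdirect(s, 2);    // step 3: top-2 direct algo
    alglib::real_1d_array trend, noise;
    alglib::ssaanalyzelast(s, 8, trend, noise);   // step 5: analysis
*************************************************************************/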

/*************************************************************************
This function sets the window width for the SSA model. You should call it
before the analysis phase. The default window width is 1 (not suitable
for real use).

Special notes:
* this function call can be performed at any moment before the first call
  to analysis-related functions
* changing the window width invalidates the internally stored basis; if
  you change the window width AFTER you call an analysis-related
  function, the next analysis phase will require re-calculation of the
  basis according to the current algorithm.
* calling this function with exactly the same window width as the current
  one has no effect
* if you specify a window width larger than any data sequence stored in
  the model, analysis will return a zero basis.

INPUT PARAMETERS:
    S       -   SSA model created with ssacreate()
    WindowWidth -   >=1, new window width

OUTPUT PARAMETERS:
    S       -   SSA model, updated

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetwindow(const ssamodel &s, const ae_int_t windowwidth, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetwindow(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), windowwidth, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the seed which is used to initialize the internal RNG
when we make pseudorandom decisions on model updates.

By default, a deterministic seed is used - which results in the same
sequence of pseudorandom decisions every time you run the SSA model. If
you specify a non-deterministic seed value, then the SSA model may return
slightly different results after each run.

This function can be useful when you have several SSA models updated with
ssaappendpointandupdate() called with 0<UpdateIts<1 (fractional value)
and due to performance limitations want them to perform updates at
different moments.

INPUT PARAMETERS:
    S       -   SSA model
    Seed    -   seed:
                * positive values = use deterministic seed for each run
                  of algorithms which depend on random initialization
                * zero or negative values = use non-deterministic seed

  -- ALGLIB --
     Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetseed(const ssamodel &s, const ae_int_t seed, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetseed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), seed, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the length of the power-up cycle for the real-time
algorithm.

By default, this algorithm performs a costly O(N*WindowWidth^2) init
phase followed by a full run of truncated EVD. However, if you are ready
to live with a slightly lower-quality basis during the first few
iterations, you can split this O(N*WindowWidth^2) initialization between
several subsequent append-and-update rounds. This results in better
latency of the algorithm.

This function invalidates the basis/solver; the next analysis call will
result in a full recalculation of everything.

INPUT PARAMETERS:
    S       -   SSA model
    PWLen   -   length of the power-up stage:
                * 0 means that no power-up is requested
                * 1 is the same as 0
                * >1 means that delayed power-up is performed

  -- ALGLIB --
     Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetpoweruplength(const ssamodel &s, const ae_int_t pwlen, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetpoweruplength(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), pwlen, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the memory limit of the SSA analysis.

Straightforward SSA with sequence length T and window width W needs
O(T*W) memory. It is possible to reduce memory consumption by splitting
the task into smaller chunks.

This function allows you to specify an approximate memory limit (measured
in double precision numbers used for buffers). Actual memory consumption
will be comparable to the number specified by you.

The default memory limit is 50,000,000 (400 MB) in the current version.

INPUT PARAMETERS:
    S       -   SSA model
    MemLimit-   memory limit, >=0. Zero value means no limit.

  -- ALGLIB --
     Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetmemorylimit(const ssamodel &s, const ae_int_t memlimit, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetmemorylimit(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), memlimit, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function adds a data sequence to the SSA model. Only single-
dimensional sequences are supported.

What is a sequence? The following definitions/requirements apply:
* a sequence is an array of values measured in subsequent, equally
  separated time moments (ticks).
* you may have many sequences in your dataset; say, one sequence may
  correspond to one trading session.
* sequence length should be larger than the current window length (shorter
  sequences will be ignored during analysis).
* analysis is performed within a sequence; different sequences are NOT
  stacked together to produce one large contiguous stream of data.
* analysis is performed for all sequences at once, i.e. the same set of
  basis vectors is computed for all sequences

INCREMENTAL ANALYSIS

This function is not intended for incremental updates of a previously
found SSA basis. Calling it invalidates all previous analysis results
(the basis is reset and will be recalculated from zero during the next
analysis).

If you want to perform incremental/real-time SSA, consider using the
following functions:
* ssaappendpointandupdate() for appending one point
* ssaappendsequenceandupdate() for appending a new sequence

INPUT PARAMETERS:
    S       -   SSA model created with ssacreate()
    X       -   array[N], data, can be larger (additional values
                are ignored)
    N       -   data length, can be automatically determined from
                the array length. N>=0.

OUTPUT PARAMETERS:
    S       -   SSA model, updated

NOTE: you can clear the dataset with ssacleardata()

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaaddsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function adds a data sequence to the SSA model. Only single-
dimensional sequences are supported.

What is a sequence? The following definitions/requirements apply:
* a sequence is an array of values measured in subsequent, equally
  separated time moments (ticks).
* you may have many sequences in your dataset; say, one sequence may
  correspond to one trading session.
* sequence length should be larger than the current window length (shorter
  sequences will be ignored during analysis).
* analysis is performed within a sequence; different sequences are NOT
  stacked together to produce one large contiguous stream of data.
* analysis is performed for all sequences at once, i.e. the same set of
  basis vectors is computed for all sequences

INCREMENTAL ANALYSIS

This function is not intended for incremental updates of a previously
found SSA basis. Calling it invalidates all previous analysis results
(the basis is reset and will be recalculated from zero during the next
analysis).

If you want to perform incremental/real-time SSA, consider using the
following functions:
* ssaappendpointandupdate() for appending one point
* ssaappendsequenceandupdate() for appending a new sequence

INPUT PARAMETERS:
    S       -   SSA model created with ssacreate()
    X       -   array[N], data, can be larger (additional values
                are ignored)
    N       -   data length, can be automatically determined from
                the array length. N>=0.

OUTPUT PARAMETERS:
    S       -   SSA model, updated

NOTE: you can clear the dataset with ssacleardata()

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t n;

    n = x.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaaddsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif
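
/*************************************************************************
A minimal sketch of dataset construction (the window width of 3 and the
toy arrays below are illustrative assumptions). Each call adds an
independent sequence - the two sequences are NOT glued together:

    alglib::ssamodel s;
    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 3);
    alglib::real_1d_array session1 = "[0,1,2,3,4,5,6,7,8,9]";
    alglib::real_1d_array session2 = "[9,8,7,6,5,4,3,2,1,0]";
    alglib::ssaaddsequence(s, session1);    // first trading session
    alglib::ssaaddsequence(s, session2);    // second one, analyzed separately
*************************************************************************/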

/*************************************************************************
This function appends a single point to the last data sequence stored in
the SSA model and tries to update the model in an incremental manner (if
possible with the current algorithm).

If you want to add more than one point at once:
* if you want to add M points to the same sequence, perform M-1 calls with
  the UpdateIts parameter set to 0.0, and the last call with non-zero
  UpdateIts.
* if you want to add a new sequence, use ssaappendsequenceandupdate()

Running time of this function does NOT depend on dataset size, only on
window width and the number of singular vectors. Depending on the
algorithm being used, the incremental update has complexity:
* for top-K real time   - O(UpdateIts*K*Width^2), with fractional UpdateIts
* for top-K direct      - O(Width^3) for any non-zero UpdateIts
* for precomputed basis - O(1), no update is performed

INPUT PARAMETERS:
    S           -   SSA model created with ssacreate()
    X           -   new point
    UpdateIts   -   >=0, floating point (!) value, desired update
                    frequency:
                    * a zero value means that the point is stored, but no
                      update is performed
                    * the integer part of the value means that the
                      specified number of iterations is always performed
                    * the fractional part of the value means that one
                      iteration is performed with this probability.

                    Recommended value: 0<UpdateIts<=1. Values larger
                    than 1 are VERY seldom needed. If your dataset
                    changes slowly, you can set it to 0.1 and skip
                    90% of updates.

                    In any case, no information is lost even with a zero
                    value of UpdateIts! It will be incorporated into the
                    model sooner or later.

OUTPUT PARAMETERS:
    S           -   SSA model, updated

NOTE: this function uses an internal RNG to handle fractional values of
      UpdateIts. By default it is initialized with a fixed seed during the
      initial calculation of the basis. Thus subsequent calls to this
      function will result in the same sequence of pseudorandom decisions.

      However, if you have several SSA models which are calculated
      simultaneously, and if you want to reduce computational bottlenecks
      by performing random updates at random moments, then a fixed seed is
      not an option - all updates will fire at the same moments.

      You may change it with the ssasetseed() function.

NOTE: this function throws an exception if called for an empty dataset
      (there is no "last" sequence to modify).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendpointandupdate(const ssamodel &s, const double x, const double updateits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaappendpointandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), x, updateits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
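
/*************************************************************************
A minimal real-time sketch (illustrative only: nextick() stands for a
hypothetical data source, and 0.1 is the update frequency suggested above,
which skips ~90% of the basis updates while losing no data):

    // model "s" is assumed to be created, configured and primed with data
    while( running )
    {
        double tick = nextick();                        // hypothetical feed
        alglib::ssaappendpointandupdate(s, tick, 0.1);  // cheap amortized update
    }
*************************************************************************/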

/*************************************************************************
This function appends a new sequence to the dataset stored in the SSA
model and tries to update the model in an incremental manner (if possible
with the current algorithm).

Notes:
* if you want to add M sequences at once, perform M-1 calls with the
  UpdateIts parameter set to 0.0, and the last call with non-zero
  UpdateIts.
* if you want to add just one point, use ssaappendpointandupdate()

Running time of this function does NOT depend on dataset size, only on
sequence length, window width and the number of singular vectors.
Depending on the algorithm being used, the incremental update has
complexity:
* for top-K real time   - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
* for top-K direct      - O(Width^3+(NTicks-Width)*Width^2)
* for precomputed basis - O(1), no update is performed

INPUT PARAMETERS:
    S           -   SSA model created with ssacreate()
    X           -   new sequence, array[NTicks] or larger
    NTicks      -   >=1, number of ticks in the sequence
    UpdateIts   -   >=0, floating point (!) value, desired update
                    frequency:
                    * a zero value means that the data are stored, but no
                      update is performed
                    * the integer part of the value means that the
                      specified number of iterations is always performed
                    * the fractional part of the value means that one
                      iteration is performed with this probability.

                    Recommended value: 0<UpdateIts<=1. Values larger
                    than 1 are VERY seldom needed. If your dataset
                    changes slowly, you can set it to 0.1 and skip
                    90% of updates.

                    In any case, no information is lost even with a zero
                    value of UpdateIts! It will be incorporated into the
                    model sooner or later.

OUTPUT PARAMETERS:
    S           -   SSA model, updated

NOTE: this function uses an internal RNG to handle fractional values of
      UpdateIts. By default it is initialized with a fixed seed during the
      initial calculation of the basis. Thus subsequent calls to this
      function will result in the same sequence of pseudorandom decisions.

      However, if you have several SSA models which are calculated
      simultaneously, and if you want to reduce computational bottlenecks
      by performing random updates at random moments, then a fixed seed is
      not an option - all updates will fire at the same moments.

      You may change it with the ssasetseed() function.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const ae_int_t nticks, const double updateits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaappendsequenceandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), nticks, updateits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function appends a new sequence to the dataset stored in the SSA
model and tries to update the model in an incremental manner (if possible
with the current algorithm).

Notes:
* if you want to add M sequences at once, perform M-1 calls with the
  UpdateIts parameter set to 0.0, and the last call with non-zero
  UpdateIts.
* if you want to add just one point, use ssaappendpointandupdate()

Running time of this function does NOT depend on dataset size, only on
sequence length, window width and the number of singular vectors.
Depending on the algorithm being used, the incremental update has
complexity:
* for top-K real time   - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
* for top-K direct      - O(Width^3+(NTicks-Width)*Width^2)
* for precomputed basis - O(1), no update is performed

INPUT PARAMETERS:
    S           -   SSA model created with ssacreate()
    X           -   new sequence, array[NTicks] or larger
    NTicks      -   >=1, number of ticks in the sequence
    UpdateIts   -   >=0, floating point (!) value, desired update
                    frequency:
                    * a zero value means that the data are stored, but no
                      update is performed
                    * the integer part of the value means that the
                      specified number of iterations is always performed
                    * the fractional part of the value means that one
                      iteration is performed with this probability.

                    Recommended value: 0<UpdateIts<=1. Values larger
                    than 1 are VERY seldom needed. If your dataset
                    changes slowly, you can set it to 0.1 and skip
                    90% of updates.

                    In any case, no information is lost even with a zero
                    value of UpdateIts! It will be incorporated into the
                    model sooner or later.

OUTPUT PARAMETERS:
    S           -   SSA model, updated

NOTE: this function uses an internal RNG to handle fractional values of
      UpdateIts. By default it is initialized with a fixed seed during the
      initial calculation of the basis. Thus subsequent calls to this
      function will result in the same sequence of pseudorandom decisions.

      However, if you have several SSA models which are calculated
      simultaneously, and if you want to reduce computational bottlenecks
      by performing random updates at random moments, then a fixed seed is
      not an option - all updates will fire at the same moments.

      You may change it with the ssasetseed() function.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const double updateits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t nticks;

    nticks = x.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaappendsequenceandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), nticks, updateits, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
This function sets the SSA algorithm to the "precomputed vectors"
algorithm.

This algorithm uses a precomputed set of orthonormal (orthogonal AND
normalized) basis vectors supplied by the user. Thus, the basis
calculation phase is not performed - we already have our basis - and only
the analysis/forecasting phase requires actual calculations.

This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(1) time.

NOTE: this algorithm accepts both basis and window width, because these
      two parameters are naturally aligned. Calling this function sets the
      window width; if you call ssasetwindow() with another window width,
      then during the analysis stage the algorithm will detect the
      conflict and reset to a zero basis.

INPUT PARAMETERS:
    S           -   SSA model
    A           -   array[WindowWidth,NBasis], orthonormalized basis;
                    this function does NOT control orthogonality and
                    does NOT perform any kind of renormalization. It
                    is your responsibility to provide it with a correct
                    basis.
    WindowWidth -   window width, >=1
    NBasis      -   number of basis vectors, 1<=NBasis<=WindowWidth

OUTPUT PARAMETERS:
    S           -   updated model

NOTE: calling this function invalidates the basis in all cases.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const ae_int_t windowwidth, const ae_int_t nbasis, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetalgoprecomputed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), windowwidth, nbasis, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the SSA algorithm to the "precomputed vectors"
algorithm.

This algorithm uses a precomputed set of orthonormal (orthogonal AND
normalized) basis vectors supplied by the user. Thus, the basis
calculation phase is not performed - we already have our basis - and only
the analysis/forecasting phase requires actual calculations.

This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(1) time.

NOTE: this algorithm accepts both basis and window width, because these
      two parameters are naturally aligned. Calling this function sets the
      window width; if you call ssasetwindow() with another window width,
      then during the analysis stage the algorithm will detect the
      conflict and reset to a zero basis.

INPUT PARAMETERS:
    S           -   SSA model
    A           -   array[WindowWidth,NBasis], orthonormalized basis;
                    this function does NOT control orthogonality and
                    does NOT perform any kind of renormalization. It
                    is your responsibility to provide it with a correct
                    basis.
    WindowWidth -   window width, >=1
    NBasis      -   number of basis vectors, 1<=NBasis<=WindowWidth

OUTPUT PARAMETERS:
    S           -   updated model

NOTE: calling this function invalidates the basis in all cases.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t windowwidth;
    ae_int_t nbasis;

    windowwidth = a.rows();
    nbasis = a.cols();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetalgoprecomputed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), windowwidth, nbasis, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif
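
/*************************************************************************
A minimal sketch of the precomputed-basis mode (the 2x2 identity below is
merely a trivially orthonormal example with WindowWidth=2, NBasis=2; a
real application would supply a basis obtained elsewhere):

    alglib::real_2d_array basis = "[[1,0],[0,1]]";
    alglib::ssasetalgoprecomputed(s, basis);    // width/NBasis inferred
                                                // from the matrix shape
*************************************************************************/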

/*************************************************************************
This function sets the SSA algorithm to the "direct top-K" algorithm.

The "direct top-K" algorithm performs a full SVD of the N*WINDOW
trajectory matrix (hence its name - a direct solver is used), then
extracts the top K components. Overall running time is O(N*WINDOW^2),
where N is the number of ticks in the dataset and WINDOW is the window
width.

This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(WINDOW^3) time, which is ~N/WINDOW
times faster than recomputing everything from scratch.

INPUT PARAMETERS:
    S           -   SSA model
    TopK        -   number of components to analyze; TopK>=1.

OUTPUT PARAMETERS:
    S           -   updated model

NOTE: TopK>WindowWidth is silently decreased to WindowWidth during the
      analysis phase

NOTE: calling this function invalidates the basis, except for the
      situation when this algorithm was already set with the same
      parameters.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkdirect(const ssamodel &s, const ae_int_t topk, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetalgotopkdirect(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), topk, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
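
/*************************************************************************
A minimal end-to-end sketch of the direct solver (window width 3, top-2
components and the short toy sequence are assumptions kept small for
illustration):

    alglib::ssamodel s;
    alglib::real_1d_array x = "[0,0.5,1,1,1.5,2]";
    alglib::real_1d_array trend, noise;
    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 3);             // window width = 3
    alglib::ssaaddsequence(s, x);
    alglib::ssasetalgotopkdirect(s, 2);     // extract the top-2 components
    alglib::ssaanalyzesequence(s, x, trend, noise);
*************************************************************************/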

/*************************************************************************
This function sets the SSA algorithm to the "top-K real time" algorithm.
This algorithm extracts the K components with the largest singular values.

It is a real-time version of the top-K algorithm which is optimized for
incremental processing and fast start-up. Internally it uses a subspace
eigensolver for truncated SVD. This makes it possible to perform quick
updates of the basis when only a few points/sequences are added to the
dataset.

The performance profile of the algorithm is given below:
* O(K*WindowWidth^2) running time for an incremental update of the dataset
  with one of the "append-and-update" functions (ssaappendpointandupdate()
  or ssaappendsequenceandupdate()).
* O(N*WindowWidth^2) running time for the initial basis evaluation (N=size
  of the dataset)
* ability to split the costly initialization across several incremental
  updates of the basis (the so-called "Power-Up" functionality, activated
  by the ssasetpoweruplength() function)

INPUT PARAMETERS:
    S           -   SSA model
    TopK        -   number of components to analyze; TopK>=1.

OUTPUT PARAMETERS:
    S           -   updated model

NOTE: this algorithm is optimized for large-scale tasks with large
      datasets. On toy problems with just 5-10 points it can return a
      basis which is slightly different from that returned by the direct
      algorithm (ssasetalgotopkdirect() function). However, the difference
      becomes negligible as the dataset grows.

NOTE: TopK>WindowWidth is silently decreased to WindowWidth during the
      analysis phase

NOTE: calling this function invalidates the basis, except for the
      situation when this algorithm was already set with the same
      parameters.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkrealtime(const ssamodel &s, const ae_int_t topk, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssasetalgotopkrealtime(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), topk, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
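
/*************************************************************************
A configuration sketch for the real-time solver (all numbers below are
illustrative assumptions; compare with the ssasetalgotopkdirect() sketch
above, which recomputes the basis from scratch instead):

    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 100);           // assumed window width
    alglib::ssasetalgotopkrealtime(s, 3);   // top-3, subspace eigensolver
    alglib::ssasetpoweruplength(s, 10);     // optional: amortize the init
    // ... then feed data with ssaappendpointandupdate() /
    //     ssaappendsequenceandupdate()
*************************************************************************/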

/*************************************************************************
This function clears all data stored in the model and invalidates all
basis components found so far.

INPUT PARAMETERS:
    S       -   SSA model created with ssacreate()

OUTPUT PARAMETERS:
    S       -   SSA model, updated

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacleardata(const ssamodel &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssacleardata(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function executes SSA on the internally stored dataset and returns
the basis found by the current method.

INPUT PARAMETERS:
    S           -   SSA model

OUTPUT PARAMETERS:
    A           -   array[WindowWidth,NBasis], basis; vectors are
                    stored in matrix columns, in order of decreasing
                    variance
    SV          -   array[NBasis]:
                    * zeros - for a model initialized with
                      SSASetAlgoPrecomputed()
                    * singular values - for other algorithms
    WindowWidth -   current window
    NBasis      -   basis size

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

HANDLING OF DEGENERATE CASES

Calling this function in degenerate cases (no data, or all data are
shorter than the window size, or no algorithm is specified) returns a
basis with just one zero vector.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetbasis(const ssamodel &s, real_2d_array &a, real_1d_array &sv, ae_int_t &windowwidth, ae_int_t &nbasis, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssagetbasis(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(sv.c_ptr()), &windowwidth, &nbasis, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
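
/*************************************************************************
A retrieval sketch (assumes a model "s" which already holds data and has
an algorithm configured):

    alglib::real_2d_array a;
    alglib::real_1d_array sv;
    alglib::ae_int_t w, nbasis;
    alglib::ssagetbasis(s, a, sv, w, nbasis);
    // column j of "a" is the j-th basis vector (length w); sv[j] is its
    // singular value, with sv[0]>=sv[1]>=... by construction
*************************************************************************/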

/*************************************************************************
This function returns the linear recurrence relation (LRR) coefficients
found by the current SSA algorithm.

INPUT PARAMETERS:
    S           -   SSA model

OUTPUT PARAMETERS:
    A           -   array[WindowWidth-1]. Coefficients of the
                    linear recurrence of the form:
                    X[W-1] = X[W-2]*A[W-2] + X[W-3]*A[W-3] + ... + X[0]*A[0].
                    Empty array for WindowWidth=1.
    WindowWidth -   current window width

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

HANDLING OF DEGENERATE CASES

Calling this function in degenerate cases (no data, or all data are
shorter than the window size, or no algorithm is specified) returns zeros.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetlrr(const ssamodel &s, real_1d_array &a, ae_int_t &windowwidth, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssagetlrr(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(a.c_ptr()), &windowwidth, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
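
/*************************************************************************
A sketch of applying the returned coefficients by hand, reproducing the
recurrence quoted above ("t" is a hypothetical real_1d_array holding the
last W-1 trend values in chronological order, t[0] being the oldest):

    alglib::real_1d_array a;
    alglib::ae_int_t w;
    alglib::ssagetlrr(s, a, w);
    double next = 0.0;
    for(alglib::ae_int_t k=0; k<=w-2; k++)
        next += t[k]*a[k];      // X[W-1] = sum of X[k]*A[k], k=0..W-2
*************************************************************************/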

/*************************************************************************
This function executes SSA on the internally stored dataset and returns
the analysis for the last window of the last sequence. Such analysis is a
lightweight alternative to full-scale reconstruction (see below).

A typical use case for this function is a real-time setting, when you are
interested in quick-and-dirty (very quick and very dirty) processing of
just a few last ticks of the trend.

IMPORTANT: full-scale SSA involves analysis of the ENTIRE dataset,
           with reconstruction being done for all positions of the
           sliding window, with subsequent hankelization (diagonal
           averaging) of the resulting matrix.

           Such analysis requires O((DataLen-Window)*Window*NBasis) FLOPs
           and can be quite costly. However, it has nice noise-canceling
           effects due to averaging.

           This function performs a REDUCED analysis of the last window.
           It is much faster - just O(Window*NBasis) - but its results
           are DIFFERENT from those of ssaanalyzelast(). In particular,
           the first few points of the trend are much more prone to
           noise.

INPUT PARAMETERS:
    S       -   SSA model

OUTPUT PARAMETERS:
    Trend   -   array[WindowSize], reconstructed trend line
    Noise   -   array[WindowSize], the rest of the signal;
                it holds that ActualData = Trend+Noise.
    NTicks  -   current WindowSize

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the last sequence is shorter than the window length (analysis can be
  done, but we can not perform reconstruction on the last sequence)

Calling this function in degenerate cases returns the following result:
* in any case, WindowWidth ticks are returned
* the trend is assumed to be zero
* the noise is initialized by the last sequence; if the last sequence is
  shorter than the window size, it is moved to the end of the array, and
  the beginning of the noise array is filled by zeros

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelastwindow(const ssamodel &s, real_1d_array &trend, real_1d_array &noise, ae_int_t &nticks, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaanalyzelastwindow(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &nticks, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
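
/*************************************************************************
A quick-look sketch (assumes a primed model "s"; the outputs have
WindowWidth ticks):

    alglib::real_1d_array trend, noise;
    alglib::ae_int_t nticks;
    alglib::ssaanalyzelastwindow(s, trend, noise, nticks);
    // trend[nticks-1] is the denoised value of the most recent tick;
    // earlier entries are noisier than those of ssaanalyzelast()
*************************************************************************/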

/*************************************************************************
This function:
* builds an SSA basis using the internally stored (entire) dataset
* returns the reconstruction for the last NTicks of the last sequence

If you want to analyze some other sequence, use ssaanalyzesequence().

The reconstruction phase involves generation of NTicks-WindowWidth sliding
windows, their decomposition using the empirical orthogonal functions
found by SSA, followed by averaging of each data point across several
overlapping windows. Thus, every point in the output trend is
reconstructed using up to WindowWidth overlapping windows (exactly
WindowWidth windows at the inner points, just one window at the extremal
points).

IMPORTANT: due to averaging this function returns different results for
           different values of NTicks. It is expected and not a bug.

           For example:
           * Trend[NTicks-1] is always the same because it is not
             averaged in any case (the same applies to Trend[0]).
           * Trend[NTicks-2] has different values for NTicks=WindowWidth
             and NTicks=WindowWidth+1 because the former case means that
             no averaging is performed, and the latter case means that
             averaging using two sliding windows is performed. Larger
             values of NTicks produce the same results as
             NTicks=WindowWidth+1.
           * ...and so on...

PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
             running time. If you work in a time-constrained setting and
             have to analyze just a few last ticks, choosing NTicks equal
             to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
             will result in a good compromise between noise cancellation
             and analysis speed; see the sketch after this function.

INPUT PARAMETERS:
    S       -   SSA model
    NTicks  -   number of ticks to analyze, NTicks>=1.
                * the special case of NTicks<=WindowWidth is handled
                  by analyzing the last window and returning the NTicks
                  last ticks.
                * the special case NTicks>LastSequenceLen is handled
                  by prepending the result with NTicks-LastSequenceLen
                  zeros.

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], reconstructed trend line
    Noise   -   array[NTicks], the rest of the signal;
                it holds that ActualData = Trend+Noise.

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the last sequence is shorter than the window length (analysis can be
  done, but we can not perform reconstruction on the last sequence)

Calling this function in degenerate cases returns the following result:
* in any case, NTicks ticks are returned
* the trend is assumed to be zero
* the noise is initialized by the last sequence; if the last sequence is
  shorter than the window size, it is moved to the end of the array, and
  the beginning of the noise array is filled by zeros

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaanalyzelast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
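
/*************************************************************************
A sketch of the time-constrained usage suggested in the PERFORMANCE note
above (the window width of 100 and smoothing length of 10 are arbitrary
assumptions; SmoothingLen may be anything in 1...WindowWidth):

    alglib::ae_int_t w = 100;               // assumed: model window width
    alglib::ae_int_t smoothinglen = 10;     // assumed smoothing length
    alglib::real_1d_array trend, noise;
    alglib::ssaanalyzelast(s, w+smoothinglen, trend, noise);
    // the last entries of "trend" are averaged across several windows,
    // trading a little speed for noise cancellation
*************************************************************************/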

/*************************************************************************
This function:
* builds an SSA basis using the internally stored (entire) dataset
* returns the reconstruction for the sequence being passed to this
  function

If you want to analyze the last sequence stored in the model, use
ssaanalyzelast().

The reconstruction phase involves generation of NTicks-WindowWidth sliding
windows, their decomposition using the empirical orthogonal functions
found by SSA, followed by averaging of each data point across several
overlapping windows. Thus, every point in the output trend is
reconstructed using up to WindowWidth overlapping windows (exactly
WindowWidth windows at the inner points, just one window at the extremal
points).

PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
             running time. If you work in a time-constrained setting and
             have to analyze just a few last ticks, choosing NTicks equal
             to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
             will result in a good compromise between noise cancellation
             and analysis speed.

INPUT PARAMETERS:
    S       -   SSA model
    Data    -   array[NTicks], can be larger (only NTicks leading
                elements will be used)
    NTicks  -   number of ticks to analyze, NTicks>=1.
                * the special case of NTicks<WindowWidth is handled
                  by returning zeros as the trend, and the signal as the
                  noise

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], reconstructed trend line
    Noise   -   array[NTicks], the rest of the signal;
                it holds that ActualData = Trend+Noise.

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the sequence being passed is shorter than the window length

Calling this function in degenerate cases returns the following result:
* in any case, NTicks ticks are returned
* the trend is assumed to be zero
* the noise is initialized by the sequence.

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaanalyzesequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function:
* builds an SSA basis using the internally stored (entire) dataset
* returns the reconstruction for the sequence being passed to this
  function

If you want to analyze the last sequence stored in the model, use
ssaanalyzelast().

The reconstruction phase involves generation of NTicks-WindowWidth sliding
windows, their decomposition using the empirical orthogonal functions
found by SSA, followed by averaging of each data point across several
overlapping windows. Thus, every point in the output trend is
reconstructed using up to WindowWidth overlapping windows (exactly
WindowWidth windows at the inner points, just one window at the extremal
points).

PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
             running time. If you work in a time-constrained setting and
             have to analyze just a few last ticks, choosing NTicks equal
             to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
             will result in a good compromise between noise cancellation
             and analysis speed.

INPUT PARAMETERS:
    S       -   SSA model
    Data    -   array[NTicks], can be larger (only NTicks leading
                elements will be used)
    NTicks  -   number of ticks to analyze, NTicks>=1.
                * the special case of NTicks<WindowWidth is handled
                  by returning zeros as the trend, and the signal as the
                  noise

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], reconstructed trend line
    Noise   -   array[NTicks], the rest of the signal;
                it holds that ActualData = Trend+Noise.

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the sequence being passed is shorter than the window length

Calling this function in degenerate cases returns the following result:
* in any case, NTicks ticks are returned
* the trend is assumed to be zero
* the noise is initialized by the sequence.

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t nticks;

    nticks = data.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaanalyzesequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
This function builds an SSA basis and performs forecasting for a specified
number of ticks, returning the value of the trend.

The forecast is performed as follows:
* SSA trend extraction is applied to the last WindowWidth elements of the
  internally stored dataset; this step is basically a noise reduction.
* the linear recurrence relation is applied to the extracted trend

This function has the following running time:
* O(NBasis*WindowWidth) for the trend extraction phase (always performed)
* O(WindowWidth*NTicks) for the forecast phase

NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
      apply the recurrence relation to raw unprocessed data, use another
      function - ssaforecastsequence() - which allows you to turn the
      noise reduction phase on and off.

NOTE: this algorithm performs prediction using only one - the last -
      sliding window. Predictions produced by such an approach are smooth
      continuations of the reconstructed trend line, but they can be
      easily corrupted by noise. If you need noise-resistant prediction,
      use the ssaforecastavglast() function, which averages predictions
      built using several sliding windows.

INPUT PARAMETERS:
    S       -   SSA model
    NTicks  -   number of ticks to forecast, NTicks>=1

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], predicted trend line

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the last sequence is shorter than the WindowWidth (analysis can be
  done, but we can not perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only
  a basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* NTicks copies of the last value are returned for a non-empty task with
  a large enough dataset, but with an overcomplete basis (window width=1
  or basis size equal to window width)
* a zero trend with length=NTicks is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastlast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastlast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
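
/*************************************************************************
A forecasting sketch (assumes a primed model "s"; the 10-tick horizon is
an arbitrary assumption):

    alglib::real_1d_array forecast;
    alglib::ssaforecastlast(s, 10, forecast);   // predict 10 ticks ahead
    // forecast[0..9] is the smooth LRR continuation of the denoised trend
*************************************************************************/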

/*************************************************************************
This function builds an SSA basis and performs forecasting for a user-
specified sequence, returning the value of the trend.

Forecasting is done in two stages:
* first, we extract the trend from the WindowWidth last elements of the
  sequence. This stage is optional, you can turn it off if you pass data
  which are already processed with SSA. Of course, you can turn it off
  even for raw data, but it is not recommended - noise suppression is
  very important for correct prediction.
* then, we apply the LRR to the last WindowWidth-1 elements of the
  extracted trend.

This function has the following running time:
* O(NBasis*WindowWidth) for the trend extraction phase
* O(WindowWidth*NTicks) for the forecast phase

NOTE: this algorithm performs prediction using only one - the last -
      sliding window. Predictions produced by such an approach are smooth
      continuations of the reconstructed trend line, but they can be
      easily corrupted by noise. If you need noise-resistant prediction,
      use the ssaforecastavgsequence() function, which averages
      predictions built using several sliding windows.

INPUT PARAMETERS:
    S               -   SSA model
    Data            -   array[NTicks], data to forecast
    DataLen         -   number of ticks in the data, DataLen>=1
    ForecastLen     -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing  -   whether to apply smoothing trend extraction or
                        not; if you do not know what to specify, pass
                        True.

OUTPUT PARAMETERS:
    Trend           -   array[ForecastLen], forecasted trend

CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the
  cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
  Calling these functions will result in reuse of the previously found
  basis.

HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be
  done
* no algorithm is specified (no analysis can be done)
* the data sequence is shorter than the WindowWidth (analysis can be
  done, but we can not perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only
  a basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset, but with an overcomplete basis (window
  width=1 or basis size equal to window width)
* a zero trend with length=ForecastLen is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
|
|
|
|
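/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
a minimal, compiled-out example of calling ssaforecastsequence() via its
simplified overload. The series values, window width and basis size are
hypothetical; any configuration with basis size < window width works.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5]";
    alglib::real_1d_array trend;

    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 3);                  // WindowWidth=3
    alglib::ssaaddsequence(s, x);                // dataset used to build the basis
    alglib::ssasetalgotopkdirect(s, 2);          // basis size 2 < window width
    alglib::ssaforecastsequence(s, x, 4, trend); // forecast 4 ticks ahead of x
    for(int i=0; i<trend.length(); i++)
        printf("%.4f\n", double(trend[i]));
    return 0;
}
#endif
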
/*************************************************************************
This function builds SSA basis and performs forecasting for a user-
specified sequence, returning value of trend.

Forecasting is done in two stages:
* first, we extract trend from the WindowWidth last elements of the
  sequence. This stage is optional, you can turn it off if you pass
  data which are already processed with SSA. Of course, you can turn it
  off even for raw data, but it is not recommended - noise suppression is
  very important for correct prediction.
* then, we apply LRR for last WindowWidth-1 elements of the extracted
  trend.

This function has the following running time:
* O(NBasis*WindowWidth) for trend extraction phase
* O(WindowWidth*NTicks) for forecast phase

NOTE: this algorithm performs prediction using only one - last - sliding
      window. Predictions produced by such an approach are smooth
      continuations of the reconstructed trend line, but they can be
      easily corrupted by noise. If you need noise-resistant prediction,
      use ssaforecastavgsequence() function, which averages predictions
      built using several sliding windows.

INPUT PARAMETERS:
    S               -   SSA model
    Data            -   array[NTicks], data to forecast
    DataLen         -   number of ticks in the data, DataLen>=1
    ForecastLen     -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing  -   whether to apply smoothing trend extraction or not;
                        if you do not know what to specify, pass True.

OUTPUT PARAMETERS:
    Trend           -   array[ForecastLen], forecasted trend


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* data sequence is shorter than the WindowWidth (analysis can be done,
  but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
  equal to window length (impossible to use for forecasting; only basis
  whose size is less than window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset but an overcomplete basis (window width=1
  or basis size equal to window width)
* zero trend with length=ForecastLen is returned for empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t datalen;
    bool applysmoothing;

    datalen = data.length();
    applysmoothing = true;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
This function builds SSA basis and performs forecasting for a specified
number of ticks, returning value of trend.

Forecast is performed as follows:
* SSA trend extraction is applied to last M sliding windows of the
  internally stored dataset
* for each of M sliding windows, a prediction is built
* average value of M predictions is returned

This function has the following running time:
* O(NBasis*WindowWidth*M) for trend extraction phase (always performed)
* O(WindowWidth*NTicks*M) for forecast phase

NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
      apply recurrence relation to raw unprocessed data, use another
      function - ssaforecastsequence() - which allows you to turn the
      noise reduction phase on and off.

NOTE: combination of several predictions results in lesser sensitivity to
      noise, but it may produce undesirable discontinuities between last
      point of the trend and first point of the prediction. The reason is
      that last point of the trend is usually corrupted by noise, but
      average value of several predictions is less sensitive to noise,
      thus discontinuity appears. It is not a bug.

INPUT PARAMETERS:
    S               -   SSA model
    M               -   number of sliding windows to combine, M>=1. If
                        your dataset has less than M sliding windows, this
                        parameter will be silently reduced.
    NTicks          -   number of ticks to forecast, NTicks>=1

OUTPUT PARAMETERS:
    Trend           -   array[NTicks], predicted trend line


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the WindowWidth (analysis can be done,
  but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
  equal to window length (impossible to use for forecasting; only basis
  whose size is less than window length can be used).

Calling this function in degenerate cases returns the following result:
* NTicks copies of the last value are returned for a non-empty task with
  a large enough dataset but an overcomplete basis (window width=1 or
  basis size equal to window width)
* zero trend with length=NTicks is returned for empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavglast(const ssamodel &s, const ae_int_t m, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastavglast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), m, nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

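/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of ssaforecastavglast(), which forecasts from the
internally stored dataset. Series values, M and NTicks are hypothetical.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[1.0,1.1,2.0,2.1,3.0,3.1,4.0,4.1,5.0,5.1]";
    alglib::real_1d_array trend;

    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 4);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetalgotopkdirect(s, 2);
    alglib::ssaforecastavglast(s, 3, 5, trend); // average of M=3 windows, 5 ticks
    for(int i=0; i<trend.length(); i++)
        printf("%.4f\n", double(trend[i]));
    return 0;
}
#endif
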
/*************************************************************************
This function builds SSA basis and performs forecasting for a user-
specified sequence, returning value of trend.

Forecasting is done in two stages:
* first, we extract trend from M last sliding windows of the sequence.
  This stage is optional, you can turn it off if you pass data which
  are already processed with SSA. Of course, you can turn it off even
  for raw data, but it is not recommended - noise suppression is very
  important for correct prediction.
* then, we apply LRR independently for M sliding windows
* average of M predictions is returned

This function has the following running time:
* O(NBasis*WindowWidth*M) for trend extraction phase
* O(WindowWidth*NTicks*M) for forecast phase

NOTE: combination of several predictions results in lesser sensitivity to
      noise, but it may produce undesirable discontinuities between last
      point of the trend and first point of the prediction. The reason is
      that last point of the trend is usually corrupted by noise, but
      average value of several predictions is less sensitive to noise,
      thus discontinuity appears. It is not a bug.

INPUT PARAMETERS:
    S               -   SSA model
    Data            -   array[NTicks], data to forecast
    DataLen         -   number of ticks in the data, DataLen>=1
    M               -   number of sliding windows to combine, M>=1. If
                        your dataset has less than M sliding windows, this
                        parameter will be silently reduced.
    ForecastLen     -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing  -   whether to apply smoothing trend extraction or not.
                        If you do not know what to specify, pass True.

OUTPUT PARAMETERS:
    Trend           -   array[ForecastLen], forecasted trend


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* data sequence is shorter than the WindowWidth (analysis can be done,
  but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
  equal to window length (impossible to use for forecasting; only basis
  whose size is less than window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset but an overcomplete basis (window width=1
  or basis size equal to window width)
* zero trend with length=ForecastLen is returned for empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t m, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastavgsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, m, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

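/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of the simplified ssaforecastavgsequence() overload;
it differs from ssaforecastsequence() only in the extra M parameter. The
data and parameters are hypothetical.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5]";
    alglib::real_1d_array trend;

    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 3);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetalgotopkdirect(s, 2);
    alglib::ssaforecastavgsequence(s, x, 3, 4, trend); // M=3, forecast 4 ticks
    for(int i=0; i<trend.length(); i++)
        printf("%.4f\n", double(trend[i]));
    return 0;
}
#endif
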
/*************************************************************************
This function builds SSA basis and performs forecasting for a user-
specified sequence, returning value of trend.

Forecasting is done in two stages:
* first, we extract trend from M last sliding windows of the sequence.
  This stage is optional, you can turn it off if you pass data which
  are already processed with SSA. Of course, you can turn it off even
  for raw data, but it is not recommended - noise suppression is very
  important for correct prediction.
* then, we apply LRR independently for M sliding windows
* average of M predictions is returned

This function has the following running time:
* O(NBasis*WindowWidth*M) for trend extraction phase
* O(WindowWidth*NTicks*M) for forecast phase

NOTE: combination of several predictions results in lesser sensitivity to
      noise, but it may produce undesirable discontinuities between last
      point of the trend and first point of the prediction. The reason is
      that last point of the trend is usually corrupted by noise, but
      average value of several predictions is less sensitive to noise,
      thus discontinuity appears. It is not a bug.

INPUT PARAMETERS:
    S               -   SSA model
    Data            -   array[NTicks], data to forecast
    DataLen         -   number of ticks in the data, DataLen>=1
    M               -   number of sliding windows to combine, M>=1. If
                        your dataset has less than M sliding windows, this
                        parameter will be silently reduced.
    ForecastLen     -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing  -   whether to apply smoothing trend extraction or not.
                        If you do not know what to specify, pass True.

OUTPUT PARAMETERS:
    Trend           -   array[ForecastLen], forecasted trend


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* data sequence is shorter than the WindowWidth (analysis can be done,
  but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
  equal to window length (impossible to use for forecasting; only basis
  whose size is less than window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset but an overcomplete basis (window width=1
  or basis size equal to window width)
* zero trend with length=ForecastLen is returned for empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t m, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t datalen;
    bool applysmoothing;

    datalen = data.length();
    applysmoothing = true;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ssaforecastavgsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, m, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif
#endif

#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************

*************************************************************************/
_linearmodel_owner::_linearmodel_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_linearmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::linearmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::linearmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
    alglib_impl::_linearmodel_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_linearmodel_owner::_linearmodel_owner(const _linearmodel_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_linearmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: linearmodel copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::linearmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::linearmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
    alglib_impl::_linearmodel_init_copy(p_struct, const_cast<alglib_impl::linearmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_linearmodel_owner& _linearmodel_owner::operator=(const _linearmodel_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: linearmodel assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: linearmodel assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_linearmodel_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
    alglib_impl::_linearmodel_init_copy(p_struct, const_cast<alglib_impl::linearmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_linearmodel_owner::~_linearmodel_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_linearmodel_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::linearmodel* _linearmodel_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::linearmodel* _linearmodel_owner::c_ptr() const
{
    return const_cast<alglib_impl::linearmodel*>(p_struct);
}

linearmodel::linearmodel() : _linearmodel_owner()
{
}

linearmodel::linearmodel(const linearmodel &rhs):_linearmodel_owner(rhs)
{
}

linearmodel& linearmodel::operator=(const linearmodel &rhs)
{
    if( this==&rhs )
        return *this;
    _linearmodel_owner::operator=(rhs);
    return *this;
}

linearmodel::~linearmodel()
{
}

/*************************************************************************
LRReport structure contains additional information about linear model:
* C             -   covariance matrix, array[0..NVars,0..NVars].
                    C[i,j] = Cov(A[i],A[j])
* RMSError      -   root mean square error on a training set
* AvgError      -   average error on a training set
* AvgRelError   -   average relative error on a training set (excluding
                    observations with zero function value).
* CVRMSError    -   leave-one-out cross-validation estimate of
                    generalization error. Calculated using fast algorithm
                    with O(NVars*NPoints) complexity.
* CVAvgError    -   cross-validation estimate of average error
* CVAvgRelError -   cross-validation estimate of average relative error

All other fields of the structure are intended for internal use and should
not be used outside ALGLIB.
*************************************************************************/
_lrreport_owner::_lrreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_lrreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::lrreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::lrreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::lrreport));
    alglib_impl::_lrreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_lrreport_owner::_lrreport_owner(const _lrreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_lrreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: lrreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::lrreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::lrreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::lrreport));
    alglib_impl::_lrreport_init_copy(p_struct, const_cast<alglib_impl::lrreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_lrreport_owner& _lrreport_owner::operator=(const _lrreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: lrreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: lrreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_lrreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::lrreport));
    alglib_impl::_lrreport_init_copy(p_struct, const_cast<alglib_impl::lrreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_lrreport_owner::~_lrreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_lrreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::lrreport* _lrreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::lrreport* _lrreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::lrreport*>(p_struct);
}

lrreport::lrreport() : _lrreport_owner() ,c(&p_struct->c),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),cvrmserror(p_struct->cvrmserror),cvavgerror(p_struct->cvavgerror),cvavgrelerror(p_struct->cvavgrelerror),ncvdefects(p_struct->ncvdefects),cvdefects(&p_struct->cvdefects)
{
}

lrreport::lrreport(const lrreport &rhs):_lrreport_owner(rhs) ,c(&p_struct->c),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),cvrmserror(p_struct->cvrmserror),cvavgerror(p_struct->cvavgerror),cvavgrelerror(p_struct->cvavgrelerror),ncvdefects(p_struct->ncvdefects),cvdefects(&p_struct->cvdefects)
{
}

lrreport& lrreport::operator=(const lrreport &rhs)
{
    if( this==&rhs )
        return *this;
    _lrreport_owner::operator=(rhs);
    return *this;
}

lrreport::~lrreport()
{
}

/*************************************************************************
Linear regression

Subroutine builds model:

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)

and returns the model in ALGLIB format, together with the covariance
matrix, training set errors (rms, average, average relative) and a
leave-one-out cross-validation estimate of the generalization error. The
CV estimate is calculated using a fast algorithm with O(NPoints*NVars)
complexity.

When the covariance matrix is calculated, standard deviations of function
values are assumed to be equal to the RMS error on the training set.

INPUT PARAMETERS:
    XY          -   training set, array [0..NPoints-1,0..NVars]:
                    * NVars columns - independent variables
                    * last column - dependent variable
    NPoints     -   training set size, NPoints>NVars+1
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -255, in case of unknown internal error
                    * -4, if internal SVD subroutine failed to converge
                    * -1, if incorrect parameters were passed
                      (NPoints<NVars+2, NVars<1).
                    * 1, if subroutine successfully finished
    LM          -   linear model in the ALGLIB format. Use subroutines of
                    this unit to work with the model.
    AR          -   additional results

  -- ALGLIB --
     Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuild(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrbuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

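/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of lrbuild() on five hypothetical points sampled from
y ~= 2*x + 1, followed by a look at the lrreport fields described above.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    // last column of XY is the dependent variable
    alglib::real_2d_array xy = "[[0,1.1],[1,2.9],[2,5.2],[3,6.8],[4,9.1]]";
    alglib::ae_int_t info;
    alglib::linearmodel lm;
    alglib::lrreport ar;

    alglib::lrbuild(xy, 5, 1, info, lm, ar);    // NPoints=5, NVars=1
    if( info==1 )
        printf("rms=%.4f  cv-rms=%.4f\n", ar.rmserror, ar.cvrmserror);
    return 0;
}
#endif
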
/*************************************************************************
Linear regression

Variant of LRBuild which uses a vector of standard deviations (errors in
function values).

INPUT PARAMETERS:
    XY          -   training set, array [0..NPoints-1,0..NVars]:
                    * NVars columns - independent variables
                    * last column - dependent variable
    S           -   standard deviations (errors in function values),
                    array[0..NPoints-1], S[i]>0.
    NPoints     -   training set size, NPoints>NVars+1
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -255, in case of unknown internal error
                    * -4, if internal SVD subroutine failed to converge
                    * -1, if incorrect parameters were passed
                      (NPoints<NVars+2, NVars<1).
                    * -2, if S[I]<=0
                    * 1, if subroutine successfully finished
    LM          -   linear model in the ALGLIB format. Use subroutines of
                    this unit to work with the model.
    AR          -   additional results

  -- ALGLIB --
     Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuilds(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrbuilds(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(s.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like LRBuildS, but builds model

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

i.e. with zero constant term.

  -- ALGLIB --
     Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildzs(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrbuildzs(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(s.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like LRBuild but builds model

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

i.e. with zero constant term.

  -- ALGLIB --
     Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildz(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrbuildz(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Unpacks coefficients of linear model.

INPUT PARAMETERS:
    LM          -   linear model in ALGLIB format

OUTPUT PARAMETERS:
    V           -   coefficients, array[0..NVars];
                    the constant term (intercept) is stored in V[NVars].
    NVars       -   number of independent variables (one less than number
                    of coefficients)

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrunpack(const linearmodel &lm, real_1d_array &v, ae_int_t &nvars, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrunpack(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &nvars, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

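/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out lrpack()/lrunpack() round trip. The coefficients are
hypothetical: y = 2*x0 - 1, i.e. slope 2 and intercept -1.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array v = "[2.0,-1.0]";  // v[NVars] holds the intercept
    alglib::linearmodel lm;
    alglib::lrpack(v, 1, lm);                // NVars=1

    alglib::real_1d_array w;
    alglib::ae_int_t nvars;
    alglib::lrunpack(lm, w, nvars);          // recovers w=[2,-1], nvars=1
    printf("nvars=%d slope=%.1f intercept=%.1f\n",
           int(nvars), double(w[0]), double(w[1]));
    return 0;
}
#endif
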
/*************************************************************************
"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
reversed).

INPUT PARAMETERS:
    V           -   coefficients, array[0..NVars]
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    LM          -   linear model.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrpack(const real_1d_array &v, const ae_int_t nvars, linearmodel &lm, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::lrpack(const_cast<alglib_impl::ae_vector*>(v.c_ptr()), nvars, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Processing

INPUT PARAMETERS:
    LM      -   linear model
    X       -   input vector, array[0..NVars-1].

Result:
    value of linear model regression estimate

  -- ALGLIB --
     Copyright 03.09.2008 by Bochkanov Sergey
*************************************************************************/
double lrprocess(const linearmodel &lm, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::lrprocess(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

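/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of lrprocess(). The model y = 2*x0 - 1 is built from
hypothetical coefficients via lrpack(), then evaluated at x0=2.5.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array v = "[2.0,-1.0]";
    alglib::linearmodel lm;
    alglib::lrpack(v, 1, lm);

    alglib::real_1d_array x = "[2.5]";
    printf("y=%.2f\n", alglib::lrprocess(lm, x)); // prints y=4.00
    return 0;
}
#endif
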
/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lrrmserror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::lrrmserror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::lravgerror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgrelerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::lravgrelerror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

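/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example comparing the three test-set error metrics for the
known model y = x0. The test-set values are hypothetical.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array v = "[1.0,0.0]";   // y = x0, zero intercept
    alglib::linearmodel lm;
    alglib::lrpack(v, 1, lm);

    alglib::real_2d_array xy = "[[1,1.5],[2,2.0],[3,2.5]]";
    printf("rms=%.3f avg=%.3f avgrel=%.3f\n",
           alglib::lrrmserror(lm, xy, 3),
           alglib::lravgerror(lm, xy, 3),
           alglib::lravgrelerror(lm, xy, 3));
    return 0;
}
#endif
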
#endif

#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Filters: simple moving averages (unsymmetric).

This filter replaces array by results of SMA(K) filter. SMA(K) is defined
as filter which averages at most K previous points (previous - not points
AROUND central point) - or less, in case of the first K-1 points.

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    K           -   K>=1 (K can be larger than N, such cases will be
                    correctly handled). Window width. K=1 corresponds to
                    identity transformation (nothing changes).

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed with SMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running
        sum to speed-up calculation of the averages. Additional measures
        are taken to ensure that running sum on a long sequence of zero
        elements will be correctly reset to zero even in the presence of
        round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT
        average points after the current one. Only X[i], X[i-1], ... are
        used when calculating new value of X[i]. We should also note that
        this algorithm uses BOTH previous points and current one, i.e.
        new value of X[i] depends on BOTH previous points and X[i] itself.

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filtersma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filtersma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

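/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of filtersma() on a hypothetical series with a spike
at the end; it shows that the filter is trailing, not centered.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array x = "[5,5,5,5,100]";
    alglib::filtersma(x, 3);   // SMA(3): x[0] averages 1 point, x[1] two, ...
    for(int i=0; i<x.length(); i++)
        printf("%.2f\n", double(x[i]));   // last element becomes (5+5+100)/3
    return 0;
}
#endif
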
/*************************************************************************
Filters: simple moving averages (unsymmetric).

This filter replaces array by results of SMA(K) filter. SMA(K) is defined
as filter which averages at most K previous points (previous - not points
AROUND central point) - or less, in case of the first K-1 points.

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    K           -   K>=1 (K can be larger than N, such cases will be
                    correctly handled). Window width. K=1 corresponds to
                    identity transformation (nothing changes).

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed with SMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running
        sum to speed-up calculation of the averages. Additional measures
        are taken to ensure that running sum on a long sequence of zero
        elements will be correctly reset to zero even in the presence of
        round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT
        average points after the current one. Only X[i], X[i-1], ... are
        used when calculating new value of X[i]. We should also note that
        this algorithm uses BOTH previous points and current one, i.e.
        new value of X[i] depends on BOTH previous points and X[i] itself.

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void filtersma(real_1d_array &x, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t n;

    n = x.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filtersma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
Filters: exponential moving averages.

This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
defined as filter which replaces X[] by S[]:
    S[0] = X[0]
    S[t] = alpha*X[t] + (1-alpha)*S[t-1]

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    alpha       -   0<alpha<=1, smoothing parameter.

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed
                    with EMA(alpha)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm uses BOTH previous points and current one, i.e.
        new value of X[i] depends on BOTH previous point and X[i] itself.

NOTE 3: technical analysts quite often work with EMA coefficient
        expressed in DAYS instead of fractions. If you want to calculate
        EMA(N), where N is a number of days, you can use alpha=2/(N+1).

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterema(real_1d_array &x, const ae_int_t n, const double alpha, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filterema(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, alpha, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

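/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of filterema() with alpha=0.5 on hypothetical data,
plus the day-count conversion mentioned in NOTE 3.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array x = "[1,1,1,10,1]";
    alglib::filterema(x, 0.5);  // S[0]=1, S[3]=0.5*10+0.5*1=5.5, S[4]=3.25
    for(int i=0; i<x.length(); i++)
        printf("%.2f\n", double(x[i]));

    // for an N-day EMA as used by technical analysts, pass alpha=2.0/(N+1)
    alglib::real_1d_array y = "[1,2,3,4,5,6,7,8,9,10]";
    alglib::filterema(y, 2.0/(10+1));   // 10-day EMA
    return 0;
}
#endif
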
/*************************************************************************
Filters: exponential moving averages.

This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
defined as filter which replaces X[] by S[]:
    S[0] = X[0]
    S[t] = alpha*X[t] + (1-alpha)*S[t-1]

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    alpha       -   0<alpha<=1, smoothing parameter.

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed
                    with EMA(alpha)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm uses BOTH previous points and current one, i.e.
        new value of X[i] depends on BOTH previous point and X[i] itself.

NOTE 3: technical analysts quite often work with EMA coefficient
        expressed in DAYS instead of fractions. If you want to calculate
        EMA(N), where N is a number of days, you can use alpha=2/(N+1).

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void filterema(real_1d_array &x, const double alpha, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t n;

    n = x.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filterema(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, alpha, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
Filters: linear regression moving averages.

This filter replaces array by results of LRMA(K) filter.

LRMA(K) is defined as filter which, for each data point, builds linear
regression model using K previous points (the point itself is included in
these K points) and calculates value of this linear model at the point in
question.

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    K           -   K>=1 (K can be larger than N, such cases will be
                    correctly handled). Window width. K=1 corresponds to
                    identity transformation (nothing changes).

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed with LRMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running
        sum to speed-up calculation of the averages. Additional measures
        are taken to ensure that running sum on a long sequence of zero
        elements will be correctly reset to zero even in the presence of
        round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT
        average points after the current one. Only X[i], X[i-1], ... are
        used when calculating new value of X[i]. We should also note that
        this algorithm uses BOTH previous points and current one, i.e.
        new value of X[i] depends on BOTH previous points and X[i] itself.

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterlrma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filterlrma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

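/*************************************************************************
Usage sketch (editor's addition, not part of the original ALGLIB sources):
compiled-out example of filterlrma() on a hypothetical noisy ramp; each
output point is the value at that point of a line fitted to the K=4 most
recent points.
*************************************************************************/
#if 0
#include <cstdio>
#include "dataanalysis.h"

int main()
{
    alglib::real_1d_array x = "[0.9,2.1,2.9,4.2,4.8,6.1]";
    alglib::filterlrma(x, 4);   // LRMA(4)
    for(int i=0; i<x.length(); i++)
        printf("%.3f\n", double(x[i]));
    return 0;
}
#endif
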
/*************************************************************************
|
|
Filters: linear regression moving averages.
|
|
|
|
This filter replaces array by results of LRMA(K) filter.
|
|
|
|
LRMA(K) is defined as filter which, for each data point, builds linear
|
|
regression model using K prevous points (point itself is included in
|
|
these K points) and calculates value of this linear model at the point in
|
|
question.
|
|
|
|
INPUT PARAMETERS:
|
|
X - array[N], array to process. It can be larger than N,
|
|
in this case only first N points are processed.
|
|
N - points count, N>=0
|
|
K - K>=1 (K can be larger than N , such cases will be
|
|
correctly handled). Window width. K=1 corresponds to
|
|
identity transformation (nothing changes).
|
|
|
|
OUTPUT PARAMETERS:
|
|
X - array, whose first N elements were processed with SMA(K)
|
|
|
|
NOTE 1: this function uses efficient in-place algorithm which does not
|
|
allocate temporary arrays.
|
|
|
|
NOTE 2: this algorithm makes only one pass through array and uses running
|
|
sum to speed-up calculation of the averages. Additional measures
|
|
are taken to ensure that running sum on a long sequence of zero
|
|
elements will be correctly reset to zero even in the presence of
|
|
round-off error.
|
|
|
|
NOTE 3: this is unsymmetric version of the algorithm, which does NOT
|
|
averages points after the current one. Only X[i], X[i-1], ... are
|
|
used when calculating new value of X[i]. We should also note that
|
|
this algorithm uses BOTH previous points and current one, i.e.
|
|
new value of X[i] depends on BOTH previous point and X[i] itself.
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.10.2011 by Bochkanov Sergey
|
|
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void filterlrma(real_1d_array &x, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t n;

    n = x.length();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::filterlrma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
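
/*************************************************************************
A minimal usage sketch for the two filterlrma() overloads above
(non-normative: the data values are arbitrary and the default xparams
argument is assumed):

    #include "dataanalysis.h"

    void filterlrma_example()
    {
        // LRMA(3): each X[i] is replaced by the value at position i of a
        // linear model fitted to X[i-2], X[i-1], X[i]; K may exceed N
        alglib::real_1d_array x = "[5,4,3,2,1]";
        alglib::filterlrma(x, 3);

        // explicit-length overload: process only the first 4 elements
        alglib::real_1d_array y = "[5,4,3,2,1]";
        alglib::filterlrma(y, 4, 3);
    }
*************************************************************************/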
#endif
#endif

#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************

*************************************************************************/
_logitmodel_owner::_logitmodel_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_logitmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::logitmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::logitmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
    alglib_impl::_logitmodel_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_logitmodel_owner::_logitmodel_owner(const _logitmodel_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_logitmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: logitmodel copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::logitmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::logitmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
    alglib_impl::_logitmodel_init_copy(p_struct, const_cast<alglib_impl::logitmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_logitmodel_owner& _logitmodel_owner::operator=(const _logitmodel_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: logitmodel assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: logitmodel assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_logitmodel_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
    alglib_impl::_logitmodel_init_copy(p_struct, const_cast<alglib_impl::logitmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_logitmodel_owner::~_logitmodel_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_logitmodel_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::logitmodel* _logitmodel_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::logitmodel* _logitmodel_owner::c_ptr() const
{
    return const_cast<alglib_impl::logitmodel*>(p_struct);
}

logitmodel::logitmodel() : _logitmodel_owner()
{
}

logitmodel::logitmodel(const logitmodel &rhs):_logitmodel_owner(rhs)
{
}

logitmodel& logitmodel::operator=(const logitmodel &rhs)
{
    if( this==&rhs )
        return *this;
    _logitmodel_owner::operator=(rhs);
    return *this;
}

logitmodel::~logitmodel()
{
}

/*************************************************************************
MNLReport structure contains information about training process:
* NGrad     -   number of gradient calculations
* NHess     -   number of Hessian calculations
*************************************************************************/
_mnlreport_owner::_mnlreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mnlreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mnlreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mnlreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
    alglib_impl::_mnlreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mnlreport_owner::_mnlreport_owner(const _mnlreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mnlreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mnlreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mnlreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mnlreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
    alglib_impl::_mnlreport_init_copy(p_struct, const_cast<alglib_impl::mnlreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mnlreport_owner& _mnlreport_owner::operator=(const _mnlreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mnlreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mnlreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mnlreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
    alglib_impl::_mnlreport_init_copy(p_struct, const_cast<alglib_impl::mnlreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mnlreport_owner::~_mnlreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mnlreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mnlreport* _mnlreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mnlreport* _mnlreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::mnlreport*>(p_struct);
}

mnlreport::mnlreport() : _mnlreport_owner() ,ngrad(p_struct->ngrad),nhess(p_struct->nhess)
{
}

mnlreport::mnlreport(const mnlreport &rhs):_mnlreport_owner(rhs) ,ngrad(p_struct->ngrad),nhess(p_struct->nhess)
{
}

mnlreport& mnlreport::operator=(const mnlreport &rhs)
{
    if( this==&rhs )
        return *this;
    _mnlreport_owner::operator=(rhs);
    return *this;
}

mnlreport::~mnlreport()
{
}

/*************************************************************************
This subroutine trains logit model.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars]
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if there is a point with class number outside
                          of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1, NClasses<2).
                    *  1, if task has been solved
    LM          -   model built
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnltrainh(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, logitmodel &lm, mnlreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mnltrainh(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::mnlreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
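
/*************************************************************************
A minimal training sketch for mnltrainh() (non-normative: the dataset is
a tiny illustrative sample and the default xparams argument is assumed).
Note the XY layout described above: NVars feature columns followed by one
class-label column, and NPoints>=NVars+2:

    #include "dataanalysis.h"

    void mnltrainh_example()
    {
        // 4 points, 2 independent variables, 2 classes;
        // the last column holds the class index (0 or 1)
        alglib::real_2d_array xy = "[[0,0,0],[0,1,0],[1,0,1],[1,1,1]]";
        alglib::ae_int_t info;
        alglib::logitmodel lm;
        alglib::mnlreport rep;
        alglib::mnltrainh(xy, 4, 2, 2, info, lm, rep);
        // info==1 on success; rep.ngrad and rep.nhess count gradient and
        // Hessian evaluations performed during training
    }
*************************************************************************/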

/*************************************************************************
Processing

INPUT PARAMETERS:
    LM      -   logit model, passed by non-constant reference
                (some fields of structure are used as temporaries
                when calculating model output).
    X       -   input vector, array[0..NVars-1].
    Y       -   (possibly) preallocated buffer; if size of Y is less than
                NClasses, it will be reallocated. If it is large enough,
                it is NOT reallocated, so we can save some time on
                reallocation.

OUTPUT PARAMETERS:
    Y       -   result, array[0..NClasses-1]
                Vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocess(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mnlprocess(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
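
/*************************************************************************
A sketch of inference with mnlprocess() (non-normative: assumes a model
lm trained as in the mnltrainh() sketch above):

    alglib::real_1d_array x = "[1,0]";  // NVars=2 input vector
    alglib::real_1d_array y;            // reuse between calls to avoid
                                        // reallocation, as noted above
    alglib::mnlprocess(lm, x, y);
    // y[0] and y[1] now hold posterior probabilities of classes 0 and 1
*************************************************************************/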

/*************************************************************************
'interactive' variant of MNLProcess for languages like Python which
support constructs like "Y = MNLProcess(LM,X)" and interactive mode of
the interpreter.

This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocessi(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mnlprocessi(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Unpacks coefficients of logit model. Logit model has the form:

    P(class=i) = S(i) / (S(0) + S(1) + ... + S(M-1))
          S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
        S(M-1) = 1

INPUT PARAMETERS:
    LM          -   logit model in ALGLIB format

OUTPUT PARAMETERS:
    V           -   coefficients, array[0..NClasses-2,0..NVars]
    NVars       -   number of independent variables
    NClasses    -   number of classes

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlunpack(const logitmodel &lm, real_2d_array &a, ae_int_t &nvars, ae_int_t &nclasses, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mnlunpack(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &nvars, &nclasses, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
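
/*************************************************************************
A sketch showing how the unpacked coefficients reproduce the posterior
formula given above (non-normative: lm is any trained logit model; the
helper name is illustrative):

    #include <cmath>
    #include "dataanalysis.h"

    double mnl_posterior(const alglib::logitmodel &lm,
                         const alglib::real_1d_array &x,
                         alglib::ae_int_t cls)
    {
        alglib::real_2d_array a;
        alglib::ae_int_t nvars, nclasses;
        alglib::mnlunpack(lm, a, nvars, nclasses);

        // S(i) = Exp(A[i,0]*X[0]+...+A[i,N-1]*X[N-1]+A[i,N]) for i<M-1,
        // S(M-1) = 1;  P(class=i) = S(i)/(S(0)+...+S(M-1))
        double total = 1.0;
        double s_cls = (cls==nclasses-1 ? 1.0 : 0.0);
        for(alglib::ae_int_t i=0; i<nclasses-1; i++)
        {
            double t = a[i][nvars];
            for(alglib::ae_int_t j=0; j<nvars; j++)
                t += a[i][j]*x[j];
            double s = std::exp(t);
            total += s;
            if( i==cls )
                s_cls = s;
        }
        return s_cls/total;
    }
*************************************************************************/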

/*************************************************************************
"Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
reversed).

INPUT PARAMETERS:
    A           -   model (see MNLUnpack)
    NVars       -   number of independent variables
    NClasses    -   number of classes

OUTPUT PARAMETERS:
    LM          -   logit model.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlpack(const real_2d_array &a, const ae_int_t nvars, const ae_int_t nclasses, logitmodel &lm, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mnlpack(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), nvars, nclasses, const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*ln(2)).

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgce(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mnlavgce(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrelclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mnlrelclserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrmserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mnlrmserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mnlavgerror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgrelerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mnlavgrelerror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Classification error on test set = MNLRelClsError*NPoints

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
ae_int_t mnlclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::mnlclserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}
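
/*************************************************************************
A sketch evaluating the error metrics above on a hold-out set
(non-normative: lm is a trained model, xytest is laid out like the
training set and ntest is its size):

    double ce  = alglib::mnlavgce(lm, xytest, ntest);       // bits per element
    double rel = alglib::mnlrelclserror(lm, xytest, ntest); // relative error
    double rms = alglib::mnlrmserror(lm, xytest, ntest);    // probability RMS
    alglib::ae_int_t nbad = alglib::mnlclserror(lm, xytest, ntest);
    // per the definition above, nbad equals mnlrelclserror(...)*ntest
*************************************************************************/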
#endif

#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This structure is a MCPD (Markov Chains for Population Data) solver.

You should use ALGLIB functions in order to work with this object.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
_mcpdstate_owner::_mcpdstate_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mcpdstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mcpdstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
    alglib_impl::_mcpdstate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mcpdstate_owner::_mcpdstate_owner(const _mcpdstate_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mcpdstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdstate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mcpdstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
    alglib_impl::_mcpdstate_init_copy(p_struct, const_cast<alglib_impl::mcpdstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mcpdstate_owner& _mcpdstate_owner::operator=(const _mcpdstate_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mcpdstate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdstate assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mcpdstate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
    alglib_impl::_mcpdstate_init_copy(p_struct, const_cast<alglib_impl::mcpdstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mcpdstate_owner::~_mcpdstate_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mcpdstate_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mcpdstate* _mcpdstate_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mcpdstate* _mcpdstate_owner::c_ptr() const
{
    return const_cast<alglib_impl::mcpdstate*>(p_struct);
}

mcpdstate::mcpdstate() : _mcpdstate_owner()
{
}

mcpdstate::mcpdstate(const mcpdstate &rhs):_mcpdstate_owner(rhs)
{
}

mcpdstate& mcpdstate::operator=(const mcpdstate &rhs)
{
    if( this==&rhs )
        return *this;
    _mcpdstate_owner::operator=(rhs);
    return *this;
}

mcpdstate::~mcpdstate()
{
}


/*************************************************************************
This structure is a MCPD training report:
    InnerIterationsCount    -   number of inner iterations of the
                                underlying optimization algorithm
    OuterIterationsCount    -   number of outer iterations of the
                                underlying optimization algorithm
    NFEV                    -   number of merit function evaluations
    TerminationType         -   termination type
                                (same as for MinBLEIC optimizer, positive
                                values denote success, negative ones -
                                failure)

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
_mcpdreport_owner::_mcpdreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mcpdreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mcpdreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
    alglib_impl::_mcpdreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mcpdreport_owner::_mcpdreport_owner(const _mcpdreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mcpdreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mcpdreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
    alglib_impl::_mcpdreport_init_copy(p_struct, const_cast<alglib_impl::mcpdreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mcpdreport_owner& _mcpdreport_owner::operator=(const _mcpdreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mcpdreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mcpdreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
    alglib_impl::_mcpdreport_init_copy(p_struct, const_cast<alglib_impl::mcpdreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mcpdreport_owner::~_mcpdreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mcpdreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mcpdreport* _mcpdreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mcpdreport* _mcpdreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::mcpdreport*>(p_struct);
}

mcpdreport::mcpdreport() : _mcpdreport_owner() ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
{
}

mcpdreport::mcpdreport(const mcpdreport &rhs):_mcpdreport_owner(rhs) ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
{
}

mcpdreport& mcpdreport::operator=(const mcpdreport &rhs)
{
    if( this==&rhs )
        return *this;
    _mcpdreport_owner::operator=(rhs);
    return *this;
}

mcpdreport::~mcpdreport()
{
}

/*************************************************************************
DESCRIPTION:

This function creates MCPD (Markov Chains for Population Data) solver.

This solver can be used to find transition matrix P for N-dimensional
prediction problem where transition from X[i] to X[i+1] is modelled as
    X[i+1] = P*X[i]
where X[i] and X[i+1] are N-dimensional population vectors (components of
each X are non-negative), and P is a N*N transition matrix (elements of P
are non-negative, each column sums to 1.0).

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is constant, i.e. there are no new individuals and no
  one leaves the population
* you want to model transitions of individuals from one state into another

USAGE:

Here we give a very brief outline of the MCPD. We strongly recommend you
to read examples in the ALGLIB Reference Manual and to read the ALGLIB
User Guide on data analysis which is available at
http://www.alglib.net/dataanalysis/

1. User initializes algorithm state with MCPDCreate() call

2. User adds one or more tracks - sequences of states which describe
   evolution of a system being modelled from different starting conditions

3. User may add optional boundary, equality and/or linear constraints on
   the coefficients of P by calling one of the following functions:
   * MCPDSetEC() to set equality constraints
   * MCPDSetBC() to set bound constraints
   * MCPDSetLC() to set linear constraints

4. Optionally, user may set custom weights for prediction errors (by
   default, algorithm assigns non-equal, automatically chosen weights for
   errors in the prediction of different components of X). It can be done
   with a call of MCPDSetPredictionWeights() function.

5. User calls MCPDSolve() function which takes algorithm state and finds
   the transition matrix which fits the tracks

6. User calls MCPDResults() to get solution

INPUT PARAMETERS:
    N       -   problem dimension, N>=1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreate(const ae_int_t n, mcpdstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdcreate(n, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
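
/*************************************************************************
A minimal end-to-end sketch of the USAGE steps listed above
(non-normative: the track values are arbitrary proportional data;
mcpdsolve() and mcpdresults() are the MCPDSolve()/MCPDResults() calls
named in the comments):

    #include "dataanalysis.h"

    void mcpd_example()
    {
        // 2-state system and one track of proportions (rows sum to 1.0)
        alglib::mcpdstate s;
        alglib::mcpdcreate(2, s);
        alglib::real_2d_array track = "[[1.0,0.0],[0.8,0.2],[0.7,0.3]]";
        alglib::mcpdaddtrack(s, track);
        alglib::mcpdsolve(s);

        alglib::real_2d_array p;    // N*N matrix, each column sums to 1.0
        alglib::mcpdreport rep;     // rep.terminationtype>0 on success
        alglib::mcpdresults(s, p, rep);
    }
*************************************************************************/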

/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
and one selected component of X[] is called "entry" state and is treated
in a special way:
    system state always transits from "entry" state to some other state
    system state cannot transit from any state into "entry" state
Such conditions basically mean that the row of P which corresponds to the
"entry" state is zero.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - at every moment of time there is some
  (unpredictable) amount of "new" individuals, which can transit into one
  of the states at the next turn, but still no one leaves the population
* you want to model transitions of individuals from one state into another
* but you do NOT want to predict the amount of "new" individuals because
  it does not depend on the individuals already present (hence the system
  cannot transit INTO the entry state - it can only transit FROM it).

This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N           -   problem dimension, N>=2
    EntryState  -   index of entry state, in 0..N-1

OUTPUT PARAMETERS:
    State       -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentry(const ae_int_t n, const ae_int_t entrystate, mcpdstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdcreateentry(n, entrystate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
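
/*************************************************************************
A short sketch of creating the "Entry-state" solver described above
(non-normative: state 0 is chosen as the entry state for illustration):

    alglib::mcpdstate s;
    alglib::mcpdcreateentry(3, 0, s);   // N=3 states, entry state is #0
    // after solving, row 0 of the recovered P is zero: the system never
    // transits INTO the entry state, only FROM it
*************************************************************************/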

/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
and one selected component of X[] is called "exit" state and is treated
in a special way:
    system state can transit from any state into "exit" state
    system state cannot transit from "exit" state into any other state
    transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that the column of P which corresponds to
the "exit" state is zero. Multiplication by such P may decrease the sum
of vector components.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - individuals can move into "exit"
  state and leave the population at the next turn, but there are no new
  individuals
* the amount of individuals which leave the population can be predicted
* you want to model transitions of individuals from one state into
  another (including transitions into the "exit" state)

This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N           -   problem dimension, N>=2
    ExitState   -   index of exit state, in 0..N-1

OUTPUT PARAMETERS:
    State       -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateexit(const ae_int_t n, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdcreateexit(n, exitstate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-Exit-states" model, i.e. model where transition from X[i] to
X[i+1] is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
one selected component of X[] is called "entry" state and is treated in a
special way:
    system state always transits from "entry" state to some other state
    system state cannot transit from any state into "entry" state
and another component of X[] is called "exit" state and is treated in a
special way too:
    system state can transit from any state into "exit" state
    system state cannot transit from "exit" state into any other state
    transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that:
    the row of P which corresponds to the "entry" state is zero
    the column of P which corresponds to the "exit" state is zero
Multiplication by such P may decrease the sum of vector components.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant
* at every moment of time there is some (unpredictable) amount of "new"
  individuals, which can transit into one of the states at the next turn
* some individuals can move (predictably) into "exit" state and leave the
  population at the next turn
* you want to model transitions of individuals from one state into
  another, including transitions from the "entry" state and into the
  "exit" state.
* but you do NOT want to predict the amount of "new" individuals because
  it does not depend on the individuals already present (hence the system
  cannot transit INTO the entry state - it can only transit FROM it).

This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N           -   problem dimension, N>=2
    EntryState  -   index of entry state, in 0..N-1
    ExitState   -   index of exit state, in 0..N-1

OUTPUT PARAMETERS:
    State       -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentryexit(const ae_int_t n, const ae_int_t entrystate, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdcreateentryexit(n, entrystate, exitstate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is used to add a track - a sequence of system states at
different moments of its evolution.

You may add one or several tracks to the MCPD solver. In case you have
several tracks, they won't overwrite each other. For example, if you pass
two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
the solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to
t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks,
i.e. it won't try to model a transition from t=A+3 to t=B+1.

INPUT PARAMETERS:
    S       -   solver
    XY      -   track, array[K,N]:
                * I-th row is a state at t=I
                * elements of XY must be non-negative (exception will be
                  thrown on negative elements)
    K       -   number of points in a track
                * if given, only leading K rows of XY are used
                * if not given, automatically determined from size of XY

NOTES:

1. Track may contain either proportional or population data:
   * with proportional data all rows of XY must sum to 1.0, i.e. we have
     proportions instead of absolute population values
   * with population data rows of XY contain population counts and
     generally do not sum to 1.0 (although they still must be
     non-negative)

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdaddtrack(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), k, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
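
/*************************************************************************
A sketch of the two-track situation described above (non-normative:
values are arbitrary proportional data, s is a solver created earlier).
The solver fits transitions inside each track, but never models a
transition from the end of track A to the start of track B:

    alglib::real_2d_array tracka = "[[0.9,0.1],[0.8,0.2],[0.7,0.3]]";
    alglib::real_2d_array trackb = "[[0.5,0.5],[0.4,0.6]]";
    alglib::mcpdaddtrack(s, tracka);    // models A1->A2 and A2->A3
    alglib::mcpdaddtrack(s, trackb);    // models B1->B2 only
*************************************************************************/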

/*************************************************************************
This function is used to add a track - a sequence of system states at
different moments of its evolution.

You may add one or several tracks to the MCPD solver. In case you have
several tracks, they won't overwrite each other. For example, if you pass
two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
the solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to
t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks,
i.e. it won't try to model a transition from t=A+3 to t=B+1.

INPUT PARAMETERS:
    S       -   solver
    XY      -   track, array[K,N]:
                * I-th row is a state at t=I
                * elements of XY must be non-negative (exception will be
                  thrown on negative elements)
    K       -   number of points in a track
                * if given, only leading K rows of XY are used
                * if not given, automatically determined from size of XY

NOTES:

1. Track may contain either proportional or population data:
   * with proportional data all rows of XY must sum to 1.0, i.e. we have
     proportions instead of absolute population values
   * with population data rows of XY contain population counts and
     generally do not sum to 1.0 (although they still must be
     non-negative)

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t k;

    k = xy.rows();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdaddtrack(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), k, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

/*************************************************************************
This function is used to add equality constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to place equality constraints on an arbitrary
subset of elements of P. The set of constraints is specified by EC, which
may contain either NAN's or finite numbers from [0,1]. NAN denotes
absence of constraint, a finite number denotes equality constraint on a
specific element of P.

You can also use MCPDAddEC() function which allows you to ADD an equality
constraint for one element of P without changing constraints for other
elements.

These functions (MCPDSetEC and MCPDAddEC) interact as follows:
* there is internal matrix of equality constraints which is stored in the
  MCPD solver
* MCPDSetEC() replaces this matrix by another one (SET)
* MCPDAddEC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus MCPDAddEC() call preserves all modifications done by previous
  calls, while MCPDSetEC() completely discards all changes done to the
  equality constraints.

INPUT PARAMETERS:
    S       -   solver
    EC      -   equality constraints, array[N,N]. Elements of EC can be
                either NAN's or finite numbers from [0,1]. NAN denotes
                absence of constraints, while finite value denotes
                equality constraint on the corresponding element of P.

NOTES:

1. infinite values of EC will lead to exception being thrown. Values less
than 0.0 or greater than 1.0 will lead to error code being returned after
call to MCPDSolve().

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetec(const mcpdstate &s, const real_2d_array &ec, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsetec(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(ec.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
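
/*************************************************************************
A sketch of mcpdsetec() for a 2x2 problem (non-normative: s is a solver
created earlier). NAN entries leave the corresponding elements of P
unconstrained; here only P[0,0] is pinned to 0.9:

    alglib::real_2d_array ec;
    ec.setlength(2, 2);
    ec[0][0] = 0.9;                 // equality constraint P[0,0]=0.9
    ec[0][1] = alglib::fp_nan;      // remaining elements unconstrained
    ec[1][0] = alglib::fp_nan;
    ec[1][1] = alglib::fp_nan;
    alglib::mcpdsetec(s, ec);
*************************************************************************/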

/*************************************************************************
This function is used to add equality constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to ADD an equality constraint for one element
of P without changing constraints for other elements.

You can also use MCPDSetEC() function which allows you to specify an
arbitrary set of equality constraints in one call.

These functions (MCPDSetEC and MCPDAddEC) interact as follows:
* there is internal matrix of equality constraints which is stored in the
  MCPD solver
* MCPDSetEC() replaces this matrix by another one (SET)
* MCPDAddEC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus MCPDAddEC() call preserves all modifications done by previous
  calls, while MCPDSetEC() completely discards all changes done to the
  equality constraints.

INPUT PARAMETERS:
    S       -   solver
    I       -   row index of element being constrained
    J       -   column index of element being constrained
    C       -   value (constraint for P[I,J]). Can be either NAN (no
                constraint) or finite value from [0,1].

NOTES:

1. infinite values of C will lead to exception being thrown. Values less
than 0.0 or greater than 1.0 will lead to error code being returned after
call to MCPDSolve().

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddec(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double c, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdaddec(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), i, j, c, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
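
/*************************************************************************
The same constraint expressed with mcpdaddec() (non-normative). Unlike
mcpdsetec(), this call touches one element of the internal constraint
matrix and keeps previously added constraints intact:

    alglib::mcpdaddec(s, 0, 0, 0.9);            // pin P[0,0]=0.9
    alglib::mcpdaddec(s, 0, 0, alglib::fp_nan); // ...or drop it again
*************************************************************************/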

/*************************************************************************
This function is used to add bound constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to place bound constraints on an arbitrary
subset of elements of P. The set of constraints is specified by BndL/BndU
matrices, which may contain arbitrary combination of finite numbers or
infinities (like -INF<x<=0.5 or 0.1<=x<+INF).

You can also use MCPDAddBC() function which allows you to ADD a bound
constraint for one element of P without changing constraints for other
elements.

These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is internal matrix of bound constraints which is stored in the
  MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus MCPDAddBC() call preserves all modifications done by previous
  calls, while MCPDSetBC() completely discards all changes done to the
  bound constraints.

INPUT PARAMETERS:
    S       -   solver
    BndL    -   lower bounds constraints, array[N,N]. Elements of BndL
                can be finite numbers or -INF.
    BndU    -   upper bounds constraints, array[N,N]. Elements of BndU
                can be finite numbers or +INF.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
|
|
void mcpdsetbc(const mcpdstate &s, const real_2d_array &bndl, const real_2d_array &bndu, const xparams _xparams)
|
|
{
|
|
jmp_buf _break_jump;
|
|
alglib_impl::ae_state _alglib_env_state;
|
|
alglib_impl::ae_state_init(&_alglib_env_state);
|
|
if( setjmp(_break_jump) )
|
|
{
|
|
#if !defined(AE_NO_EXCEPTIONS)
|
|
_ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
|
|
#else
|
|
_ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
|
|
return;
|
|
#endif
|
|
}
|
|
ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
|
|
if( _xparams.flags!=0x0 )
|
|
ae_state_set_flags(&_alglib_env_state, _xparams.flags);
|
|
alglib_impl::mcpdsetbc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(bndl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(bndu.c_ptr()), &_alglib_env_state);
|
|
alglib_impl::ae_state_clear(&_alglib_env_state);
|
|
return;
|
|
}
|
|
|
|
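/*
Illustrative usage sketch (added by the editor, not part of the original
ALGLIB distribution). It builds the BndL/BndU matrices for a 2x2 problem
using the alglib::fp_neginf/fp_posinf constants to express one-sided or
absent bounds, as described in the comment above:

    using namespace alglib;

    mcpdstate s;
    mcpdcreate(2, s);
    real_2d_array bndl, bndu;
    bndl.setlength(2, 2);
    bndu.setlength(2, 2);
    for(int i=0; i<2; i++)
        for(int j=0; j<2; j++)
        {
            bndl[i][j] = fp_neginf;   // -INF => no lower bound
            bndu[i][j] = fp_posinf;   // +INF => no upper bound
        }
    bndl[0][0] = 0.1;                 // 0.1 <= P[0,0] <= 0.9
    bndu[0][0] = 0.9;
    mcpdsetbc(s, bndl, bndu);
*/
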
/*************************************************************************
This function is used to add a bound constraint on one element of the
transition matrix P.

The MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

The final constraints passed to the underlying optimizer are calculated
as the intersection of all constraints present. For example, you may
specify a boundary constraint on P[0,0] together with an equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such a combination of constraints will be silently reduced to its
intersection, which is P[0,0]=0.5.

This function can be used to ADD a bound constraint for one element of P
without changing constraints for other elements.

You can also use the MCPDSetBC() function, which allows you to place
bound constraints on an arbitrary subset of elements of P. The set of
constraints is specified by the BndL/BndU matrices, which may contain an
arbitrary combination of finite numbers and infinities (like
-INF<x<=0.5 or 0.1<=x<+INF).

These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is an internal matrix of bound constraints which is stored in
  the MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus an MCPDAddBC() call preserves all modifications made by previous
  calls, while MCPDSetBC() completely discards all changes made to the
  bound constraints.

INPUT PARAMETERS:
    S       -   solver
    I       -   row index of the element being constrained
    J       -   column index of the element being constrained
    BndL    -   lower bound
    BndU    -   upper bound

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddbc(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double bndl, const double bndu, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdaddbc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), i, j, bndl, bndu, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is used to set linear equality/inequality constraints on
the elements of the transition matrix P.

This function can be used to set one or several general linear
constraints on the elements of P. Two types of constraints are supported:
* equality constraints
* inequality constraints (both less-or-equal and greater-or-equal)

Coefficients of constraints are specified by the matrix C (one of the
parameters). One row of C corresponds to one constraint. Because the
transition matrix P has N*N elements, we need N*N columns to store all
coefficients (they are stored row by row), and one more column to store
the right part - hence C has N*N+1 columns. The constraint kind is stored
in the CT array.

Thus, the I-th linear constraint is
    P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
        + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
        + P[N-1,N-1]*C[I,N*N-1]  ?=?  C[I,N*N]
where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).

Your constraint may involve only some subset of P (less than N*N
elements). For example it can be something like
    P[0,0] + P[0,1] = 0.5
In this case you still should pass a matrix with N*N+1 columns, but all
its elements (except for C[0,0], C[0,1] and the right part C[0,N*N])
will be zero.

INPUT PARAMETERS:
    S       -   solver
    C       -   array[K,N*N+1] - coefficients of constraints
                (see above for complete description)
    CT      -   array[K] - constraint types
                (see above for complete description)
    K       -   number of equality/inequality constraints, K>=0:
                * if given, only leading K elements of C/CT are used
                * if not given, automatically determined from sizes of
                  C/CT

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsetlc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is used to set linear equality/inequality constraints on
the elements of the transition matrix P.

This function can be used to set one or several general linear
constraints on the elements of P. Two types of constraints are supported:
* equality constraints
* inequality constraints (both less-or-equal and greater-or-equal)

Coefficients of constraints are specified by the matrix C (one of the
parameters). One row of C corresponds to one constraint. Because the
transition matrix P has N*N elements, we need N*N columns to store all
coefficients (they are stored row by row), and one more column to store
the right part - hence C has N*N+1 columns. The constraint kind is stored
in the CT array.

Thus, the I-th linear constraint is
    P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
        + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
        + P[N-1,N-1]*C[I,N*N-1]  ?=?  C[I,N*N]
where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).

Your constraint may involve only some subset of P (less than N*N
elements). For example it can be something like
    P[0,0] + P[0,1] = 0.5
In this case you still should pass a matrix with N*N+1 columns, but all
its elements (except for C[0,0], C[0,1] and the right part C[0,N*N])
will be zero.

INPUT PARAMETERS:
    S       -   solver
    C       -   array[K,N*N+1] - coefficients of constraints
                (see above for complete description)
    CT      -   array[K] - constraint types
                (see above for complete description)
    K       -   number of equality/inequality constraints, K>=0:
                * if given, only leading K elements of C/CT are used
                * if not given, automatically determined from sizes of
                  C/CT

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t k;
    if( c.rows()!=ct.length() )
        _ALGLIB_CPP_EXCEPTION("Error while calling 'mcpdsetlc': looks like one of arguments has wrong size");
    k = c.rows();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsetlc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

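/*
Illustrative usage sketch (added by the editor, not part of the original
ALGLIB distribution). It encodes the constraint P[0,0]+P[0,1]=0.5 from
the comment above for a 2-state chain: N=2, so C has N*N+1=5 columns,
with the coefficients of P laid out row by row and the last column
holding the right part:

    using namespace alglib;

    mcpdstate s;
    mcpdcreate(2, s);
    //                  P00 P01 P10 P11 rhs
    real_2d_array c  = "[[1,  1,  0,  0, 0.5]]";
    integer_1d_array ct = "[0]";      // 0 => equality constraint
    mcpdsetlc(s, c, ct);              // K inferred from sizes of C/CT
*/
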
/*************************************************************************
This function allows you to tune the amount of Tikhonov regularization
applied to your problem.

By default, the regularizing term is equal to r*||P-prior_P||^2, where r
is a small non-zero value, P is the transition matrix, prior_P is the
identity matrix, and ||X||^2 is the sum of squared elements of X.

This function allows you to change the coefficient r. You can also
change the prior values with the MCPDSetPrior() function.

INPUT PARAMETERS:
    S       -   solver
    V       -   regularization coefficient, finite non-negative value.
                It is not recommended to specify a zero value unless you
                are pretty sure that you want it.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsettikhonovregularizer(const mcpdstate &s, const double v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsettikhonovregularizer(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), v, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function allows you to set the prior values used for regularization
of your problem.

By default, the regularizing term is equal to r*||P-prior_P||^2, where r
is a small non-zero value, P is the transition matrix, prior_P is the
identity matrix, and ||X||^2 is the sum of squared elements of X.

This function allows you to change the prior values prior_P. You can
also change r with the MCPDSetTikhonovRegularizer() function.

INPUT PARAMETERS:
    S       -   solver
    PP      -   array[N,N], matrix of prior values:
                1. elements must be real numbers from [0,1]
                2. columns must sum to 1.0.
                The first property is checked (an exception is thrown
                otherwise), while the second one is not checked/enforced.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetprior(const mcpdstate &s, const real_2d_array &pp, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsetprior(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(pp.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is used to change the prediction weights.

The MCPD solver scales prediction errors as follows:
    Error(P) = ||W*(y-P*x)||^2
where
    x is a system state at time t
    y is a system state at time t+1
    P is a transition matrix
    W is a diagonal scaling matrix

By default, weights are chosen in order to minimize the relative
prediction error instead of the absolute one. For example, if one
component of the state is about 0.5 in magnitude and another one is
about 0.05, then the algorithm will make the corresponding weights equal
to 2.0 and 20.0.

INPUT PARAMETERS:
    S       -   solver
    PW      -   array[N], weights:
                * must be non-negative values (an exception will be
                  thrown otherwise)
                * zero values will be replaced by automatically chosen
                  values

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetpredictionweights(const mcpdstate &s, const real_1d_array &pw, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsetpredictionweights(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(pw.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is used to start the solution of the MCPD problem.

After return from this function, you can use MCPDResults() to get the
solution and the completion code.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsolve(const mcpdstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdsolve(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
MCPD results

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    P       -   array[N,N], transition matrix
    Rep     -   optimization report. You should check Rep.TerminationType
                in order to distinguish successful termination from an
                unsuccessful one. In short, positive values denote
                success, negative ones are failures.
                More information about the fields of this structure can
                be found in the comments on the MCPDReport datatype.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdresults(const mcpdstate &s, real_2d_array &p, mcpdreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mcpdresults(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(p.c_ptr()), const_cast<alglib_impl::mcpdreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

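/*
End-to-end usage sketch for the MCPD interface above (added by the
editor, not part of the original ALGLIB distribution; the track data is
made up). It estimates a 2x2 column-stochastic transition matrix from
one observed track and checks the completion code:

    #include "dataanalysis.h"
    #include <cstdio>
    using namespace alglib;

    int main()
    {
        mcpdstate s;
        mcpdcreate(2, s);
        real_2d_array track = "[[1.0,0.0],[0.95,0.05],[0.90,0.10]]";
        mcpdaddtrack(s, track);                  // rows = successive states
        mcpdsolve(s);
        real_2d_array p;
        mcpdreport rep;
        mcpdresults(s, p, rep);
        if( rep.terminationtype>0 )              // positive => success
            printf("P[0,0]=%.3f\n", p[0][0]);
        return 0;
    }
*/
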
#endif

#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Neural networks ensemble
*************************************************************************/
_mlpensemble_owner::_mlpensemble_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpensemble_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mlpensemble*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpensemble), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
    alglib_impl::_mlpensemble_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpensemble_owner::_mlpensemble_owner(const _mlpensemble_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpensemble_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpensemble copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mlpensemble*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpensemble), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
    alglib_impl::_mlpensemble_init_copy(p_struct, const_cast<alglib_impl::mlpensemble*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpensemble_owner& _mlpensemble_owner::operator=(const _mlpensemble_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpensemble assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpensemble assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mlpensemble_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
    alglib_impl::_mlpensemble_init_copy(p_struct, const_cast<alglib_impl::mlpensemble*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mlpensemble_owner::~_mlpensemble_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mlpensemble_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mlpensemble* _mlpensemble_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mlpensemble* _mlpensemble_owner::c_ptr() const
{
    return const_cast<alglib_impl::mlpensemble*>(p_struct);
}

mlpensemble::mlpensemble() : _mlpensemble_owner()
{
}

mlpensemble::mlpensemble(const mlpensemble &rhs):_mlpensemble_owner(rhs)
{
}

mlpensemble& mlpensemble::operator=(const mlpensemble &rhs)
{
    if( this==&rhs )
        return *this;
    _mlpensemble_owner::operator=(rhs);
    return *this;
}

mlpensemble::~mlpensemble()
{
}

/*************************************************************************
This function serializes the data structure to a string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although the serializer uses spaces and CR+LF as separators, you can
  replace any separator character by an arbitrary combination of spaces,
  tabs, Windows or Unix newlines. This allows flexible reformatting of
  the string in case you want to include it into a text or XML file.
  But you should not insert separators into the middle of the "words",
  nor should you change the case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
  and big endian machines, and so on. You can serialize the structure
  on a 32-bit machine and unserialize it on a 64-bit one (or vice
  versa), or serialize it on SPARC and unserialize it on x86. You can
  also serialize it in the C++ version of ALGLIB and unserialize it in
  the C# one, and vice versa.
*************************************************************************/
void mlpeserialize(mlpensemble &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpealloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::mlpeserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes the data structure from a string.
*************************************************************************/
void mlpeunserialize(const std::string &s_in, mlpensemble &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::mlpeunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

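/*
Round-trip sketch for the string serializers above (added by the editor,
not part of the original ALGLIB distribution). A serialized ensemble can
be stored or shipped as plain text and restored into a fresh object:

    using namespace alglib;

    mlpensemble ens, restored;
    mlpecreate0(2, 1, 5, ens);       // 2 inputs, 1 output, 5 networks
    std::string blob;
    mlpeserialize(ens, blob);        // text representation
    mlpeunserialize(blob, restored); // functionally identical copy
*/
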
/*************************************************************************
This function serializes the data structure to a C++ stream.

The data stream generated by this function is the same as the string
representation generated by the string version of the serializer -
alphanumeric characters, dots, underscores, minus signs, which are
grouped into words separated by spaces and CR+LF.

We recommend you to read the comments on the string version of the
serializer to find out more about the serialization of ALGLIB objects.
*************************************************************************/
void mlpeserialize(mlpensemble &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpealloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::mlpeserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes the data structure from a stream.
*************************************************************************/
void mlpeunserialize(const std::istream &s_in, mlpensemble &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::mlpeunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
Like MLPCreate0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreate0(nin, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

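/*
Creation sketch (added by the editor, not part of the original ALGLIB
distribution). MLPECreate0/1/2 mirror the single-network constructors
MLPCreate0/1/2, adding only the ensemble size as the extra parameter:

    using namespace alglib;

    mlpensemble e0, e1, e2;
    mlpecreate0(3, 2, 10, e0);        // no hidden layers, 10 networks
    mlpecreate1(3, 8, 2, 10, e1);     // one hidden layer with 8 neurons
    mlpecreate2(3, 8, 5, 2, 10, e2);  // two hidden layers (8 and 5)
*/
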
/*************************************************************************
Like MLPCreate1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreate1(nin, nhid, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreate2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreate2(nin, nhid1, nhid2, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateB0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreateb0(nin, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateB1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreateb1(nin, nhid, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateB2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreateb2(nin, nhid1, nhid2, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateR0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreater0(nin, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateR1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreater1(nin, nhid, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateR2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreater2(nin, nhid1, nhid2, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateC0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreatec0(nin, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateC1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreatec1(nin, nhid, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Like MLPCreateC2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreatec2(nin, nhid1, nhid2, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Creates ensemble from network. Only network geometry is copied.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatefromnetwork(const multilayerperceptron &network, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpecreatefromnetwork(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Randomization of MLP ensemble

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlperandomize(const mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlperandomize(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Return ensemble properties (number of inputs and outputs).

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeproperties(const mlpensemble &ensemble, ae_int_t &nin, ae_int_t &nout, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpeproperties(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &nin, &nout, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Return normalization type (whether ensemble is SOFTMAX-normalized or not).

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
bool mlpeissoftmax(const mlpensemble &ensemble, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::mlpeissoftmax(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<bool*>(&result));
}

/*************************************************************************
Processing

INPUT PARAMETERS:
    Ensemble-   neural networks ensemble
    X       -   input vector, array[0..NIn-1].
    Y       -   (possibly) preallocated buffer; if the size of Y is less
                than NOut, it will be reallocated. If it is large enough,
                it is NOT reallocated, so we can save some time on
                reallocation.

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving a regression
                task, vector of posterior probabilities for a
                classification task.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocess(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpeprocess(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

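/*
Processing sketch (added by the editor, not part of the original ALGLIB
distribution; the input values are made up). Y is resized only when it
is too small, as the comment above explains:

    using namespace alglib;

    mlpensemble ens;
    mlpecreate1(2, 6, 1, 10, ens);   // untrained: outputs are arbitrary
    real_1d_array x = "[0.1,0.9]";
    real_1d_array y;                 // mlpeprocess allocates as needed
    mlpeprocess(ens, x, y);
    double estimate = y[0];
*/
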
/*************************************************************************
'interactive' variant of MLPEProcess for languages like Python which
support constructs like "Y = MLPEProcess(Ensemble,X)" and an interactive
mode of the interpreter.

This function allocates a new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from the command line.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocessi(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpeprocessi(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
    Works both for classifier networks and for regression networks which
    are used as classifiers.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlperelclserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlperelclserror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if the ensemble solves a regression task.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgce(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpeavgce(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.
    Its meaning for a regression task is obvious. As for a classification
    task, RMS error means the error when estimating posterior
    probabilities.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpermserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpermserror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

|
|
|
/*************************************************************************
|
|
Average error on the test set
|
|
|
|
INPUT PARAMETERS:
|
|
Ensemble- ensemble
|
|
XY - test set
|
|
NPoints - test set size
|
|
|
|
RESULT:
|
|
Its meaning for regression task is obvious. As for classification task
|
|
it means average error when estimating posterior probabilities.
|
|
|
|
-- ALGLIB --
|
|
Copyright 17.02.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlpeavgerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
|
|
{
|
|
jmp_buf _break_jump;
|
|
alglib_impl::ae_state _alglib_env_state;
|
|
alglib_impl::ae_state_init(&_alglib_env_state);
|
|
if( setjmp(_break_jump) )
|
|
{
|
|
#if !defined(AE_NO_EXCEPTIONS)
|
|
_ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
|
|
#else
|
|
_ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
|
|
return 0;
|
|
#endif
|
|
}
|
|
ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
|
|
if( _xparams.flags!=0x0 )
|
|
ae_state_set_flags(&_alglib_env_state, _xparams.flags);
|
|
double result = alglib_impl::mlpeavgerror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
|
|
alglib_impl::ae_state_clear(&_alglib_env_state);
|
|
return *(reinterpret_cast<double*>(&result));
|
|
}
|
|
|
|
/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average relative error when estimating posterior
    probabilities.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgrelerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::mlpeavgrelerror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
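
/*************************************************************************
Usage sketch for the ensemble error metrics above (illustrative example,
not part of the library). The ensemble geometry (mlpecreatec1) and the
tiny test set are assumptions made for this sketch; the ensemble is
presumed to have been trained already, e.g. with mlpebagginglm().

    alglib::mlpensemble ens;
    alglib::mlpecreatec1(2, 5, 2, 10, ens);  // 2 inputs, 2 classes, 10 members (still untrained here)
    alglib::real_2d_array testxy = "[[0.1,0.2,0],[0.9,0.8,1]]";
    double rms = alglib::mlpermserror(ens, testxy, 2);    // RMS of posterior-probability error
    double avg = alglib::mlpeavgerror(ens, testxy, 2);    // average error
    double rel = alglib::mlpeavgrelerror(ens, testxy, 2); // average relative error
*************************************************************************/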

#endif

#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Training report:
    * RelCLSError   -   fraction of misclassified cases.
    * AvgCE         -   average cross-entropy
    * RMSError      -   root-mean-square error
    * AvgError      -   average error
    * AvgRelError   -   average relative error
    * NGrad         -   number of gradient calculations
    * NHess         -   number of Hessian calculations
    * NCholesky     -   number of Cholesky decompositions

NOTE 1: RelCLSError/AvgCE are zero on regression problems.

NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
        errors in prediction of posterior probabilities
*************************************************************************/
_mlpreport_owner::_mlpreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mlpreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
    alglib_impl::_mlpreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpreport_owner::_mlpreport_owner(const _mlpreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mlpreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
    alglib_impl::_mlpreport_init_copy(p_struct, const_cast<alglib_impl::mlpreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpreport_owner& _mlpreport_owner::operator=(const _mlpreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mlpreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
    alglib_impl::_mlpreport_init_copy(p_struct, const_cast<alglib_impl::mlpreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mlpreport_owner::~_mlpreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mlpreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mlpreport* _mlpreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mlpreport* _mlpreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::mlpreport*>(p_struct);
}

mlpreport::mlpreport() : _mlpreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
{
}

mlpreport::mlpreport(const mlpreport &rhs):_mlpreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
{
}

mlpreport& mlpreport::operator=(const mlpreport &rhs)
{
    if( this==&rhs )
        return *this;
    _mlpreport_owner::operator=(rhs);
    return *this;
}

mlpreport::~mlpreport()
{
}

/*************************************************************************
Cross-validation estimates of generalization error
*************************************************************************/
_mlpcvreport_owner::_mlpcvreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpcvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mlpcvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpcvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
    alglib_impl::_mlpcvreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpcvreport_owner::_mlpcvreport_owner(const _mlpcvreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlpcvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpcvreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mlpcvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpcvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
    alglib_impl::_mlpcvreport_init_copy(p_struct, const_cast<alglib_impl::mlpcvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mlpcvreport_owner& _mlpcvreport_owner::operator=(const _mlpcvreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpcvreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpcvreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mlpcvreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
    alglib_impl::_mlpcvreport_init_copy(p_struct, const_cast<alglib_impl::mlpcvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mlpcvreport_owner::~_mlpcvreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mlpcvreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mlpcvreport* _mlpcvreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mlpcvreport* _mlpcvreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::mlpcvreport*>(p_struct);
}

mlpcvreport::mlpcvreport() : _mlpcvreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

mlpcvreport::mlpcvreport(const mlpcvreport &rhs):_mlpcvreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

mlpcvreport& mlpcvreport::operator=(const mlpcvreport &rhs)
{
    if( this==&rhs )
        return *this;
    _mlpcvreport_owner::operator=(rhs);
    return *this;
}

mlpcvreport::~mlpcvreport()
{
}

/*************************************************************************
Trainer object for neural network.

You should not try to access fields of this object directly - use ALGLIB
functions to work with this object.
*************************************************************************/
_mlptrainer_owner::_mlptrainer_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlptrainer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::mlptrainer*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlptrainer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
    alglib_impl::_mlptrainer_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_mlptrainer_owner::_mlptrainer_owner(const _mlptrainer_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_mlptrainer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlptrainer copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::mlptrainer*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlptrainer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
    alglib_impl::_mlptrainer_init_copy(p_struct, const_cast<alglib_impl::mlptrainer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_mlptrainer_owner& _mlptrainer_owner::operator=(const _mlptrainer_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlptrainer assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlptrainer assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_mlptrainer_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
    alglib_impl::_mlptrainer_init_copy(p_struct, const_cast<alglib_impl::mlptrainer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_mlptrainer_owner::~_mlptrainer_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_mlptrainer_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::mlptrainer* _mlptrainer_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::mlptrainer* _mlptrainer_owner::c_ptr() const
{
    return const_cast<alglib_impl::mlptrainer*>(p_struct);
}

mlptrainer::mlptrainer() : _mlptrainer_owner()
{
}

mlptrainer::mlptrainer(const mlptrainer &rhs):_mlptrainer_owner(rhs)
{
}

mlptrainer& mlptrainer::operator=(const mlptrainer &rhs)
{
    if( this==&rhs )
        return *this;
    _mlptrainer_owner::operator=(rhs);
    return *this;
}

mlptrainer::~mlptrainer()
{
}

/*************************************************************************
Neural network training using modified Levenberg-Marquardt with exact
Hessian calculation and regularization. Subroutine trains neural network
with restarts from random positions. Algorithm is well suited for small
and medium scale problems (hundreds of weights).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -9, if internal matrix inverse subroutine failed
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptrainlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlptrainlm(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
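
/*************************************************************************
Usage sketch for MLPTrainLM (illustrative example, not part of the
library). The 1-5-1 network geometry and the inline dataset are
assumptions made for this sketch; for a regression network each row of XY
is NIn inputs followed by NOut targets.

    alglib::multilayerperceptron net;
    alglib::mlpreport rep;
    alglib::ae_int_t info;
    alglib::real_2d_array xy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]"; // 1 input, 1 output
    alglib::mlpcreate1(1, 5, 1, net);    // one hidden layer with 5 neurons
    alglib::mlptrainlm(net, xy, 3, 0.001, 2, info, rep);
    // info==2 on success; rep.rmserror holds the training-set RMS error
*************************************************************************/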

/*************************************************************************
Neural network training using L-BFGS algorithm with regularization.
Subroutine trains neural network with restarts from random positions.
Algorithm is well suited for problems of any dimensionality (memory
requirements and step complexity are linear by weights number).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.
    WStep       -   stopping criterion. Algorithm stops if step size is
                    less than WStep. Recommended value - 0.01. Zero step
                    size means stopping after MaxIts iterations.
    MaxIts      -   stopping criterion. Algorithm stops after MaxIts
                    iterations (NOT gradient calculations). Zero MaxIts
                    means stopping when step is sufficiently small.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlptrainlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlptrainlbfgs(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Neural network training using early stopping (base algorithm - L-BFGS with
regularization).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    TrnXY       -   training set
    TrnSize     -   training set size, TrnSize>0
    ValXY       -   validation set
    ValSize     -   validation set size, ValSize>0
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts, either:
                    * strictly positive number - algorithm makes specified
                      number of restarts from random position.
                    * -1, in which case algorithm makes exactly one run
                      from the initial state of the network (no
                      randomization).
                    If you don't know what Restarts to choose, choose one
                    of the following:
                    * -1 (deterministic start)
                    * +1 (one random restart)
                    * +5 (moderate amount of random restarts)

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1, ...).
                    *  2, task has been solved, stopping criterion met -
                          sufficiently small step size. Not expected (we
                          use EARLY stopping) but possible and not an
                          error.
                    *  6, task has been solved, stopping criterion met -
                          increasing of validation set error.
    Rep         -   training report

NOTE:

Algorithm stops if validation set error increases for long enough or if
step size becomes small enough (there are tasks where the validation set
error may decrease for eternity). In any case the solution returned
corresponds to the minimum of the validation set error.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptraines(const multilayerperceptron &network, const real_2d_array &trnxy, const ae_int_t trnsize, const real_2d_array &valxy, const ae_int_t valsize, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlptraines(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(trnxy.c_ptr()), trnsize, const_cast<alglib_impl::ae_matrix*>(valxy.c_ptr()), valsize, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
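
/*************************************************************************
Usage sketch for MLPTrainES (illustrative example, not part of the
library). The split into training and validation sets and the network
geometry are assumptions made for this sketch.

    alglib::multilayerperceptron net;
    alglib::mlpreport rep;
    alglib::ae_int_t info;
    alglib::real_2d_array trnxy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]";
    alglib::real_2d_array valxy = "[[0.25,0.0625],[0.75,0.5625]]";
    alglib::mlpcreate1(1, 5, 1, net);
    // Restarts=-1: one deterministic run from the current weights
    alglib::mlptraines(net, trnxy, 3, valxy, 2, 0.001, -1, info, rep);
    // info==6 means early stopping fired; the returned weights correspond
    // to the minimum of the validation set error
*************************************************************************/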

/*************************************************************************
Cross-validation estimate of generalization error.

Base algorithm - L-BFGS.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry. Network is
                    not changed during cross-validation - it is used only
                    as a representative of its architecture.
    XY          -   training set.
    SSize       -   training set size
    Decay       -   weight decay, same as in MLPTrainLBFGS
    Restarts    -   number of restarts, >0.
                    restarts are counted for each partition separately, so
                    total number of restarts will be Restarts*FoldsCount.
    WStep       -   stopping criterion, same as in MLPTrainLBFGS
    MaxIts      -   stopping criterion, same as in MLPTrainLBFGS
    FoldsCount  -   number of folds in k-fold cross-validation,
                    2<=FoldsCount<=SSize.
                    recommended value: 10.

OUTPUT PARAMETERS:
    Info        -   return code, same as in MLPTrainLBFGS
    Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
    CVRep       -   generalization error estimates

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcvlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpkfoldcvlbfgs(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, foldscount, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(cvrep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Cross-validation estimate of generalization error.

Base algorithm - Levenberg-Marquardt.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry. Network is
                    not changed during cross-validation - it is used only
                    as a representative of its architecture.
    XY          -   training set.
    SSize       -   training set size
    Decay       -   weight decay, same as in MLPTrainLBFGS
    Restarts    -   number of restarts, >0.
                    restarts are counted for each partition separately, so
                    total number of restarts will be Restarts*FoldsCount.
    FoldsCount  -   number of folds in k-fold cross-validation,
                    2<=FoldsCount<=SSize.
                    recommended value: 10.

OUTPUT PARAMETERS:
    Info        -   return code, same as in MLPTrainLBFGS
    Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
    CVRep       -   generalization error estimates

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcvlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpkfoldcvlm(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, foldscount, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(cvrep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function estimates generalization error using cross-validation on the
current dataset with current training settings.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network. It must have same number of inputs
                    and outputs/classes as was specified during creation
                    of the trainer object. Network is not changed during
                    cross-validation and is not trained - it is used only
                    as a representative of its architecture. I.e., we
                    estimate generalization properties of ARCHITECTURE,
                    not some specific network.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that for each cross-validation
                      round specified number of random restarts is
                      performed, with best network being chosen after
                      training.
                    * NRestarts=0 is same as NRestarts=1
    FoldsCount  -   number of folds in k-fold cross-validation:
                    * 2<=FoldsCount<=size of dataset
                    * recommended value: 10.
                    * values larger than dataset size will be silently
                      truncated down to dataset size

OUTPUT PARAMETERS:
    Rep         -   structure which contains cross-validation estimates:
                    * Rep.RelCLSError - fraction of misclassified cases.
                    * Rep.AvgCE - average cross-entropy
                    * Rep.RMSError - root-mean-square error
                    * Rep.AvgError - average error
                    * Rep.AvgRelError - average relative error

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      or subset with only one point was given, zeros are returned as
      estimates.

NOTE: this method performs FoldsCount cross-validation rounds, each one
      with NRestarts random starts. Thus, FoldsCount*NRestarts networks
      are trained in total.

NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.

NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
      contain errors in prediction of posterior probabilities.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcv(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, const ae_int_t foldscount, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpkfoldcv(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), nrestarts, foldscount, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
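
/*************************************************************************
Usage sketch for MLPKFoldCV (illustrative example, not part of the
library). The trainer setup and the dataset are assumptions made for this
sketch.

    alglib::mlptrainer trn;
    alglib::multilayerperceptron net;
    alglib::mlpreport cvrep;
    alglib::real_2d_array xy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0],[0.2,0.04]]";
    alglib::mlpcreatetrainer(1, 1, trn);       // regression: 1 input, 1 output
    alglib::mlpsetdataset(trn, xy, 4);
    alglib::mlpcreate1(1, 5, 1, net);          // architecture being evaluated
    alglib::mlpkfoldcv(trn, net, 1, 4, cvrep); // 4 folds, 1 restart per round
    // cvrep.rmserror etc. now hold cross-validation error estimates
*************************************************************************/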

/*************************************************************************
Creation of the network trainer object for regression networks

INPUT PARAMETERS:
    NIn         -   number of inputs, NIn>=1
    NOut        -   number of outputs, NOut>=1

OUTPUT PARAMETERS:
    S           -   neural network trainer object.
                    This structure can be used to train any regression
                    network with NIn inputs and NOut outputs.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpcreatetrainer(const ae_int_t nin, const ae_int_t nout, mlptrainer &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatetrainer(nin, nout, const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Creation of the network trainer object for classification networks

INPUT PARAMETERS:
    NIn         -   number of inputs, NIn>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    S           -   neural network trainer object.
                    This structure can be used to train any classification
                    network with NIn inputs and NClasses classes.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpcreatetrainercls(const ae_int_t nin, const ae_int_t nclasses, mlptrainer &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatetrainercls(nin, nclasses, const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets "current dataset" of the trainer object to one passed
by user.

INPUT PARAMETERS:
    S           -   trainer object
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed.
    NPoints     -   points count, >=0.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0
  to NClasses-1).

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetdataset(const mlptrainer &s, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetdataset(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
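
/*************************************************************************
Dataset layout sketch for MLPSetDataset (illustrative example, not part of
the library). Both matrices below, and the hypothetical trainer objects
rtrainer/ctrainer, are assumptions made for this sketch.

    // Regression, NIn=2, NOut=1: each row is [in0, in1, out0]
    alglib::real_2d_array rxy = "[[0.1,0.2,0.3],[0.4,0.5,0.9]]";
    alglib::mlpsetdataset(rtrainer, rxy, 2);  // rtrainer from mlpcreatetrainer(2, 1, rtrainer)

    // Classification, NIn=2, NClasses=3: last column is the class index 0..2
    alglib::real_2d_array cxy = "[[0.1,0.2,0],[0.4,0.5,2]]";
    alglib::mlpsetdataset(ctrainer, cxy, 2);  // ctrainer from mlpcreatetrainercls(2, 3, ctrainer)
*************************************************************************/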

/*************************************************************************
This function sets "current dataset" of the trainer object to one passed
by user (sparse matrix is used to store dataset).

INPUT PARAMETERS:
    S           -   trainer object
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed. Any sparse storage format can be used:
                    Hash-table, CRS...
    NPoints     -   points count, >=0

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0
  to NClasses-1).

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetsparsedataset(const mlptrainer &s, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetsparsedataset(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets weight decay coefficient which is used for training.

INPUT PARAMETERS:
    S           -   trainer object
    Decay       -   weight decay coefficient, >=0. Weight decay term
                    'Decay*||Weights||^2' is added to error function. If
                    you don't know what Decay to choose, use 1.0E-3.
                    Weight decay can be set to zero, in this case network
                    is trained without weight decay.

NOTE: by default network uses some small nonzero value for weight decay.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetdecay(const mlptrainer &s, const double decay, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetdecay(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), decay, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets stopping criteria for the optimizer.

INPUT PARAMETERS:
    S           -   trainer object
    WStep       -   stopping criterion. Algorithm stops if step size is
                    less than WStep. Recommended value - 0.01. Zero step
                    size means stopping after MaxIts iterations.
                    WStep>=0.
    MaxIts      -   stopping criterion. Algorithm stops after MaxIts
                    epochs (full passes over entire dataset). Zero MaxIts
                    means stopping when step is sufficiently small.
                    MaxIts>=0.

NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also
      used when MLPSetCond() is called with WStep=0 and MaxIts=0.

NOTE: these stopping criteria are used for all kinds of neural training -
      from "conventional" networks to early stopping ensembles. When used
      for "conventional" networks, they are used as the only stopping
      criteria. When combined with early stopping, they are used as
      ADDITIONAL stopping criteria which can terminate the early stopping
      algorithm.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetcond(const mlptrainer &s, const double wstep, const ae_int_t maxits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetcond(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), wstep, maxits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets training algorithm: batch training using L-BFGS will be
used.

This algorithm:
* is the most robust for small-scale problems, but may be too slow for
  large scale ones.
* performs a full pass through the dataset before performing a step
* uses conditions specified by MLPSetCond() for stopping
* is the default one used by the trainer object

INPUT PARAMETERS:
    S           -   trainer object

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetalgobatch(const mlptrainer &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpsetalgobatch(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function trains neural network passed to this function, using current
dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
and current training settings. Training from NRestarts random starting
positions is performed, best network is chosen.

Training is performed using current training algorithm.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network. It must have same number of inputs
                    and outputs/classes as was specified during creation
                    of the trainer object.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that specified number of random
                      restarts are performed, best network is chosen
                      after training
                    * NRestarts=0 means that current state of the network
                      is used for training.

OUTPUT PARAMETERS:
    Network     -   trained network

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      network is filled by zero values. Same behavior for functions
      MLPStartTraining and MLPContinueTraining.

NOTE: this method uses sum-of-squares error function for training.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainnetwork(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlptrainnetwork(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), nrestarts, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
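
/*************************************************************************
End-to-end trainer workflow sketch (illustrative example, not part of the
library). Geometry, dataset and stopping settings are assumptions made
for this sketch.

    alglib::mlptrainer trn;
    alglib::multilayerperceptron net;
    alglib::mlpreport rep;
    alglib::real_2d_array xy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]";
    alglib::mlpcreatetrainer(1, 1, trn);   // regression trainer: 1 input, 1 output
    alglib::mlpsetdataset(trn, xy, 3);
    alglib::mlpsetdecay(trn, 0.001);
    alglib::mlpsetcond(trn, 0.01, 0);      // stop on small step, no epoch cap
    alglib::mlpcreate1(1, 5, 1, net);
    alglib::mlptrainnetwork(trn, net, 5, rep);  // 5 random restarts, best net kept
*************************************************************************/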

/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
           not recommend you to use it unless you are pretty sure that
           you need the ability to monitor training progress.

This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.

After call to this function trainer object remembers network and is ready
to train it. However, no training is performed until first call to
MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
will advance training progress one iteration further.

EXAMPLE:
    >
    > ...initialize network and trainer object....
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     ...visualize training progress...
    >

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network. It must have same number of inputs
                    and outputs/classes as was specified during creation
                    of the trainer object.
    RandomStart -   randomize network before training or not:
                    * True means that network is randomized and its
                      initial state (one which was passed to the trainer
                      object) is lost.
                    * False means that training is started from the
                      current state of the network

OUTPUT PARAMETERS:
    Network     -   neural network which is ready for training (weights
                    are initialized, preprocessor is initialized using
                    current training set)

NOTE: this method uses sum-of-squares error function for training.

NOTE: it is expected that trainer object settings are NOT changed during
      step-by-step training, i.e. no one changes stopping criteria or
      training set during training. It is possible and there is no
      defense against such actions, but algorithm behavior in such cases
      is undefined and can be unpredictable.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpstarttraining(const mlptrainer &s, const multilayerperceptron &network, const bool randomstart, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpstarttraining(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), randomstart, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
           not recommend you to use it unless you are pretty sure that
           you need the ability to monitor training progress.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.

This function performs one more iteration of the training and returns
either True (training continues) or False (training stopped). In case True
was returned, Network weights are updated according to the current state
of the optimization progress. In case False was returned, no additional
updates are performed (the previous update of the network weights moved us
to the final point, and no additional updates are needed).

EXAMPLE:
    >
    > [initialize network and trainer object]
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     [visualize training progress]
    >

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network structure, which is used to store
                    current state of the training process.

OUTPUT PARAMETERS:
    Network     -   weights of the neural network are rewritten by the
                    current approximation.

NOTE: this method uses sum-of-squares error function for training.

NOTE: it is expected that trainer object settings are NOT changed during
      step-by-step training, i.e. no one changes stopping criteria or
      training set during training. It is possible and there is no
      defense against such actions, but algorithm behavior in such cases
      is undefined and can be unpredictable.

NOTE: It is expected that Network is the same one which was passed to
      MLPStartTraining() function. However, THIS function checks only
      the following:
      * that number of network inputs is consistent with trainer object
        settings
      * that number of network outputs/classes is consistent with trainer
        object settings
      * that number of network weights is the same as number of weights
        in the network passed to MLPStartTraining() function
      Exception is thrown when these conditions are violated.

      It is also expected that you do not change state of the network on
      your own - the only party who has the right to change network
      during its training is the trainer object. Any attempt to interfere
      with the trainer may lead to unpredictable results.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
bool mlpcontinuetraining(const mlptrainer &s, const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::mlpcontinuetraining(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<bool*>(&result));
}
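
/*************************************************************************
C++ rendering of the step-by-step training loop from the pseudocode above
(illustrative example, not part of the library). The trainer and network
are assumed to have been set up as in the MLPTrainNetwork sketch.

    alglib::mlpstarttraining(trn, net, true);   // randomize and bind net
    while( alglib::mlpcontinuetraining(trn, net) )
    {
        // one optimizer iteration has been applied to net's weights;
        // inspect or visualize training progress here
    }
*************************************************************************/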

/*************************************************************************
Training a neural network ensemble using bootstrap aggregating (bagging).
Modified Levenberg-Marquardt algorithm is used as base training method.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report.
    OOBErrors   -   out-of-bag generalization error estimate

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglm(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpebagginglm(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(ooberrors.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
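
/*************************************************************************
Usage sketch for MLPEBaggingLM (illustrative example, not part of the
library). The ensemble geometry and the dataset are assumptions made for
this sketch.

    alglib::mlpensemble ens;
    alglib::mlpreport rep;
    alglib::mlpcvreport oob;
    alglib::ae_int_t info;
    alglib::real_2d_array xy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]";
    alglib::mlpecreate1(1, 5, 1, 20, ens);     // 20 bagged 1-5-1 networks
    alglib::mlpebagginglm(ens, xy, 3, 0.001, 2, info, rep, oob);
    // oob.rmserror etc. hold out-of-bag generalization error estimates
*************************************************************************/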
|
|
|
|
/*************************************************************************
Training neural networks ensemble using bootstrap aggregating (bagging).
L-BFGS algorithm is used as base training method.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.
    WStep       -   stopping criterion, same as in MLPTrainLBFGS
    MaxIts      -   stopping criterion, same as in MLPTrainLBFGS

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report.
    OOBErrors   -   out-of-bag generalization error estimate

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglbfgs(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpebagginglbfgs(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(ooberrors.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Training neural networks ensemble using early stopping.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  6, if task has been solved.
    Rep         -   training report.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlpetraines(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpetraines(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function trains neural network ensemble passed to this function using
current dataset and early stopping training algorithm. Each early stopping
round performs NRestarts random restarts (thus, EnsembleSize*NRestarts
training rounds are performed in total).

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   trainer object;
    Ensemble    -   neural network ensemble. It must have same number of
                    inputs and outputs/classes as was specified during
                    creation of the trainer object.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that specified number of random
                      restarts are performed during each ES round;
                    * NRestarts=0 is silently replaced by 1.

OUTPUT PARAMETERS:
    Ensemble    -   trained ensemble;
    Rep         -   it contains all types of errors.

NOTE: this training method uses BOTH early stopping and weight decay! So,
      you should select weight decay before starting training just as you
      select it before training "conventional" networks.

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      or a single-point dataset was passed, ensemble is filled by zero
      values.

NOTE: this method uses sum-of-squares error function for training.

  -- ALGLIB --
     Copyright 22.08.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainensemblees(const mlptrainer &s, const mlpensemble &ensemble, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlptrainensemblees(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), nrestarts, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

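/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Trainer-
based early-stopping ensemble training on a hypothetical 2-class dataset;
mlpcreatetrainercls(), mlpsetdataset() and mlpecreatec1() come from the
MLPTRAIN and MLPE units and are assumed to be available in this build.

    alglib::mlptrainer trn;
    alglib::mlpensemble ensemble;
    alglib::mlpreport rep;
    alglib::real_2d_array xy = "[[0.1,0.2,0],[0.3,0.1,1],[0.2,0.4,0]]";
    alglib::mlpcreatetrainercls(2, 2, trn);       // 2 inputs, 2 classes
    alglib::mlpsetdataset(trn, xy, 3);
    alglib::mlpecreatec1(2, 5, 2, 10, ensemble);  // ensemble of 10 classifiers
    alglib::mlptrainensemblees(trn, ensemble, 3, rep);  // 3 restarts per ES round
*************************************************************************/
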
#endif

#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This structure is a clusterization engine.

You should not try to access its fields directly.
Use ALGLIB functions in order to work with this object.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
_clusterizerstate_owner::_clusterizerstate_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_clusterizerstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::clusterizerstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::clusterizerstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
    alglib_impl::_clusterizerstate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_clusterizerstate_owner::_clusterizerstate_owner(const _clusterizerstate_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_clusterizerstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: clusterizerstate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::clusterizerstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::clusterizerstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
    alglib_impl::_clusterizerstate_init_copy(p_struct, const_cast<alglib_impl::clusterizerstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_clusterizerstate_owner& _clusterizerstate_owner::operator=(const _clusterizerstate_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: clusterizerstate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: clusterizerstate assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_clusterizerstate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
    alglib_impl::_clusterizerstate_init_copy(p_struct, const_cast<alglib_impl::clusterizerstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_clusterizerstate_owner::~_clusterizerstate_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_clusterizerstate_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::clusterizerstate* _clusterizerstate_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::clusterizerstate* _clusterizerstate_owner::c_ptr() const
{
    return const_cast<alglib_impl::clusterizerstate*>(p_struct);
}
clusterizerstate::clusterizerstate() : _clusterizerstate_owner()
{
}

clusterizerstate::clusterizerstate(const clusterizerstate &rhs):_clusterizerstate_owner(rhs)
{
}

clusterizerstate& clusterizerstate::operator=(const clusterizerstate &rhs)
{
    if( this==&rhs )
        return *this;
    _clusterizerstate_owner::operator=(rhs);
    return *this;
}

clusterizerstate::~clusterizerstate()
{
}

/*************************************************************************
This structure is used to store results of the agglomerative hierarchical
clustering (AHC).

Following information is returned:

* TerminationType - completion code:
  * 1   for successful completion of algorithm
  * -5  inappropriate combination of clustering algorithm and distance
        function was used. As for now, it is possible only when Ward's
        method is called for dataset with non-Euclidean distance function.
  In case negative completion code is returned, other fields of report
  structure are invalid and should not be used.

* NPoints contains number of points in the original dataset

* Z contains information about merges performed (see below). Z contains
  indexes from the original (unsorted) dataset and it can be used when you
  need to know what points were merged. However, it is not convenient when
  you want to build a dendrogram (see below).

* if you want to build dendrogram, you can use Z, but it is not a good
  option, because Z contains indexes from unsorted dataset. Dendrogram
  built from such dataset is likely to have intersections. So, you have to
  reorder your points before building dendrogram.
  Permutation which reorders points is returned in P. Another
  representation of merges, which is more convenient for dendrogram
  construction, is returned in PM.

* more information on format of Z, P and PM can be found below and in the
  examples from ALGLIB Reference Manual.

FORMAL DESCRIPTION OF FIELDS:
    NPoints         number of points
    Z               array[NPoints-1,2], contains indexes of clusters
                    linked in pairs to form clustering tree. I-th row
                    corresponds to I-th merge:
                    * Z[I,0] - index of the first cluster to merge
                    * Z[I,1] - index of the second cluster to merge
                    * Z[I,0]<Z[I,1]
                    * clusters are numbered from 0 to 2*NPoints-2, with
                      indexes from 0 to NPoints-1 corresponding to points
                      of the original dataset, and indexes from NPoints to
                      2*NPoints-2 corresponding to clusters generated by
                      subsequent merges (I-th row of Z creates cluster
                      with index NPoints+I).

                    IMPORTANT: indexes in Z[] are indexes in the ORIGINAL,
                    unsorted dataset. In addition to Z, the algorithm
                    outputs a permutation which rearranges points in such
                    a way that subsequent merges are performed on adjacent
                    points (such order is needed if you want to build a
                    dendrogram). However, indexes in Z are related to the
                    original, unrearranged sequence of points.

    P               array[NPoints], permutation which reorders points for
                    dendrogram construction. P[i] contains index of the
                    position where we should move I-th point of the
                    original dataset in order to apply merges PZ/PM.

    PZ              same as Z, but for permutation of points given by P.
                    The only thing which changed are indexes of the
                    original points; indexes of clusters remained same.

    MergeDist       array[NPoints-1], contains distances between clusters
                    being merged (MergeDist[i] corresponds to merge stored
                    in Z[i,...]):
                    * CLINK, SLINK and average linkage algorithms report
                      "raw", unmodified distance metric.
                    * Ward's method reports weighted intra-cluster
                      variance, which is equal to ||Ca-Cb||^2 * Sa*Sb/(Sa+Sb).
                      Here A and B are clusters being merged, Ca is a
                      center of A, Cb is a center of B, Sa is a size of A,
                      Sb is a size of B.

    PM              array[NPoints-1,6], another representation of merges,
                    which is suited for dendrogram construction. It deals
                    with rearranged points (permutation P is applied) and
                    represents merges in a form which is different from
                    the one used by Z.
                    For each I from 0 to NPoints-2, I-th row of PM
                    represents merge performed on two clusters C0 and C1.
                    Here:
                    * C0 contains points with indexes PM[I,0]...PM[I,1]
                    * C1 contains points with indexes PM[I,2]...PM[I,3]
                    * indexes stored in PM are given for dataset sorted
                      according to permutation P
                    * PM[I,1]=PM[I,2]-1 (only adjacent clusters are merged)
                    * PM[I,0]<=PM[I,1], PM[I,2]<=PM[I,3], i.e. both
                      clusters contain at least one point
                    * heights of "subdendrograms" corresponding to C0/C1
                      are stored in PM[I,4] and PM[I,5]. Subdendrograms
                      corresponding to single-point clusters have
                      height=0. Dendrogram of the merge result has height
                      H=max(H0,H1)+1.

NOTE: there is one-to-one correspondence between merges described by Z and
      PM. I-th row of Z describes same merge of clusters as I-th row of
      PM, with "left" cluster from Z corresponding to the "left" one from
      PM.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
_ahcreport_owner::_ahcreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_ahcreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::ahcreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::ahcreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
    alglib_impl::_ahcreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_ahcreport_owner::_ahcreport_owner(const _ahcreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_ahcreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ahcreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::ahcreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::ahcreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
    alglib_impl::_ahcreport_init_copy(p_struct, const_cast<alglib_impl::ahcreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_ahcreport_owner& _ahcreport_owner::operator=(const _ahcreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: ahcreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ahcreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_ahcreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
    alglib_impl::_ahcreport_init_copy(p_struct, const_cast<alglib_impl::ahcreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_ahcreport_owner::~_ahcreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_ahcreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::ahcreport* _ahcreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::ahcreport* _ahcreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::ahcreport*>(p_struct);
}
ahcreport::ahcreport() : _ahcreport_owner() ,terminationtype(p_struct->terminationtype),npoints(p_struct->npoints),p(&p_struct->p),z(&p_struct->z),pz(&p_struct->pz),pm(&p_struct->pm),mergedist(&p_struct->mergedist)
{
}

ahcreport::ahcreport(const ahcreport &rhs):_ahcreport_owner(rhs) ,terminationtype(p_struct->terminationtype),npoints(p_struct->npoints),p(&p_struct->p),z(&p_struct->z),pz(&p_struct->pz),pm(&p_struct->pm),mergedist(&p_struct->mergedist)
{
}

ahcreport& ahcreport::operator=(const ahcreport &rhs)
{
    if( this==&rhs )
        return *this;
    _ahcreport_owner::operator=(rhs);
    return *this;
}

ahcreport::~ahcreport()
{
}

/*************************************************************************
This structure is used to store results of the k-means clustering
algorithm.

Following information is always returned:
* NPoints contains number of points in the original dataset
* TerminationType contains completion code, negative on failure, positive
  on success
* K contains number of clusters

For positive TerminationType we return:
* NFeatures contains number of variables in the original dataset
* C, which contains centers found by algorithm
* CIdx, which maps points of the original dataset to clusters

FORMAL DESCRIPTION OF FIELDS:
    NPoints         number of points, >=0
    NFeatures       number of variables, >=1
    TerminationType completion code:
                    * -5 if distance type is anything different from
                         Euclidean metric
                    * -3 for degenerate dataset: a) less than K distinct
                         points, b) K=0 for non-empty dataset.
                    * +1 for successful completion
    K               number of clusters
    C               array[K,NFeatures], rows of the array store centers
    CIdx            array[NPoints], which contains cluster indexes
    IterationsCount actual number of iterations performed by clusterizer.
                    If algorithm performed more than one random restart,
                    total number of iterations is returned.
    Energy          merit function, "energy", sum of squared deviations
                    from cluster centers

  -- ALGLIB --
     Copyright 27.11.2012 by Bochkanov Sergey
*************************************************************************/
_kmeansreport_owner::_kmeansreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_kmeansreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::kmeansreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::kmeansreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
    alglib_impl::_kmeansreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_kmeansreport_owner::_kmeansreport_owner(const _kmeansreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_kmeansreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: kmeansreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::kmeansreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::kmeansreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
    alglib_impl::_kmeansreport_init_copy(p_struct, const_cast<alglib_impl::kmeansreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_kmeansreport_owner& _kmeansreport_owner::operator=(const _kmeansreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: kmeansreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: kmeansreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_kmeansreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
    alglib_impl::_kmeansreport_init_copy(p_struct, const_cast<alglib_impl::kmeansreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_kmeansreport_owner::~_kmeansreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_kmeansreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::kmeansreport* _kmeansreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::kmeansreport* _kmeansreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::kmeansreport*>(p_struct);
}
kmeansreport::kmeansreport() : _kmeansreport_owner() ,npoints(p_struct->npoints),nfeatures(p_struct->nfeatures),terminationtype(p_struct->terminationtype),iterationscount(p_struct->iterationscount),energy(p_struct->energy),k(p_struct->k),c(&p_struct->c),cidx(&p_struct->cidx)
{
}

kmeansreport::kmeansreport(const kmeansreport &rhs):_kmeansreport_owner(rhs) ,npoints(p_struct->npoints),nfeatures(p_struct->nfeatures),terminationtype(p_struct->terminationtype),iterationscount(p_struct->iterationscount),energy(p_struct->energy),k(p_struct->k),c(&p_struct->c),cidx(&p_struct->cidx)
{
}

kmeansreport& kmeansreport::operator=(const kmeansreport &rhs)
{
    if( this==&rhs )
        return *this;
    _kmeansreport_owner::operator=(rhs);
    return *this;
}

kmeansreport::~kmeansreport()
{
}

/*************************************************************************
This function initializes clusterizer object. Newly initialized object is
empty, i.e. it does not contain dataset. You should use it as follows:
1. creation
2. dataset is added with ClusterizerSetPoints()
3. additional parameters are set
4. clusterization is performed with one of the clustering functions

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizercreate(clusterizerstate &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizercreate(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

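/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). The full
workflow from the comment above - create, attach points, cluster - on a
hypothetical 2D dataset.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 5, 2, 2); // 5 points, 2 features, Euclidean
    alglib::clusterizerrunahc(s, rep);            // rep.terminationtype==1 on success
*************************************************************************/
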
/*************************************************************************
This function adds dataset to the clusterizer structure.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    XY      -   array[NPoints,NFeatures], dataset
    NPoints -   number of points, >=0
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
                *  2    Euclidean distance  (L2 norm), non-squared
                * 10    Pearson correlation:
                        dist(a,b) = 1-corr(a,b)
                * 11    Absolute Pearson correlation:
                        dist(a,b) = 1-|corr(a,b)|
                * 12    Uncentered Pearson correlation (cosine of the angle):
                        dist(a,b) = a'*b/(|a|*|b|)
                * 13    Absolute uncentered Pearson correlation
                        dist(a,b) = |a'*b|/(|a|*|b|)
                * 20    Spearman rank correlation:
                        dist(a,b) = 1-rankcorr(a,b)
                * 21    Absolute Spearman rank correlation
                        dist(a,b) = 1-|rankcorr(a,b)|

NOTE 1: different distance functions have different performance penalty:
        * Euclidean or Pearson correlation distances are the fastest ones
        * Spearman correlation distance function is a bit slower
        * city block and Chebyshev distances are an order of magnitude
          slower

        The reason behind the difference in performance is that
        correlation-based distance functions are computed using optimized
        linear algebra kernels, while Chebyshev and city block distance
        functions are computed using simple nested loops with two branches
        at each iteration.

NOTE 2: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric
        * k-means++ clustering algorithm may be used only with Euclidean
          distance function
        Thus, the list of specific clustering algorithms you may use
        depends on the distance function you specify when you set your
        dataset.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetpoints(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function adds dataset to the clusterizer structure.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    XY      -   array[NPoints,NFeatures], dataset
    NPoints -   number of points, >=0
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
                *  2    Euclidean distance  (L2 norm), non-squared
                * 10    Pearson correlation:
                        dist(a,b) = 1-corr(a,b)
                * 11    Absolute Pearson correlation:
                        dist(a,b) = 1-|corr(a,b)|
                * 12    Uncentered Pearson correlation (cosine of the angle):
                        dist(a,b) = a'*b/(|a|*|b|)
                * 13    Absolute uncentered Pearson correlation
                        dist(a,b) = |a'*b|/(|a|*|b|)
                * 20    Spearman rank correlation:
                        dist(a,b) = 1-rankcorr(a,b)
                * 21    Absolute Spearman rank correlation
                        dist(a,b) = 1-|rankcorr(a,b)|

NOTE 1: different distance functions have different performance penalty:
        * Euclidean or Pearson correlation distances are the fastest ones
        * Spearman correlation distance function is a bit slower
        * city block and Chebyshev distances are an order of magnitude
          slower

        The reason behind the difference in performance is that
        correlation-based distance functions are computed using optimized
        linear algebra kernels, while Chebyshev and city block distance
        functions are computed using simple nested loops with two branches
        at each iteration.

NOTE 2: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric
        * k-means++ clustering algorithm may be used only with Euclidean
          distance function
        Thus, the list of specific clustering algorithms you may use
        depends on the distance function you specify when you set your
        dataset.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t disttype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t npoints;
    ae_int_t nfeatures;

    npoints = xy.rows();
    nfeatures = xy.cols();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetpoints(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

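/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). The
overload above infers NPoints/NFeatures from the matrix shape; here it is
combined with Pearson correlation distance (DistType=10) on placeholder
data.

    alglib::clusterizerstate s;
    alglib::real_2d_array xy = "[[1,2,3],[3,2,1],[2,2,2]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 10);      // dist(a,b) = 1-corr(a,b)
*************************************************************************/
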
/*************************************************************************
This function adds dataset given by distance matrix to the clusterizer
structure. It is important that dataset is not given explicitly - only
distance matrix is given.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    D       -   array[NPoints,NPoints], distance matrix given by its upper
                or lower triangle (main diagonal is ignored because its
                entries are expected to be zero).
    NPoints -   number of points
    IsUpper -   whether upper or lower triangle of D is given.

NOTE 1: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric, including one which is given
          by distance matrix
        * k-means++ clustering algorithm may be used only with Euclidean
          distance function and explicitly given points - it cannot be
          used with dataset given by distance matrix
        Thus, if you call this function, you will be unable to use k-means
        clustering algorithm to process your problem.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const ae_int_t npoints, const bool isupper, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetdistances(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), npoints, isupper, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function adds dataset given by distance matrix to the clusterizer
structure. It is important that dataset is not given explicitly - only
distance matrix is given.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    D       -   array[NPoints,NPoints], distance matrix given by its upper
                or lower triangle (main diagonal is ignored because its
                entries are expected to be zero).
    NPoints -   number of points
    IsUpper -   whether upper or lower triangle of D is given.

NOTE 1: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric, including one which is given
          by distance matrix
        * k-means++ clustering algorithm may be used only with Euclidean
          distance function and explicitly given points - it cannot be
          used with dataset given by distance matrix
        Thus, if you call this function, you will be unable to use k-means
        clustering algorithm to process your problem.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
#if !defined(AE_NO_EXCEPTIONS)
void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const bool isupper, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t npoints;
    if( (d.rows()!=d.cols()))
        _ALGLIB_CPP_EXCEPTION("Error while calling 'clusterizersetdistances': looks like one of arguments has wrong size");
    npoints = d.rows();
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetdistances(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), npoints, isupper, &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

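/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Attaching
a user-supplied distance matrix (upper triangle used, diagonal ignored);
the values are placeholders. Note that this rules out k-means later on.

    alglib::clusterizerstate s;
    alglib::real_2d_array d = "[[0,1,4],[0,0,3],[0,0,0]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetdistances(s, d, true);  // square matrix, NPoints inferred
*************************************************************************/
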
/*************************************************************************
This function sets agglomerative hierarchical clustering algorithm.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Algo    -   algorithm type:
                * 0     complete linkage (default algorithm)
                * 1     single linkage
                * 2     unweighted average linkage
                * 3     weighted average linkage
                * 4     Ward's method

NOTE: Ward's method works correctly only with Euclidean distance, that's
      why the algorithm will return negative termination code (failure)
      for any other distance type.

      It is possible, however, to use this method with user-supplied
      distance matrix. It is your responsibility to pass one which was
      calculated with Euclidean distance function.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetahcalgo(const clusterizerstate &s, const ae_int_t algo, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetahcalgo(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), algo, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

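/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Selecting
Ward's method (Algo=4), which requires Euclidean distances; placeholder
data.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);       // DistType=2: Euclidean
    alglib::clusterizersetahcalgo(s, 4);          // Ward's method
    alglib::clusterizerrunahc(s, rep);            // negative rep.terminationtype if non-Euclidean
*************************************************************************/
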
/*************************************************************************
This function sets k-means properties: number of restarts and maximum
number of iterations per one run.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Restarts-   restarts count, >=1.
                k-means++ algorithm performs several restarts and chooses
                best set of centers (one with minimum squared distance).
    MaxIts  -   maximum number of k-means iterations performed during one
                run. >=0, zero value means that algorithm performs
                unlimited number of iterations.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeanslimits(const clusterizerstate &s, const ae_int_t restarts, const ae_int_t maxits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetkmeanslimits(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), restarts, maxits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets k-means initialization algorithm. Several different
algorithms can be chosen, including k-means++.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    InitAlgo-   initialization algorithm:
                * 0     automatic selection (different versions of ALGLIB
                        may select different algorithms)
                * 1     random initialization
                * 2     k-means++ initialization (best quality of initial
                        centers, but long non-parallelizable
                        initialization phase with bad cache locality)
                * 3     "fast-greedy" algorithm with efficient, easy to
                        parallelize initialization. Quality of initial
                        centers is somewhat worse than that of k-means++.
                        This algorithm is the default one in the current
                        version of ALGLIB.
                * -1    "debug" algorithm which always selects first K
                        rows of dataset; this algorithm is used for debug
                        purposes only. Do not use it in industrial code!

  -- ALGLIB --
     Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeansinit(const clusterizerstate &s, const ae_int_t initalgo, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetkmeansinit(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), initalgo, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets seed which is used to initialize internal RNG. By
default, deterministic seed is used - same for each run of clusterizer.
If you specify non-deterministic seed value, then some algorithms which
depend on random initialization (in current version: k-means) may return
slightly different results after each run.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Seed    -   seed:
                * positive values = use deterministic seed for each run
                  of algorithms which depend on random initialization
                * zero or negative values = use non-deterministic seed

  -- ALGLIB --
     Copyright 08.06.2017 by Bochkanov Sergey
*************************************************************************/
void clusterizersetseed(const clusterizerstate &s, const ae_int_t seed, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizersetseed(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), seed, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

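/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Fixing the
RNG seed so that repeated k-means runs produce identical results.

    alglib::clusterizerstate s;
    alglib::clusterizercreate(s);
    alglib::clusterizersetseed(s, 42);            // any positive value => deterministic runs
*************************************************************************/
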
/*************************************************************************
This function performs agglomerative hierarchical clustering.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

NOTE: Agglomerative hierarchical clustering algorithm has two phases:
      distance matrix calculation and clustering itself. Only the first
      phase (distance matrix calculation) is accelerated by Intel MKL and
      multithreading. Thus, acceleration is significant only for medium or
      high-dimensional problems.

      Although activating multithreading gives some speedup over single-
      threaded execution, you should not expect nearly-linear scaling
      with respect to cores count.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()

OUTPUT PARAMETERS:
    Rep     -   clustering results; see description of AHCReport
                structure for more information.

NOTE 1: hierarchical clustering algorithms require large amounts of
        memory. In particular, this implementation needs
        sizeof(double)*NPoints^2 bytes, which are used to store the
        distance matrix. In case we work with user-supplied matrix, this
        amount is multiplied by 2 (we have to store original matrix and
        to work with its copy).

        For example, problem with 10000 points would require 800M of RAM,
        even when working in a 1-dimensional space.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunahc(const clusterizerstate &s, ahcreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizerrunahc(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

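/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Running
AHC and reading the merge structure from the report; placeholder data.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);
    alglib::clusterizerrunahc(s, rep);
    // row I of rep.z merges clusters rep.z[I][0] and rep.z[I][1] into a
    // new cluster with index NPoints+I, at distance rep.mergedist[I]
*************************************************************************/
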
/*************************************************************************
This function performs clustering by k-means++ algorithm.

You may change algorithm properties by calling:
* ClusterizerSetKMeansLimits() to change number of restarts or iterations
* ClusterizerSetKMeansInit() to change initialization algorithm

By default, one restart and unlimited number of iterations are used.
Initialization algorithm is chosen automatically.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

NOTE: k-means clustering algorithm has two phases: selection of initial
      centers and clustering itself. ALGLIB parallelizes both phases.
      Parallel version is optimized for the following scenario: medium or
      high-dimensional problem (8 or more dimensions) with large number
      of points and clusters. However, some speed-up can be obtained even
      when assumptions above are violated.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    K       -   number of clusters, K>=0.
                K can be zero only when algorithm is called for empty
                dataset, in this case completion code is set to
                success (+1).
                If K=0 and dataset size is non-zero, we can not
                meaningfully assign points to some center (there are no
                centers because K=0) and return -3 as completion code
                (failure).

OUTPUT PARAMETERS:
    Rep     -   clustering results; see description of KMeansReport
                structure for more information.

NOTE 1: k-means clustering can be performed only for datasets with
        Euclidean distance function. Algorithm will return negative
        completion code in Rep.TerminationType in case dataset was added
        to clusterizer with DistType other than Euclidean (or dataset was
        specified by distance matrix instead of explicitly given points).

NOTE 2: by default, k-means uses non-deterministic seed to initialize RNG
        which is used to select initial centers. As a result, each run of
        algorithm may return different values. If you need deterministic
        behavior, use ClusterizerSetSeed() function.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunkmeans(const clusterizerstate &s, const ae_int_t k, kmeansreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizerrunkmeans(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), k, const_cast<alglib_impl::kmeansreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

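/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). K-means
with two clusters, several restarts and an iteration cap; placeholder
data.

    alglib::clusterizerstate s;
    alglib::kmeansreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);        // k-means needs Euclidean points
    alglib::clusterizersetkmeanslimits(s, 5, 100); // 5 restarts, at most 100 iterations
    alglib::clusterizerrunkmeans(s, 2, rep);
    // on success rep.terminationtype==1, rep.c stores the 2 centers and
    // rep.cidx[i] is the cluster index of point i
*************************************************************************/
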
/*************************************************************************
This function returns distance matrix for dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    XY      -   array[NPoints,NFeatures], dataset
    NPoints -   number of points, >=0
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
                *  2    Euclidean distance  (L2 norm, non-squared)
                * 10    Pearson correlation:
                        dist(a,b) = 1-corr(a,b)
                * 11    Absolute Pearson correlation:
                        dist(a,b) = 1-|corr(a,b)|
                * 12    Uncentered Pearson correlation (cosine of the angle):
                        dist(a,b) = a'*b/(|a|*|b|)
                * 13    Absolute uncentered Pearson correlation
                        dist(a,b) = |a'*b|/(|a|*|b|)
                * 20    Spearman rank correlation:
                        dist(a,b) = 1-rankcorr(a,b)
                * 21    Absolute Spearman rank correlation
                        dist(a,b) = 1-|rankcorr(a,b)|

OUTPUT PARAMETERS:
    D       -   array[NPoints,NPoints], distance matrix
                (full matrix is returned, with lower and upper triangles)

NOTE: different distance functions have different performance penalty:
      * Euclidean or Pearson correlation distances are the fastest ones
      * Spearman correlation distance function is a bit slower
      * city block and Chebyshev distances are an order of magnitude
        slower

      The reason behind the difference in performance is that
      correlation-based distance functions are computed using optimized
      linear algebra kernels, while Chebyshev and city block distance
      functions are computed using simple nested loops with two branches
      at each iteration.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizergetdistances(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, real_2d_array &d, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizergetdistances(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

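/*************************************************************************
USAGE SKETCH (illustrative only, not part of the library API). Computing
a full Chebyshev distance matrix for a small placeholder dataset.

    alglib::real_2d_array xy = "[[0,0],[3,1],[1,4]]";
    alglib::real_2d_array d;
    alglib::clusterizergetdistances(xy, 3, 2, 0, d);  // DistType=0: Chebyshev
    // d is 3x3, symmetric, with zero diagonal
*************************************************************************/
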
/*************************************************************************
|
|
This function takes as input clusterization report Rep, desired clusters
|
|
count K, and builds top K clusters from hierarchical clusterization tree.
|
|
It returns assignment of points to clusters (array of cluster indexes).
|
|
|
|
INPUT PARAMETERS:
|
|
Rep - report from ClusterizerRunAHC() performed on XY
|
|
K - desired number of clusters, 1<=K<=NPoints.
|
|
K can be zero only when NPoints=0.
|
|
|
|
OUTPUT PARAMETERS:
|
|
CIdx - array[NPoints], I-th element contains cluster index (from
|
|
0 to K-1) for I-th point of the dataset.
|
|
CZ - array[K]. This array allows to convert cluster indexes
|
|
returned by this function to indexes used by Rep.Z. J-th
|
|
cluster returned by this function corresponds to CZ[J]-th
|
|
cluster stored in Rep.Z/PZ/PM.
|
|
It is guaranteed that CZ[I]<CZ[I+1].
|
|
|
|
NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
|
|
Although they were obtained by manipulation with top K nodes of
|
|
dendrogram (i.e. hierarchical decomposition of dataset), this
|
|
function does not return information about hierarchy. Each of the
|
|
clusters stand on its own.
|
|
|
|
NOTE: Cluster indexes returned by this function does not correspond to
|
|
indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
|
|
representation of the dataset (dendrogram), or you work with "flat"
|
|
representation returned by this function. Each of representations
|
|
has its own clusters indexing system (former uses [0, 2*NPoints-2]),
|
|
while latter uses [0..K-1]), although it is possible to perform
|
|
conversion from one system to another by means of CZ array, returned
|
|
by this function, which allows you to convert indexes stored in CIdx
|
|
to the numeration system used by Rep.Z.
|
|
|
|
NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
|
|
it will perform many times faster than for K=100. Its worst-case
|
|
performance is O(N*K), although in average case it perform better
|
|
(up to O(N*log(K))).
|
|
|
|
-- ALGLIB --
|
|
Copyright 10.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void clusterizergetkclusters(const ahcreport &rep, const ae_int_t k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizergetkclusters(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
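
/*************************************************************************
Usage sketch for clusterizergetkclusters(): an illustrative example which
is not part of the library. The dataset, the Euclidean distance (disttype
2) and K=2 are arbitrary choices.

    #include <stdio.h>
    #include "dataanalysis.h"
    using namespace alglib;

    int main()
    {
        // five 2D points, agglomerative clustering with default settings
        real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
        clusterizerstate s;
        ahcreport rep;
        clusterizercreate(s);
        clusterizersetpoints(s, xy, 2);     // 2 = Euclidean distance
        clusterizerrunahc(s, rep);

        // cut the dendrogram into the top K=2 clusters
        integer_1d_array cidx, cz;
        clusterizergetkclusters(rep, 2, cidx, cz);
        for(int i=0; i<cidx.length(); i++)
            printf("point %d -> cluster %d\n", i, (int)cidx[i]);
        return 0;
    }
*************************************************************************/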

/*************************************************************************
This function accepts AHC report Rep and desired minimum intercluster
distance, and returns the top clusters from the hierarchical clusterization
tree which are separated by distance R or HIGHER.

It returns assignment of points to clusters (array of cluster indexes).

There is one more function with a similar name - ClusterizerSeparatedByCorr,
which returns clusters with intercluster correlation equal to R or LOWER
(note: higher for distance, lower for correlation).

INPUT PARAMETERS:
    Rep     -   report from ClusterizerRunAHC() performed on XY
    R       -   desired minimum intercluster distance, R>=0

OUTPUT PARAMETERS:
    K       -   number of clusters, 1<=K<=NPoints
    CIdx    -   array[NPoints], I-th element contains cluster index (from
                0 to K-1) for I-th point of the dataset.
    CZ      -   array[K]. This array allows you to convert cluster indexes
                returned by this function to indexes used by Rep.Z. J-th
                cluster returned by this function corresponds to CZ[J]-th
                cluster stored in Rep.Z/PZ/PM.
                It is guaranteed that CZ[I]<CZ[I+1].

NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
      Although they were obtained by manipulation with the top K nodes of
      the dendrogram (i.e. the hierarchical decomposition of the dataset),
      this function does not return information about hierarchy. Each of
      the clusters stands on its own.

NOTE: Cluster indexes returned by this function do not correspond to the
      indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
      representation of the dataset (dendrogram), or you work with the
      "flat" representation returned by this function. Each representation
      has its own cluster indexing system (the former uses [0..2*NPoints-2],
      the latter uses [0..K-1]), although it is possible to perform
      conversion from one system to another by means of the CZ array
      returned by this function, which allows you to convert indexes stored
      in CIdx to the numeration system used by Rep.Z.

NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
      it will perform many times faster than for K=100. Its worst-case
      performance is O(N*K), although in the average case it performs
      better (up to O(N*log(K))).

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbydist(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizerseparatedbydist(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), r, &k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
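
/*************************************************************************
Usage sketch for clusterizerseparatedbydist(): instead of fixing K in
advance, the dendrogram is cut at a distance threshold (R=3.0 here is an
arbitrary illustration) and the function reports how many clusters remain.

    // rep is an ahcreport produced by clusterizerrunahc(), as above
    ae_int_t k;
    integer_1d_array cidx, cz;
    clusterizerseparatedbydist(rep, 3.0, k, cidx, cz);
    printf("%d clusters separated by distance >= 3.0\n", (int)k);
*************************************************************************/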

/*************************************************************************
This function accepts AHC report Rep and desired maximum intercluster
correlation, and returns the top clusters from the hierarchical
clusterization tree which are separated by correlation R or LOWER.

It returns assignment of points to clusters (array of cluster indexes).

There is one more function with a similar name - ClusterizerSeparatedByDist,
which returns clusters with intercluster distance equal to R or HIGHER
(note: higher for distance, lower for correlation).

INPUT PARAMETERS:
    Rep     -   report from ClusterizerRunAHC() performed on XY
    R       -   desired maximum intercluster correlation, -1<=R<=+1

OUTPUT PARAMETERS:
    K       -   number of clusters, 1<=K<=NPoints
    CIdx    -   array[NPoints], I-th element contains cluster index (from
                0 to K-1) for I-th point of the dataset.
    CZ      -   array[K]. This array allows you to convert cluster indexes
                returned by this function to indexes used by Rep.Z. J-th
                cluster returned by this function corresponds to CZ[J]-th
                cluster stored in Rep.Z/PZ/PM.
                It is guaranteed that CZ[I]<CZ[I+1].

NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
      Although they were obtained by manipulation with the top K nodes of
      the dendrogram (i.e. the hierarchical decomposition of the dataset),
      this function does not return information about hierarchy. Each of
      the clusters stands on its own.

NOTE: Cluster indexes returned by this function do not correspond to the
      indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
      representation of the dataset (dendrogram), or you work with the
      "flat" representation returned by this function. Each representation
      has its own cluster indexing system (the former uses [0..2*NPoints-2],
      the latter uses [0..K-1]), although it is possible to perform
      conversion from one system to another by means of the CZ array
      returned by this function, which allows you to convert indexes stored
      in CIdx to the numeration system used by Rep.Z.

NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
      it will perform many times faster than for K=100. Its worst-case
      performance is O(N*K), although in the average case it performs
      better (up to O(N*log(K))).

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbycorr(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::clusterizerseparatedbycorr(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), r, &k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
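
/*************************************************************************
A sketch of the correlation-based variant; it assumes that the AHC run
used one of the correlation-type distance functions, and cuts the tree at
intercluster correlation 0.5 or lower (the threshold is arbitrary).

    ae_int_t k;
    integer_1d_array cidx, cz;
    clusterizerseparatedbycorr(rep, 0.5, k, cidx, cz);
*************************************************************************/
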
#endif

#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
A random forest (decision forest) builder object.

Used to store the dataset and to specify the settings of the decision
forest training algorithm.
*************************************************************************/
_decisionforestbuilder_owner::_decisionforestbuilder_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforestbuilder_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::decisionforestbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuilder), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
    alglib_impl::_decisionforestbuilder_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforestbuilder_owner::_decisionforestbuilder_owner(const _decisionforestbuilder_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforestbuilder_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuilder copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::decisionforestbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuilder), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
    alglib_impl::_decisionforestbuilder_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuilder*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforestbuilder_owner& _decisionforestbuilder_owner::operator=(const _decisionforestbuilder_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforestbuilder assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuilder assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_decisionforestbuilder_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
    alglib_impl::_decisionforestbuilder_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuilder*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_decisionforestbuilder_owner::~_decisionforestbuilder_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_decisionforestbuilder_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::decisionforestbuilder* _decisionforestbuilder_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::decisionforestbuilder* _decisionforestbuilder_owner::c_ptr() const
{
    return const_cast<alglib_impl::decisionforestbuilder*>(p_struct);
}

decisionforestbuilder::decisionforestbuilder() : _decisionforestbuilder_owner()
{
}

decisionforestbuilder::decisionforestbuilder(const decisionforestbuilder &rhs):_decisionforestbuilder_owner(rhs)
{
}

decisionforestbuilder& decisionforestbuilder::operator=(const decisionforestbuilder &rhs)
{
    if( this==&rhs )
        return *this;
    _decisionforestbuilder_owner::operator=(rhs);
    return *this;
}

decisionforestbuilder::~decisionforestbuilder()
{
}


/*************************************************************************
Buffer object which is used to perform various requests (usually model
inference) in the multithreaded mode (multiple threads working with the
same DF object).

This object should be created with DFCreateBuffer().
*************************************************************************/
_decisionforestbuffer_owner::_decisionforestbuffer_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforestbuffer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::decisionforestbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuffer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
    alglib_impl::_decisionforestbuffer_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforestbuffer_owner::_decisionforestbuffer_owner(const _decisionforestbuffer_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforestbuffer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuffer copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::decisionforestbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuffer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
    alglib_impl::_decisionforestbuffer_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuffer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforestbuffer_owner& _decisionforestbuffer_owner::operator=(const _decisionforestbuffer_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforestbuffer assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuffer assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_decisionforestbuffer_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
    alglib_impl::_decisionforestbuffer_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuffer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_decisionforestbuffer_owner::~_decisionforestbuffer_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_decisionforestbuffer_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::decisionforestbuffer* _decisionforestbuffer_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::decisionforestbuffer* _decisionforestbuffer_owner::c_ptr() const
{
    return const_cast<alglib_impl::decisionforestbuffer*>(p_struct);
}

decisionforestbuffer::decisionforestbuffer() : _decisionforestbuffer_owner()
{
}

decisionforestbuffer::decisionforestbuffer(const decisionforestbuffer &rhs):_decisionforestbuffer_owner(rhs)
{
}

decisionforestbuffer& decisionforestbuffer::operator=(const decisionforestbuffer &rhs)
{
    if( this==&rhs )
        return *this;
    _decisionforestbuffer_owner::operator=(rhs);
    return *this;
}

decisionforestbuffer::~decisionforestbuffer()
{
}


/*************************************************************************
Decision forest (random forest) model.
*************************************************************************/
_decisionforest_owner::_decisionforest_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforest_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::decisionforest*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforest), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
    alglib_impl::_decisionforest_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforest_owner::_decisionforest_owner(const _decisionforest_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_decisionforest_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforest copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::decisionforest*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforest), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
    alglib_impl::_decisionforest_init_copy(p_struct, const_cast<alglib_impl::decisionforest*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_decisionforest_owner& _decisionforest_owner::operator=(const _decisionforest_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforest assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforest assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_decisionforest_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
    alglib_impl::_decisionforest_init_copy(p_struct, const_cast<alglib_impl::decisionforest*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_decisionforest_owner::~_decisionforest_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_decisionforest_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::decisionforest* _decisionforest_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::decisionforest* _decisionforest_owner::c_ptr() const
{
    return const_cast<alglib_impl::decisionforest*>(p_struct);
}

decisionforest::decisionforest() : _decisionforest_owner()
{
}

decisionforest::decisionforest(const decisionforest &rhs):_decisionforest_owner(rhs)
{
}

decisionforest& decisionforest::operator=(const decisionforest &rhs)
{
    if( this==&rhs )
        return *this;
    _decisionforest_owner::operator=(rhs);
    return *this;
}

decisionforest::~decisionforest()
{
}


/*************************************************************************
Decision forest training report.

=== training/oob errors ==================================================

Following fields store training set errors:
* relclserror   -   fraction of misclassified cases, [0,1]
* avgce         -   average cross-entropy in bits per symbol
* rmserror      -   root-mean-square error
* avgerror      -   average error
* avgrelerror   -   average relative error

Out-of-bag estimates are stored in fields with the same names, but with an
"oob" prefix.

For classification problems:
* RMS, AVG and AVGREL errors are calculated for posterior probabilities

For regression problems:
* RELCLS and AVGCE errors are zero

=== variable importance ==================================================

Following fields are used to store variable importance information:

* topvars           -   variables ordered from the most important to the
                        least important ones (according to the current
                        choice of importance rating).
                        For example, topvars[0] contains the index of the
                        most important variable, topvars[0:2] are the
                        indexes of the 3 most important ones, and so on.

* varimportances    -   array[nvars], ratings (the larger, the more
                        important the variable is; always in the [0,1]
                        range).
                        By default, filled by zeros (no importance ratings
                        are provided unless you explicitly request them).
                        A zero rating means that the variable is not
                        important, although you will rarely encounter such
                        a thing - in many cases unimportant variables
                        produce nearly-zero (but nonzero) ratings.

Variable importance report must be EXPLICITLY requested by calling:
* dfbuildersetimportanceoobgini() function, if you need out-of-bag
  Gini-based importance rating also known as MDI (fast to calculate,
  resistant to overfitting issues, but has some bias towards continuous
  and high-cardinality categorical variables)
* dfbuildersetimportancetrngini() function, if you need training set
  Gini-based importance rating (what other packages typically report)
* dfbuildersetimportancepermutation() function, if you need permutation-
  based importance rating also known as MDA (slower to calculate, but less
  biased)
* dfbuildersetimportancenone() function, if you do not need importance
  ratings - ratings will be zero, topvars[] will be [0,1,2,...]

Different importance ratings (Gini or permutation) produce non-comparable
values. Although in all cases rating values lie in the [0,1] range, there
are differences:
* informally speaking, Gini importance rating tends to divide a "unit
  amount of importance" between several important variables, i.e. it
  produces estimates which roughly sum to 1.0 (or to less than 1.0, if
  your task cannot be solved exactly). If all variables are equally
  important, they will have the same rating, roughly 1/NVars, even if
  every variable is critically important.
* on the other hand, permutation importance tells us what percentage of
  the model predictive power will be ruined by permuting this specific
  variable. It does not produce estimates which sum to one. A critically
  important variable will have a rating close to 1.0, and you may have
  multiple variables with such a rating.

More information on variable importance ratings can be found in comments
on the dfbuildersetimportanceoobgini() and
dfbuildersetimportancepermutation() functions.
*************************************************************************/
_dfreport_owner::_dfreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_dfreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::dfreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::dfreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::dfreport));
    alglib_impl::_dfreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_dfreport_owner::_dfreport_owner(const _dfreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_dfreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: dfreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::dfreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::dfreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::dfreport));
    alglib_impl::_dfreport_init_copy(p_struct, const_cast<alglib_impl::dfreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_dfreport_owner& _dfreport_owner::operator=(const _dfreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: dfreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: dfreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_dfreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::dfreport));
    alglib_impl::_dfreport_init_copy(p_struct, const_cast<alglib_impl::dfreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_dfreport_owner::~_dfreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_dfreport_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::dfreport* _dfreport_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::dfreport* _dfreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::dfreport*>(p_struct);
}

dfreport::dfreport() : _dfreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),oobrelclserror(p_struct->oobrelclserror),oobavgce(p_struct->oobavgce),oobrmserror(p_struct->oobrmserror),oobavgerror(p_struct->oobavgerror),oobavgrelerror(p_struct->oobavgrelerror),topvars(&p_struct->topvars),varimportances(&p_struct->varimportances)
{
}

dfreport::dfreport(const dfreport &rhs):_dfreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),oobrelclserror(p_struct->oobrelclserror),oobavgce(p_struct->oobavgce),oobrmserror(p_struct->oobrmserror),oobavgerror(p_struct->oobavgerror),oobavgrelerror(p_struct->oobavgrelerror),topvars(&p_struct->topvars),varimportances(&p_struct->varimportances)
{
}

dfreport& dfreport::operator=(const dfreport &rhs)
{
    if( this==&rhs )
        return *this;
    _dfreport_owner::operator=(rhs);
    return *this;
}

dfreport::~dfreport()
{
}
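
/*************************************************************************
A short sketch of reading the report: after training (see the builder
examples below) the oob* fields give generalization estimates without a
separate test set; df and rep are assumed to come from
dfbuilderbuildrandomforest().

    printf("training RMS error:   %.4f\n", rep.rmserror);
    printf("out-of-bag RMS error: %.4f\n", rep.oobrmserror);
*************************************************************************/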


/*************************************************************************
This function serializes data structure to string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although the serializer uses spaces and CR+LF as separators, you can
  replace any separator character by an arbitrary combination of spaces,
  tabs, Windows or Unix newlines. It allows flexible reformatting of the
  string in case you want to include it into a text or XML file. But you
  should not insert separators into the middle of the "words", nor should
  you change the case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
  and big endian machines, and so on. You can serialize the structure on
  a 32-bit machine and unserialize it on a 64-bit one (or vice versa), or
  serialize it on SPARC and unserialize it on x86. You can also serialize
  it in the C++ version of ALGLIB and unserialize it in the C# one, and
  vice versa.
*************************************************************************/
void dfserialize(decisionforest &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::dfalloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::dfserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes data structure from string.
*************************************************************************/
void dfunserialize(const std::string &s_in, decisionforest &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::dfunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
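
/*************************************************************************
Round-trip sketch: serialize a trained forest into std::string and restore
it into a fresh object; df is assumed to be an already trained
decisionforest.

    std::string s;
    dfserialize(df, s);         // model -> portable text
    decisionforest df2;
    dfunserialize(s, df2);      // text -> equivalent model
*************************************************************************/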


/*************************************************************************
This function serializes data structure to C++ stream.

Data stream generated by this function is the same as the string
representation generated by the string version of the serializer -
alphanumeric characters, dots, underscores, minus signs, which are grouped
into words separated by spaces and CR+LF.

We recommend you to read comments on the string version of the serializer
to find out more about serialization of ALGLIB objects.
*************************************************************************/
void dfserialize(decisionforest &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::dfalloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::dfserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes data structure from stream.
*************************************************************************/
void dfunserialize(const std::istream &s_in, decisionforest &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::dfunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
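
/*************************************************************************
The same round trip through a file, using the stream overloads; <fstream>
is assumed and error handling is omitted for brevity.

    #include <fstream>

    std::ofstream ofs("forest.txt");
    dfserialize(df, ofs);
    ofs.close();

    std::ifstream ifs("forest.txt");
    decisionforest df2;
    dfunserialize(ifs, df2);
*************************************************************************/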

/*************************************************************************
This function creates buffer structure which can be used to perform
parallel inference requests.

DF subpackage provides two sets of computing functions - ones which use
the internal buffer of the DF model (these functions are single-threaded
because they use the same buffer, which cannot be shared between threads),
and ones which use an external buffer.

This function is used to initialize an external buffer.

INPUT PARAMETERS
    Model       -   DF model which is associated with newly created buffer

OUTPUT PARAMETERS
    Buf         -   external buffer.

IMPORTANT: the buffer object should be used only with the model which was
           used to initialize the buffer. Any attempt to use the buffer
           with a different object is dangerous - you may get an integrity
           check failure (exception) because sizes of internal arrays do
           not fit to the dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void dfcreatebuffer(const decisionforest &model, decisionforestbuffer &buf, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfcreatebuffer(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::decisionforestbuffer*>(buf.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
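
/*************************************************************************
A minimal sketch of buffered inference: one buffer per worker thread, each
thread calling the buffered inference function dftsprocess() with its own
buffer. A trained model df and a 2-variable input are assumed.

    decisionforestbuffer buf;       // one such buffer per thread
    dfcreatebuffer(df, buf);
    real_1d_array x = "[0.5, 1.5]";
    real_1d_array y;
    dftsprocess(df, buf, x, y);     // reads df, writes only to buf and y
*************************************************************************/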

/*************************************************************************
This subroutine creates a DecisionForestBuilder object which is used to
train decision forests.

By default, a new builder stores an empty dataset and some reasonable
default settings. At the very least, you should specify the dataset prior
to building a decision forest. You can also tweak settings of the forest
construction algorithm (recommended, although the default settings should
work well).

Following actions are mandatory:
* calling dfbuildersetdataset() to specify the dataset
* calling dfbuilderbuildrandomforest() to build the decision forest using
  the current dataset and default settings

Additionally, you may call:
* dfbuildersetrndvars() or dfbuildersetrndvarsratio() to specify the number
  of variables randomly chosen for each split
* dfbuildersetsubsampleratio() to specify the fraction of the dataset
  randomly subsampled to build each tree
* dfbuildersetseed() to control the random seed chosen for tree
  construction

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildercreate(decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildercreate(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
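
/*************************************************************************
End-to-end sketch of the mandatory workflow described above: create a
builder, attach a dataset, build the forest. The dataset and the tree
count (50) are arbitrary; this trains a regression forest (NClasses=1)
on 2 independent variables.

    #include <stdio.h>
    #include "dataanalysis.h"
    using namespace alglib;

    int main()
    {
        // 4 samples, 2 inputs, last column = dependent variable
        real_2d_array xy = "[[1,1,10],[1,2,11],[2,1,20],[2,2,21]]";
        decisionforestbuilder builder;
        decisionforest df;
        dfreport rep;
        dfbuildercreate(builder);
        dfbuildersetdataset(builder, xy, 4, 2, 1);
        dfbuilderbuildrandomforest(builder, 50, df, rep);   // 50 trees

        real_1d_array x = "[1,2]", y;
        dfprocess(df, x, y);
        printf("prediction: %.2f\n", y[0]);
        return 0;
    }
*************************************************************************/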

/*************************************************************************
This subroutine adds a dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the forest construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   decision forest builder object
    XY          -   array[NPoints,NVars+1] (minimum size; actual size can
                    be larger, only the leading part is used anyway),
                    dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * the last column stores the class number (in
                      0...NClasses-1) or the real value of the dependent
                      variable
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   indicates type of the problem being solved:
                    * NClasses>=2 means that a classification problem is
                      solved (the last column of the dataset stores the
                      class number)
                    * NClasses=1 means that a regression problem is solved
                      (the last column of the dataset stores the variable
                      value)

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetdataset(const decisionforestbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetdataset(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
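
/*************************************************************************
For classification the layout is the same except that the last column
must hold integer class labels in 0..NClasses-1; a tiny sketch with
NClasses=2 (builder is an already created decisionforestbuilder):

    // 3 samples, 2 inputs + class label (0 or 1)
    real_2d_array xy = "[[1.0,2.0,0],[1.5,2.5,0],[8.0,1.0,1]]";
    dfbuildersetdataset(builder, xy, 3, 2, 2);
*************************************************************************/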

/*************************************************************************
This function sets the number of variables (in the [1,NVars] range) used
by the decision forest construction algorithm.

The default option is to use roughly sqrt(NVars) variables.

INPUT PARAMETERS:
    S           -   decision forest builder object
    RndVars     -   number of randomly selected variables; values outside
                    of the [1,NVars] range are silently clipped.

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvars(const decisionforestbuilder &s, const ae_int_t rndvars, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetrndvars(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), rndvars, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the number of variables used by the decision forest
construction algorithm as a fraction of the total variable count, in the
(0,1) range.

The default option is to use roughly sqrt(NVars) variables.

INPUT PARAMETERS:
    S           -   decision forest builder object
    F           -   round(NVars*F) variables are selected

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsratio(const decisionforestbuilder &s, const double f, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetrndvarsratio(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), f, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function tells the decision forest builder to automatically choose
the number of variables used by the decision forest construction
algorithm. Roughly sqrt(NVars) variables will be used.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsauto(const decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetrndvarsauto(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the size of the dataset subsample used by the decision
forest construction algorithm. The size is specified as a fraction of the
total dataset size.

The default option is to use 50% of the dataset for training, 50% for the
OOB estimates. You can decrease the fraction F down to 10%, 1% or even
below in order to reduce overfitting.

INPUT PARAMETERS:
    S           -   decision forest builder object
    F           -   fraction of the dataset to use, in (0,1] range. Values
                    outside of this range will be silently clipped. At
                    least one element is always selected for the training
                    set.

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetsubsampleratio(const decisionforestbuilder &s, const double f, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetsubsampleratio(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), f, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
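
/*************************************************************************
Sketch: shrinking the training subsample to 25% of the dataset (the
remaining points feed the OOB estimates); the value is an arbitrary
illustration.

    dfbuildersetsubsampleratio(builder, 0.25);
*************************************************************************/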

/*************************************************************************
This function sets the seed used by the internal RNG for random
subsampling and random selection of variable subsets.

By default, a random seed is used, i.e. every time you build a decision
forest, the generator is seeded with a new value obtained from the
system-wide RNG. Thus, the decision forest builder returns
non-deterministic results. You can change such behavior by specifying a
fixed positive seed value.

INPUT PARAMETERS:
    S           -   decision forest builder object
    SeedVal     -   seed value:
                    * positive values are used for seeding the RNG with a
                      fixed seed, i.e. subsequent runs on the same data
                      will return the same decision forests
                    * a non-positive seed means that a random seed is used
                      for every run of the builder, i.e. subsequent runs
                      on the same datasets will return slightly different
                      decision forests

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetseed(const decisionforestbuilder &s, const ae_int_t seedval, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetseed(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), seedval, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
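
/*************************************************************************
Sketch: fixing the seed so that repeated builds on the same dataset
return identical forests (any positive value works; 117 is arbitrary).

    dfbuildersetseed(builder, 117);     // deterministic builds
    dfbuildersetseed(builder, 0);       // back to non-deterministic ones
*************************************************************************/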

/*************************************************************************
This function sets the random decision forest construction algorithm.

As for now, only one decision forest construction algorithm is supported -
a dense "baseline" RDF algorithm.

INPUT PARAMETERS:
    S           -   decision forest builder object
    AlgoType    -   algorithm type:
                    * 0 = baseline dense RDF

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfalgo(const decisionforestbuilder &s, const ae_int_t algotype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetrdfalgo(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), algotype, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets the split selection algorithm used by the decision
forest classifier. You may choose between several algorithms, with
different speed and quality of the results.

INPUT PARAMETERS:
    S           -   decision forest builder object
    SplitStrength-  split type:
                    * 0 = split at the random position, fastest one
                    * 1 = split at the middle of the range
                    * 2 = strong split at the best point of the range
                          (default)

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfsplitstrength(const decisionforestbuilder &s, const ae_int_t splitstrength, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetrdfsplitstrength(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), splitstrength, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
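
/*************************************************************************
Sketch: trading split quality for speed; 0 (random split position) is the
fastest option, 2 (best split) is the default and the most accurate one.

    dfbuildersetrdfsplitstrength(builder, 0);   // fast, lower quality
    dfbuildersetrdfsplitstrength(builder, 2);   // default, strong splits
*************************************************************************/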

/*************************************************************************
This function tells the decision forest construction algorithm to use
Gini impurity based variable importance estimation (also known as MDI).

This version of the importance estimation algorithm analyzes mean decrease
in impurity (MDI) on the training sample during splits. The result is
divided by the impurity at the root node in order to produce an estimate
in the [0,1] range.

Such estimates are fast to calculate and beautifully normalized (they sum
to one) but have the following downsides:
* they ALWAYS sum to 1.0, even if the output is completely unpredictable,
  i.e. MDI allows us to order variables by importance, but does not tell
  us anything about the "absolute" importances of variables
* there exists some bias towards continuous and high-cardinality
  categorical variables

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide the "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have the same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly the same picture will be produced for the "all
      variables provide information, no one is critical" situation and for
      the "all variables are critical, drop any one, everything is ruined"
      situation.

      Contrary to that, MDA will rate a critical variable as ~1.0
      important, and an important but non-critical variable will have a
      less than unit rating.

NOTE: quite often MDA and MDI return the same results. It generally
      happens on problems with low test set error (a few percent at most)
      and a training set which is large enough to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancetrngini(const decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetimportancetrngini(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
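
/*************************************************************************
Sketch: requesting training-set Gini (MDI) importances and reading them
back from the report after the build; builder/df/rep are assumed to be
set up as in the workflow example above.

    dfbuildersetimportancetrngini(builder);
    dfbuilderbuildrandomforest(builder, 50, df, rep);
    for(int j=0; j<rep.varimportances.length(); j++)
        printf("var %d: importance %.3f\n", j, rep.varimportances[j]);
    printf("most important variable: %d\n", (int)rep.topvars[0]);
*************************************************************************/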

/*************************************************************************
This function tells the decision forest construction algorithm to use the
out-of-bag version of Gini variable importance estimation (also known as
OOB-MDI).

This version of the importance estimation algorithm analyzes mean decrease
in impurity (MDI) on the out-of-bag sample during splits. The result is
divided by the impurity at the root node in order to produce an estimate
in the [0,1] range.

Such estimates are fast to calculate and resistant to overfitting issues
(thanks to the out-of-bag estimates used). However, the OOB Gini rating
has the following downsides:
* there exists some bias towards continuous and high-cardinality
  categorical variables
* the Gini rating allows us to order variables by importance, but it is
  hard to define the importance of a variable by itself.

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide the "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have the same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly the same picture will be produced for the "all
      variables provide information, no one is critical" situation and for
      the "all variables are critical, drop any one, everything is ruined"
      situation.

      Contrary to that, MDA will rate a critical variable as ~1.0
      important, and an important but non-critical variable will have a
      less than unit rating.

NOTE: quite often MDA and MDI return the same results. It generally
      happens on problems with low test set error (a few percent at most)
      and a training set which is large enough to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportanceoobgini(const decisionforestbuilder &s, const xparams _xparams)
|
|
{
|
|
jmp_buf _break_jump;
|
|
alglib_impl::ae_state _alglib_env_state;
|
|
alglib_impl::ae_state_init(&_alglib_env_state);
|
|
if( setjmp(_break_jump) )
|
|
{
|
|
#if !defined(AE_NO_EXCEPTIONS)
|
|
_ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
|
|
#else
|
|
_ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
|
|
return;
|
|
#endif
|
|
}
|
|
ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
|
|
if( _xparams.flags!=0x0 )
|
|
ae_state_set_flags(&_alglib_env_state, _xparams.flags);
|
|
alglib_impl::dfbuildersetimportanceoobgini(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
|
|
alglib_impl::ae_state_clear(&_alglib_env_state);
|
|
return;
|
|
}
|
|
|
|
/*************************************************************************
|
|
This function tells decision forest construction algorithm to use
|
|
permutation variable importance estimator (also known as MDA).
|
|
|
|
This version of importance estimation algorithm analyzes mean increase in
|
|
out-of-bag sum of squared residuals after random permutation of J-th
|
|
variable. The result is divided by error computed with all variables being
|
|
perturbed in order to produce R-squared-like estimate in [0,1] range.
|
|
|
|
Such estimate is slower to calculate than Gini-based rating because it
|
|
needs multiple inference runs for each of variables being studied.
|
|
|
|
ALGLIB uses parallelized and highly optimized algorithm which analyzes
|
|
path through the decision tree and allows to handle most perturbations
|
|
in O(1) time; nevertheless, requesting MDA importances may increase forest
|
|
construction time from 10% to 200% (or more, if you have thousands of
|
|
variables).
|
|
|
|
However, MDA rating has following benefits over Gini-based ones:
|
|
* no bias towards specific variable types
|
|
* ability to directly evaluate "absolute" importance of some variable at
|
|
"0 to 1" scale (contrary to Gini-based rating, which returns comparative
|
|
importances).
|
|
|
|
NOTE: informally speaking, MDA (permutation importance) rating answers the
|
|
question "what part of the model predictive power is ruined by
|
|
permuting k-th variable?" while MDI tells us "what part of the model
|
|
predictive power was achieved due to usage of k-th variable".
|
|
|
|
Thus, MDA rates each variable independently at "0 to 1" scale while
|
|
MDI (and OOB-MDI too) tends to divide "unit amount of importance"
|
|
between several important variables.
|
|
|
|
If all variables are equally important, they will have same
|
|
MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
|
|
However, roughly same picture will be produced for the "all
|
|
variables provide information no one is critical" situation and for
|
|
the "all variables are critical, drop any one, everything is ruined"
|
|
situation.
|
|
|
|
Contrary to that, MDA will rate critical variable as ~1.0 important,
|
|
and important but non-critical variable will have less than unit
|
|
rating.
|
|
|
|
NOTE: quite an often MDA and MDI return same results. It generally happens
|
|
on problems with low test set error (a few percents at most) and
|
|
large enough training set to avoid overfitting.
|
|
|
|
The difference between MDA, MDI and OOB-MDI becomes important only
|
|
on "hard" tasks with high test set error and/or small training set.
|
|
|
|
INPUT PARAMETERS:
|
|
S - decision forest builder object
|
|
|
|
OUTPUT PARAMETERS:
|
|
S - decision forest builder object. Next call to the forest
|
|
construction function will produce:
|
|
* importance estimates in rep.varimportances field
|
|
* variable ranks in rep.topvars field
|
|
|
|
-- ALGLIB --
|
|
Copyright 29.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void dfbuildersetimportancepermutation(const decisionforestbuilder &s, const xparams _xparams)
|
|
{
|
|
jmp_buf _break_jump;
|
|
alglib_impl::ae_state _alglib_env_state;
|
|
alglib_impl::ae_state_init(&_alglib_env_state);
|
|
if( setjmp(_break_jump) )
|
|
{
|
|
#if !defined(AE_NO_EXCEPTIONS)
|
|
_ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
|
|
#else
|
|
_ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
|
|
return;
|
|
#endif
|
|
}
|
|
ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
|
|
if( _xparams.flags!=0x0 )
|
|
ae_state_set_flags(&_alglib_env_state, _xparams.flags);
|
|
alglib_impl::dfbuildersetimportancepermutation(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
|
|
alglib_impl::ae_state_clear(&_alglib_env_state);
|
|
return;
|
|
}
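
/*
A minimal usage sketch for the importance-selection functions above. It
assumes a builder that was already created and loaded with a dataset
elsewhere (e.g. with dfbuildercreate() and dfbuildersetdataset(), declared
elsewhere in this interface); all variable names below are illustrative.

    alglib::decisionforestbuilder builder;
    alglib::decisionforest forest;
    alglib::dfreport rep;
    // ... create builder and attach a dataset here ...
    alglib::dfbuildersetimportancepermutation(builder);      // request MDA ratings
    alglib::dfbuilderbuildrandomforest(builder, 100, forest, rep);
    for(alglib::ae_int_t i=0; i<rep.topvars.length(); i++)
    {
        alglib::ae_int_t v = rep.topvars(i);                 // i-th most important variable
        double imp = rep.varimportances(v);                  // its MDA rating in [0,1]
    }
*/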

/*************************************************************************
This function tells decision forest construction algorithm to skip
variable importance estimation.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will result in forest being built
                    without variable importance estimation.

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancenone(const decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildersetimportancenone(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function is an alias for dfbuilderpeekprogress(), left in ALGLIB for
backward compatibility reasons.

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuildergetprogress(const decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfbuildergetprogress(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
This function is used to peek into decision forest construction process
from some other thread and get the current progress indicator.

It returns a value in [0,1].

INPUT PARAMETERS:
    S           -   decision forest builder object used to build forest
                    in some other thread

RESULT:
    progress value, in [0,1]

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuilderpeekprogress(const decisionforestbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfbuilderpeekprogress(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
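
/*
A minimal sketch of polling construction progress from another thread, as
the comment above describes. It assumes a fully configured builder named
`builder`; the use of std::thread and std::atomic here is illustrative,
any threading facility works.

    #include <atomic>
    #include <chrono>
    #include <thread>

    alglib::decisionforest forest;
    alglib::dfreport rep;
    std::atomic<bool> done(false);
    std::thread worker([&]{
        alglib::dfbuilderbuildrandomforest(builder, 100, forest, rep);
        done = true;
    });
    while( !done )
    {
        double p = alglib::dfbuilderpeekprogress(builder);   // in [0,1]
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
    worker.join();
*/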

/*************************************************************************
This subroutine builds decision forest according to current settings using
dataset internally stored in the builder object. Dense algorithm is used.

NOTE: this function uses dense algorithm for forest construction
      independently from the dataset format (dense or sparse).

NOTE: forest built with this function is stored in-memory using 64-bit
      data structures for offsets/indexes/split values. It is possible to
      convert forest into more memory-efficient compressed binary
      representation. Depending on the problem properties, 3.7x-5.7x
      compression factors are possible.

      The downsides of compression are (a) slight reduction in the model
      accuracy and (b) ~1.5x reduction in the inference speed (due to
      increased complexity of the storage format).

      See comments on dfbinarycompression() for more info.

Default settings are used by the algorithm; you can tweak them with the
help of the following functions:
* dfbuildersetrfactor() - to control a fraction of the dataset used for
  subsampling
* dfbuildersetrandomvars() - to control number of variables randomly chosen
  for decision rule creation

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   decision forest builder object
    NTrees      -   NTrees>=1, number of trees to train

OUTPUT PARAMETERS:
    DF          -   decision forest. You can compress this forest to more
                    compact 16-bit representation with dfbinarycompression()
    Rep         -   report, see below for information on its fields.

=== report information produced by forest construction function ==========

Decision forest training report includes the following information:
* training set errors
* out-of-bag estimates of errors
* variable importance ratings

The following fields are used to store this information:
* training set errors are stored in rep.relclserror, rep.avgce,
  rep.rmserror, rep.avgerror and rep.avgrelerror
* out-of-bag estimates of errors are stored in rep.oobrelclserror,
  rep.oobavgce, rep.oobrmserror, rep.oobavgerror and rep.oobavgrelerror

Variable importance reports, if requested by dfbuildersetimportancegini(),
dfbuildersetimportancetrngini() or dfbuildersetimportancepermutation()
call, are stored in:
* rep.varimportances field stores importance ratings
* rep.topvars stores variable indexes ordered from the most important to
  the less important ones

You can find more information about report fields in:
* comments on dfreport structure
* comments on dfbuildersetimportancegini function
* comments on dfbuildersetimportancetrngini function
* comments on dfbuildersetimportancepermutation function

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuilderbuildrandomforest(const decisionforestbuilder &s, const ae_int_t ntrees, decisionforest &df, dfreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuilderbuildrandomforest(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), ntrees, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
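
/*
A minimal end-to-end sketch of the workflow described above, assuming that
dfbuildercreate() and dfbuildersetdataset() (declared elsewhere in this
interface) are used to prepare the builder. The dataset below is a toy
2-variable, 2-class problem; the last column holds the class index.

    alglib::real_2d_array xy = "[[0.0,0.0,0],[0.1,0.2,0],[0.9,0.8,1],[1.0,1.0,1]]";
    alglib::decisionforestbuilder builder;
    alglib::decisionforest forest;
    alglib::dfreport rep;
    alglib::dfbuildercreate(builder);
    alglib::dfbuildersetdataset(builder, xy, 4, 2, 2);   // 4 points, 2 vars, 2 classes
    alglib::dfbuilderbuildrandomforest(builder, 50, forest, rep);
    // rep.oobrelclserror now holds the out-of-bag classification error
*/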

/*************************************************************************
This function performs binary compression of the decision forest.

Original decision forest produced by the forest builder is stored using
64-bit representation for all numbers - offsets, variable indexes, split
points.

It is possible to significantly reduce model size by means of:
* using compressed dynamic encoding for integers (offsets and variable
  indexes), which uses just 1 byte to store small ints (less than 128),
  just 2 bytes for larger values (less than 128^2) and so on
* storing floating point numbers using 8-bit exponent and 16-bit mantissa

As a result, the model needs significantly less memory (compression factor
depends on variable and class counts). In particular:
* NVars<128 and NClasses<128 result in 4.4x-5.7x model size reduction
* NVars<16384 and NClasses<128 result in 3.7x-4.5x model size reduction

Such storage format performs lossless compression of all integers, but
compression of floating point values (split values) is lossy, with roughly
0.01% relative error introduced during rounding. Thus, we recommend you to
re-evaluate model accuracy after compression.

Another downside of compression is ~1.5x reduction in the inference
speed due to necessity of dynamic decompression of the compressed model.

INPUT PARAMETERS:
    DF      -   decision forest built by forest builder

OUTPUT PARAMETERS:
    DF      -   replaced by compressed forest

RESULT:
    compression factor (in-RAM size of the compressed model vs that of the
    uncompressed one), positive number larger than 1.0

  -- ALGLIB --
     Copyright 22.07.2019 by Bochkanov Sergey
*************************************************************************/
double dfbinarycompression(const decisionforest &df, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfbinarycompression(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
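
/*
A minimal sketch of the compress-then-revalidate pattern recommended above;
`forest` and a test set `xytest` with `ntest` rows are assumed to exist
already.

    double rms_before = alglib::dfrmserror(forest, xytest, ntest);
    double factor     = alglib::dfbinarycompression(forest);   // e.g. ~4.0 means 4x smaller
    double rms_after  = alglib::dfrmserror(forest, xytest, ntest);
    // rms_after should differ only slightly (split values are rounded with
    // ~0.01% relative error), but per the comment above it must be re-checked.
*/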

/*************************************************************************
Inference using decision forest

IMPORTANT: this function is thread-unsafe and may modify internal
           structures of the model! You cannot use the same model object
           for parallel evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    DF      -   decision forest model
    X       -   input vector, array[NVars]
    Y       -   possibly preallocated buffer, reallocated if too small

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

See also DFProcessI.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfprocess(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfprocess(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
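
/*
A minimal single-threaded inference sketch for dfprocess(); `forest` is an
assumed trained model with NVars=2. For a classifier, y receives one
posterior probability per class; for a regressor, y has a single element.

    #include <cstdio>

    alglib::real_1d_array x = "[0.5, 0.5]";
    alglib::real_1d_array y;
    alglib::dfprocess(forest, x, y);
    for(alglib::ae_int_t i=0; i<y.length(); i++)
        printf("%.4f\n", double(y(i)));
*/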

/*************************************************************************
'interactive' variant of DFProcess for languages like Python which support
constructs like "Y = DFProcessI(DF,X)" and interactive mode of the
interpreter.

This function allocates a new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from the command line.

IMPORTANT: this function is thread-unsafe and may modify internal
           structures of the model! You cannot use the same model object
           for parallel evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers if
           you need thread-safe evaluation.

  -- ALGLIB --
     Copyright 28.02.2010 by Bochkanov Sergey
*************************************************************************/
void dfprocessi(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfprocessi(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function returns the first component of the inferred vector (i.e. the
one with index #0).

It is a convenience wrapper for dfprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems

In the former case this function returns inference result as scalar, which
is definitely more convenient than wrapping it as vector. In the latter
case it returns probability of object belonging to class #0.

If you call it for anything different from the two cases above, it will
work as defined, i.e. return y[0], although it is of less use in such
cases.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You cannot use the same model object for parallel
           evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   DF model
    X       -   input vector, array[0..NVars-1].

RESULT:
    Y[0]

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double dfprocess0(const decisionforest &model, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfprocess0(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
This function returns the most probable class number for an input X. It is
the same as calling dfprocess(model,x,y), then determining i=argmax(y[i])
and returning i.

A class number in [0,NOut) range is returned for classification problems,
-1 is returned when this function is called for regression problems.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You cannot use the same model object for parallel
           evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   decision forest model
    X       -   input vector, array[0..NVars-1].

RESULT:
    class number, -1 for regression tasks

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t dfclassify(const decisionforest &model, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::dfclassify(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}
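
/*
A minimal sketch contrasting the two convenience wrappers above on a
2-class model; `forest` is an assumed, already trained classifier with
NVars=2.

    alglib::real_1d_array x = "[0.9, 0.8]";
    double p0 = alglib::dfprocess0(forest, x);             // probability of class #0
    alglib::ae_int_t cls = alglib::dfclassify(forest, x);  // index of the most probable class
*/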

/*************************************************************************
Inference using decision forest

Thread-safe processing using external buffer for temporaries.

This function is thread-safe (i.e. you can use the same DF model from
multiple threads) as long as you use different buffer objects for different
threads.

INPUT PARAMETERS:
    DF      -   decision forest model
    Buf     -   buffer object, must be allocated specifically for this
                model with dfcreatebuffer().
    X       -   input vector, array[NVars]
    Y       -   possibly preallocated buffer, reallocated if too small

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

See also DFProcessI.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
void dftsprocess(const decisionforest &df, const decisionforestbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dftsprocess(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::decisionforestbuffer*>(buf.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
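
/*
A minimal sketch of the buffered, thread-safe evaluation pattern described
above: one buffer per thread (allocated with dfcreatebuffer(), declared
elsewhere in this interface), one shared model. std::thread is used only
for illustration; `forest` is an assumed trained model with NVars=2.

    #include <thread>

    alglib::decisionforestbuffer buf0, buf1;
    alglib::dfcreatebuffer(forest, buf0);
    alglib::dfcreatebuffer(forest, buf1);
    alglib::real_1d_array x0 = "[0.1, 0.2]", x1 = "[0.9, 0.8]";
    alglib::real_1d_array y0, y1;
    std::thread t0([&]{ alglib::dftsprocess(forest, buf0, x0, y0); });
    std::thread t1([&]{ alglib::dftsprocess(forest, buf1, x1, y1); });
    t0.join();
    t1.join();
*/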

/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrelclserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfrelclserror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}


/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgce(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfavgce(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}


/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.
    Its meaning for regression task is obvious. As for classification
    task, RMS error means error when estimating posterior probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrmserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfrmserror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}


/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average error when estimating posterior probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfavgerror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}


/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average relative error when estimating posterior
    probability of belonging to the correct class.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgrelerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::dfavgrelerror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}
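
/*
A minimal sketch of evaluating the error metrics defined above on a
hold-out set; `forest` and the test matrix layout (features followed by
the target in each row) are assumptions carried over from the build
example earlier in this file.

    alglib::real_2d_array xytest = "[[0.0,0.0,0],[1.0,1.0,1]]";
    double relcls = alglib::dfrelclserror(forest, xytest, 2); // misclassification rate
    double ce     = alglib::dfavgce(forest, xytest, 2);       // cross-entropy, bits per element
    double rms    = alglib::dfrmserror(forest, xytest, 2);    // RMS posterior-probability error
*/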

/*************************************************************************
This subroutine builds random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforest(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildrandomdecisionforest(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, ntrees, r, &info, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}


/*************************************************************************
This subroutine builds random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforestx1(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const ae_int_t nrndvars, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dfbuildrandomdecisionforestx1(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, ntrees, nrndvars, r, &info, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
#endif

#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Buffer object which is used to perform various requests (usually model
inference) in the multithreaded mode (multiple threads working with the
same KNN object).

This object should be created with KNNCreateBuffer().
*************************************************************************/
_knnbuffer_owner::_knnbuffer_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnbuffer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::knnbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuffer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
    alglib_impl::_knnbuffer_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}


_knnbuffer_owner::_knnbuffer_owner(const _knnbuffer_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnbuffer_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuffer copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::knnbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuffer), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
    alglib_impl::_knnbuffer_init_copy(p_struct, const_cast<alglib_impl::knnbuffer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}


_knnbuffer_owner& _knnbuffer_owner::operator=(const _knnbuffer_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnbuffer assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuffer assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_knnbuffer_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
    alglib_impl::_knnbuffer_init_copy(p_struct, const_cast<alglib_impl::knnbuffer*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}


_knnbuffer_owner::~_knnbuffer_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_knnbuffer_destroy(p_struct);
        ae_free(p_struct);
    }
}


alglib_impl::knnbuffer* _knnbuffer_owner::c_ptr()
{
    return p_struct;
}


alglib_impl::knnbuffer* _knnbuffer_owner::c_ptr() const
{
    return const_cast<alglib_impl::knnbuffer*>(p_struct);
}

knnbuffer::knnbuffer() : _knnbuffer_owner()
{
}


knnbuffer::knnbuffer(const knnbuffer &rhs):_knnbuffer_owner(rhs)
{
}


knnbuffer& knnbuffer::operator=(const knnbuffer &rhs)
{
    if( this==&rhs )
        return *this;
    _knnbuffer_owner::operator=(rhs);
    return *this;
}


knnbuffer::~knnbuffer()
{
}


/*************************************************************************
A KNN builder object; this object encapsulates dataset and all related
settings; it is used to create an actual instance of KNN model.
*************************************************************************/
_knnbuilder_owner::_knnbuilder_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnbuilder_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::knnbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuilder), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
    alglib_impl::_knnbuilder_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}


_knnbuilder_owner::_knnbuilder_owner(const _knnbuilder_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnbuilder_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuilder copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::knnbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuilder), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
    alglib_impl::_knnbuilder_init_copy(p_struct, const_cast<alglib_impl::knnbuilder*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}


_knnbuilder_owner& _knnbuilder_owner::operator=(const _knnbuilder_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnbuilder assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuilder assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_knnbuilder_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
    alglib_impl::_knnbuilder_init_copy(p_struct, const_cast<alglib_impl::knnbuilder*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}


_knnbuilder_owner::~_knnbuilder_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_knnbuilder_destroy(p_struct);
        ae_free(p_struct);
    }
}


alglib_impl::knnbuilder* _knnbuilder_owner::c_ptr()
{
    return p_struct;
}


alglib_impl::knnbuilder* _knnbuilder_owner::c_ptr() const
{
    return const_cast<alglib_impl::knnbuilder*>(p_struct);
}

knnbuilder::knnbuilder() : _knnbuilder_owner()
{
}


knnbuilder::knnbuilder(const knnbuilder &rhs):_knnbuilder_owner(rhs)
{
}


knnbuilder& knnbuilder::operator=(const knnbuilder &rhs)
{
    if( this==&rhs )
        return *this;
    _knnbuilder_owner::operator=(rhs);
    return *this;
}


knnbuilder::~knnbuilder()
{
}


/*************************************************************************
KNN model, can be used for classification or regression
*************************************************************************/
_knnmodel_owner::_knnmodel_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::knnmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
    alglib_impl::_knnmodel_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}


_knnmodel_owner::_knnmodel_owner(const _knnmodel_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnmodel_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnmodel copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::knnmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnmodel), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
    alglib_impl::_knnmodel_init_copy(p_struct, const_cast<alglib_impl::knnmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}


_knnmodel_owner& _knnmodel_owner::operator=(const _knnmodel_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnmodel assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnmodel assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_knnmodel_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
    alglib_impl::_knnmodel_init_copy(p_struct, const_cast<alglib_impl::knnmodel*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}


_knnmodel_owner::~_knnmodel_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_knnmodel_destroy(p_struct);
        ae_free(p_struct);
    }
}


alglib_impl::knnmodel* _knnmodel_owner::c_ptr()
{
    return p_struct;
}


alglib_impl::knnmodel* _knnmodel_owner::c_ptr() const
{
    return const_cast<alglib_impl::knnmodel*>(p_struct);
}

knnmodel::knnmodel() : _knnmodel_owner()
{
}


knnmodel::knnmodel(const knnmodel &rhs):_knnmodel_owner(rhs)
{
}


knnmodel& knnmodel::operator=(const knnmodel &rhs)
{
    if( this==&rhs )
        return *this;
    _knnmodel_owner::operator=(rhs);
    return *this;
}


knnmodel::~knnmodel()
{
}


/*************************************************************************
KNN training report.

Following fields store training set errors:
* relclserror   -   fraction of misclassified cases, [0,1]
* avgce         -   average cross-entropy in bits per symbol
* rmserror      -   root-mean-square error
* avgerror      -   average error
* avgrelerror   -   average relative error

For classification problems:
* RMS, AVG and AVGREL errors are calculated for posterior probabilities

For regression problems:
* RELCLS and AVGCE errors are zero
*************************************************************************/
_knnreport_owner::_knnreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::knnreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnreport));
    alglib_impl::_knnreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}


_knnreport_owner::_knnreport_owner(const _knnreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_knnreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::knnreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::knnreport));
    alglib_impl::_knnreport_init_copy(p_struct, const_cast<alglib_impl::knnreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}


_knnreport_owner& _knnreport_owner::operator=(const _knnreport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnreport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_knnreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::knnreport));
    alglib_impl::_knnreport_init_copy(p_struct, const_cast<alglib_impl::knnreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}


_knnreport_owner::~_knnreport_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_knnreport_destroy(p_struct);
        ae_free(p_struct);
    }
}


alglib_impl::knnreport* _knnreport_owner::c_ptr()
{
    return p_struct;
}


alglib_impl::knnreport* _knnreport_owner::c_ptr() const
{
    return const_cast<alglib_impl::knnreport*>(p_struct);
}

knnreport::knnreport() : _knnreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}


knnreport::knnreport(const knnreport &rhs):_knnreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}


knnreport& knnreport::operator=(const knnreport &rhs)
{
    if( this==&rhs )
        return *this;
    _knnreport_owner::operator=(rhs);
    return *this;
}


knnreport::~knnreport()
{
}


/*************************************************************************
This function serializes data structure to string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although serializer uses spaces and CR+LF as separators, you can
  replace any separator character by an arbitrary combination of spaces,
  tabs, Windows or Unix newlines. It allows flexible reformatting of
  the string in case you want to include it into a text or XML file.
  But you should not insert separators into the middle of the "words"
  nor should you change the case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
  and big endian machines, and so on. You can serialize structure on
  32-bit machine and unserialize it on 64-bit one (or vice versa), or
  serialize it on SPARC and unserialize on x86. You can also
  serialize it in C++ version of ALGLIB and unserialize in C# one,
  and vice versa.
*************************************************************************/
void knnserialize(knnmodel &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::knnalloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::knnserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
This function unserializes data structure from string.
*************************************************************************/
void knnunserialize(const std::string &s_in, knnmodel &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::knnunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
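
/*
A minimal round-trip sketch for the string serializer above; `model` is an
assumed, already trained knnmodel. Per the comments above, the produced
text is portable across platforms and across ALGLIB language ports.

    std::string blob;
    alglib::knnserialize(model, blob);       // model -> portable text
    alglib::knnmodel restored;
    alglib::knnunserialize(blob, restored);  // text -> independent model copy
*/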


/*************************************************************************
This function serializes data structure to C++ stream.

Data stream generated by this function is same as string representation
generated by string version of serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.

We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
*************************************************************************/
void knnserialize(knnmodel &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::knnalloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::knnserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
|
|
/*************************************************************************
This function unserializes data structure from stream.
*************************************************************************/
void knnunserialize(const std::istream &s_in, knnmodel &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::knnunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}

/*************************************************************************
This function creates buffer structure which can be used to perform
parallel KNN requests.

KNN subpackage provides two sets of computing functions - ones which use
internal buffer of KNN model (these functions are single-threaded because
they use the same buffer, which can not be shared between threads), and
ones which use external buffer.

This function is used to initialize external buffer.

INPUT PARAMETERS
    Model       -   KNN model which is associated with newly created buffer

OUTPUT PARAMETERS
    Buf         -   external buffer.


IMPORTANT: buffer object should be used only with model which was used to
           initialize buffer. Any attempt to use buffer with a different
           object is dangerous - you may get integrity check failure
           (exception) because sizes of internal arrays do not fit to
           dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knncreatebuffer(const knnmodel &model, knnbuffer &buf, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knncreatebuffer(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnbuffer*>(buf.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This subroutine creates KNNBuilder object which is used to train KNN models.

By default, new builder stores empty dataset and some reasonable default
settings. At the very least, you should specify dataset prior to building
KNN model. You can also tweak settings of the model construction algorithm
(recommended, although default settings should work well).

Following actions are mandatory:
* calling knnbuildersetdataset() to specify dataset
* calling knnbuilderbuildknnmodel() to build KNN model using current
  dataset and default settings

Additionally, you may call:
* knnbuildersetnorm() to change norm being used

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildercreate(knnbuilder &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnbuildercreate(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

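/*
Usage sketch (not part of the library): the full builder pipeline for a
small classification problem. Dataset values, K=3 and the variable names
are illustrative only.

    alglib::knnbuilder builder;
    alglib::knnbuildercreate(builder);
    alglib::real_2d_array xy = "[[0,0,0],[0,1,0],[1,0,1],[1,1,1]]";
    alglib::knnbuildersetdatasetcls(builder, xy, 4, 2, 2);   // 4 points, 2 vars, 2 classes
    alglib::knnmodel model;
    alglib::knnreport rep;
    alglib::knnbuilderbuildknnmodel(builder, 3, 0.0, model, rep); // K=3, exact search
    alglib::ae_int_t cls = alglib::knnclassify(model, alglib::real_1d_array("[1,1]"));
*/
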
/*************************************************************************
Specifies regression problem (one or more continuous output variables are
predicted). There also exists "classification" version of this function.

This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+NOut] (note: actual size can be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next NOut elements store values of the dependent
                      variables
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NOut        -   number of dependent variables, NOut>=1

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetreg(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nout, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnbuildersetdatasetreg(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nout, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

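/*
Row layout sketch (illustrative values only): for NVars=2 independent
variables and NOut=1 dependent variable, each row of XY is [x0, x1, y].
'builder' is assumed to come from knnbuildercreate().

    alglib::real_2d_array xy = "[[0.0,0.0,1.5],[0.0,1.0,2.5],[1.0,0.0,3.5]]";
    alglib::knnbuildersetdatasetreg(builder, xy, 3, 2, 1);
*/
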
/*************************************************************************
Specifies classification problem (two or more classes are predicted).
There also exists "regression" version of this function.

This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+1] (note: actual size can be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next element stores class index, in [0,NClasses)
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetcls(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnbuildersetdatasetcls(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function sets norm type used for neighbor search.

INPUT PARAMETERS:
    S           -   KNN builder object
    NormType    -   norm type:
                    * 0 inf-norm
                    * 1 1-norm
                    * 2 Euclidean norm (default)

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetnorm(const knnbuilder &s, const ae_int_t nrmtype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnbuildersetnorm(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), nrmtype, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

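/*
Usage sketch (illustrative): switch the builder created earlier from the
default Euclidean norm to the 1-norm before building the model.

    alglib::knnbuildersetnorm(builder, 1); // 0=inf-norm, 1=1-norm, 2=Euclidean
*/
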
/*************************************************************************
This subroutine builds KNN model according to current settings, using
dataset internally stored in the builder object.

The model being built performs inference using Eps-approximate K nearest
neighbors search algorithm, with:
* K=1,  Eps=0 corresponding to the "nearest neighbor algorithm"
* K>1,  Eps=0 corresponding to the "K nearest neighbors algorithm"
* K>=1, Eps>0 corresponding to "approximate nearest neighbors algorithm"

An approximate KNN is a good option for high-dimensional datasets (exact
KNN works slowly when the dimension count grows).

An ALGLIB implementation of kd-trees is used to perform k-nn searches.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   KNN builder object
    K           -   number of neighbors to search for, K>=1
    Eps         -   approximation factor:
                    * Eps=0 means that exact kNN search is performed
                    * Eps>0 means that (1+Eps)-approximate search is performed

OUTPUT PARAMETERS:
    Model       -   KNN model
    Rep         -   report

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuilderbuildknnmodel(const knnbuilder &s, const ae_int_t k, const double eps, knnmodel &model, knnreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnbuilderbuildknnmodel(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), k, eps, const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

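/*
Usage sketch (illustrative): trading exactness for speed on a
high-dimensional dataset. With Eps=0.5, every reported neighbor is at most
a factor of (1+0.5) farther away than the corresponding true nearest
neighbor.

    alglib::knnbuilderbuildknnmodel(builder, 5, 0.5, model, rep); // K=5, 1.5-approximate
*/
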
/*************************************************************************
Changing search settings of KNN model.

K and EPS parameters of KNN (AKNN) search are specified during model
construction. However, plain KNN algorithm with Euclidean distance allows
you to change them at any moment.

NOTE: future versions of KNN model may support advanced versions of KNN,
      such as NCA or LMNN. It is possible that such algorithms won't allow
      you to change search settings on the fly. If you call this function
      for an algorithm which does not support on-the-fly changes, it will
      throw an exception.

INPUT PARAMETERS:
    Model   -   KNN model
    K       -   K>=1, neighbors count
    EPS     -   accuracy of the EPS-approximate NN search. Set to 0.0, if
                you want to perform "classic" KNN search. Specify larger
                values if you need to speed-up high-dimensional KNN
                queries.

OUTPUT PARAMETERS:
    nothing on success, exception on failure

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnrewritekeps(const knnmodel &model, const ae_int_t k, const double eps, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnrewritekeps(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), k, eps, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

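/*
Usage sketch (illustrative): tighten an existing model from approximate to
exact search without rebuilding it.

    alglib::knnrewritekeps(model, 5, 0.0); // K=5, exact ("classic") KNN search
*/
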
/*************************************************************************
Inference using KNN model.

See also knnprocess0(), knnprocessi() and knnclassify() for options with a
bit more convenient interface.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use same model object for parallel
           evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   KNN model
    X       -   input vector, array[0..NVars-1].
    Y       -   possible preallocated buffer. Reused if long enough.

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocess(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnprocess(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

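/*
Usage sketch (illustrative): single-threaded inference with a reused
output buffer. Y is only reallocated if it is too short, so keeping one
vector around avoids repeated allocations in a scoring loop.

    alglib::real_1d_array x = "[0.2,0.7]";
    alglib::real_1d_array y;
    alglib::knnprocess(model, x, y); // y = estimate / posterior probabilities
*/
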
/*************************************************************************
This function returns first component of the inferred vector (i.e. one
with index #0).

It is a convenience wrapper for knnprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems

In the former case this function returns inference result as scalar, which
is definitely more convenient than wrapping it as a vector. In the latter
case it returns probability of object belonging to class #0.

If you call it for anything different from two cases above, it will work
as defined, i.e. return y[0], although it is of less use in such cases.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use same model object for parallel
           evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   KNN model
    X       -   input vector, array[0..NVars-1].

RESULT:
    Y[0]

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnprocess0(const knnmodel &model, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnprocess0(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

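/*
Usage sketch (illustrative): for a 2-class model, knnprocess0() returns
the posterior probability of class #0 directly as a scalar ('x' as in the
previous sketch).

    double p0 = alglib::knnprocess0(model, x); // P(class 0)
    double p1 = 1.0 - p0;                      // two classes sum to one
*/
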
/*************************************************************************
This function returns most probable class number for an input X. It is
same as calling knnprocess(model,x,y), then determining i=argmax(y[i]) and
returning i.

A class number in [0,NOut) range is returned for classification problems,
-1 is returned when this function is called for regression problems.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use same model object for parallel
           evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   KNN model
    X       -   input vector, array[0..NVars-1].

RESULT:
    class number, -1 for regression tasks

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t knnclassify(const knnmodel &model, const real_1d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::ae_int_t result = alglib_impl::knnclassify(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<ae_int_t*>(&result));
}

/*************************************************************************
'interactive' variant of knnprocess() for languages like Python which
support constructs like "y = knnprocessi(model,x)" and interactive mode of
the interpreter.

This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.

IMPORTANT: this function is thread-unsafe and may modify internal
           structures of the model! You can not use same model object for
           parallel evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocessi(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnprocessi(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Thread-safe processing using external buffer for temporaries.

This function is thread-safe (i.e. you can use same KNN model from
multiple threads) as long as you use different buffer objects for different
threads.

INPUT PARAMETERS:
    Model   -   KNN model
    Buf     -   buffer object, must be allocated specifically for this
                model with knncreatebuffer().
    X       -   input vector, array[NVars]

OUTPUT PARAMETERS:
    Y       -   result, array[NOut]. Regression estimate when solving
                regression task, vector of posterior probabilities for
                a classification task.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knntsprocess(const knnmodel &model, const knnbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knntsprocess(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnbuffer*>(buf.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

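/*
Usage sketch (not part of the library): thread-safe parallel scoring with
one knnbuffer per thread. Assumes C++11 <thread>/<functional> and input
vectors x0,x1 / output vectors y0,y1 of type alglib::real_1d_array; the
model itself is shared read-only.

    auto worker = [&model](const alglib::real_1d_array &x, alglib::real_1d_array &y) {
        alglib::knnbuffer buf;
        alglib::knncreatebuffer(model, buf);    // one buffer per thread
        alglib::knntsprocess(model, buf, x, y); // never touches model's internal buffer
    };
    std::thread t0(worker, std::cref(x0), std::ref(y0));
    std::thread t1(worker, std::cref(x1), std::ref(y1));
    t0.join(); t1.join();
*/
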
/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
    Zero if model solves regression task.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrelclserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnrelclserror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/NPoints.
    Zero if model solves regression task.

NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
      models (such models can report exactly zero probabilities), so we
      do not recommend using it.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgce(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnavgce(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
RMS error on the test set.

Its meaning for regression task is obvious. As for classification problems,
RMS error means error when estimating posterior probabilities.

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrmserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnrmserror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average error on the test set

Its meaning for regression task is obvious. As for classification problems,
average error means error when estimating posterior probabilities.

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnavgerror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Average relative error on the test set

Its meaning for regression task is obvious. As for classification problems,
average relative error means error when estimating posterior probabilities.

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgrelerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    double result = alglib_impl::knnavgrelerror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return *(reinterpret_cast<double*>(&result));
}

/*************************************************************************
Calculates all kinds of errors for the model in one call.

INPUT PARAMETERS:
    Model   -   KNN model
    XY      -   test set:
                * one row per point
                * first NVars columns store independent variables
                * depending on problem type:
                  * next column stores class number in [0,NClasses) - for
                    classification problems
                  * next NOut columns store dependent variables - for
                    regression problems
    NPoints -   test set size, NPoints>=0

OUTPUT PARAMETERS:
    Rep     -   following fields are loaded with errors for both regression
                and classification models:
                * rep.rmserror - RMS error for the output
                * rep.avgerror - average error
                * rep.avgrelerror - average relative error
                following fields are set only for classification models,
                zero for regression ones:
                * relclserror - relative classification error, in [0,1]
                * avgce - average cross-entropy in bits per dataset entry

NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
      models (such models can report exactly zero probabilities), so we
      do not recommend using it.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnallerrors(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, knnreport &rep, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::knnallerrors(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, const_cast<alglib_impl::knnreport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

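/*
Usage sketch (illustrative): computing every error metric with a single
pass over a test set 'xytest' of 'ntest' rows laid out as described above.

    alglib::knnreport rep;
    alglib::knnallerrors(model, xytest, ntest, rep);
    // rep.rmserror / rep.avgerror / rep.avgrelerror are always filled;
    // rep.relclserror / rep.avgce are meaningful for classifiers only.
*/
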
#endif

#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
k-means++ clusterization.
Backward compatibility function; we recommend using the CLUSTERING
subpackage as a better replacement.

  -- ALGLIB --
     Copyright 21.03.2009 by Bochkanov Sergey
*************************************************************************/
void kmeansgenerate(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t k, const ae_int_t restarts, ae_int_t &info, real_2d_array &c, integer_1d_array &xyc, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::kmeansgenerate(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, k, restarts, &info, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(xyc.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

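/*
Usage sketch (illustrative): the legacy k-means++ call. info=1 indicates
success; 'c' then holds the centroids and 'xyc' maps each of the 'npoints'
input points to its cluster index. 'xy', 'npoints' and 'nvars' are assumed
to describe an existing dataset.

    alglib::ae_int_t info;
    alglib::real_2d_array c;
    alglib::integer_1d_array xyc;
    alglib::kmeansgenerate(xy, npoints, nvars, 2, 5, info, c, xyc); // k=2, 5 restarts
*/
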
#endif
}


/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
//
/////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{
#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
|
|
static double bdss_xlny(double x, double y, ae_state *_state);
|
|
static double bdss_getcv(/* Integer */ ae_vector* cnt,
|
|
ae_int_t nc,
|
|
ae_state *_state);
|
|
static void bdss_tieaddc(/* Integer */ ae_vector* c,
|
|
/* Integer */ ae_vector* ties,
|
|
ae_int_t ntie,
|
|
ae_int_t nc,
|
|
/* Integer */ ae_vector* cnt,
|
|
ae_state *_state);
|
|
static void bdss_tiesubc(/* Integer */ ae_vector* c,
|
|
/* Integer */ ae_vector* ties,
|
|
ae_int_t ntie,
|
|
ae_int_t nc,
|
|
/* Integer */ ae_vector* cnt,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_int_t mlpbase_mlpvnum = 7;
|
|
static ae_int_t mlpbase_mlpfirstversion = 0;
|
|
static ae_int_t mlpbase_nfieldwidth = 4;
|
|
static ae_int_t mlpbase_hlconnfieldwidth = 5;
|
|
static ae_int_t mlpbase_hlnfieldwidth = 4;
|
|
static ae_int_t mlpbase_gradbasecasecost = 50000;
|
|
static ae_int_t mlpbase_microbatchsize = 64;
|
|
static void mlpbase_addinputlayer(ae_int_t ncount,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state);
|
|
static void mlpbase_addbiasedsummatorlayer(ae_int_t ncount,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state);
|
|
static void mlpbase_addactivationlayer(ae_int_t functype,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state);
|
|
static void mlpbase_addzerolayer(/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state);
|
|
static void mlpbase_hladdinputlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t nin,
|
|
ae_state *_state);
|
|
static void mlpbase_hladdoutputlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t* weightsidx,
|
|
ae_int_t k,
|
|
ae_int_t nprev,
|
|
ae_int_t nout,
|
|
ae_bool iscls,
|
|
ae_bool islinearout,
|
|
ae_state *_state);
|
|
static void mlpbase_hladdhiddenlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t* weightsidx,
|
|
ae_int_t k,
|
|
ae_int_t nprev,
|
|
ae_int_t ncur,
|
|
ae_state *_state);
|
|
static void mlpbase_fillhighlevelinformation(multilayerperceptron* network,
|
|
ae_int_t nin,
|
|
ae_int_t nhid1,
|
|
ae_int_t nhid2,
|
|
ae_int_t nout,
|
|
ae_bool iscls,
|
|
ae_bool islinearout,
|
|
ae_state *_state);
|
|
static void mlpbase_mlpcreate(ae_int_t nin,
|
|
ae_int_t nout,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t layerscount,
|
|
ae_bool isclsnet,
|
|
multilayerperceptron* network,
|
|
ae_state *_state);
|
|
static void mlpbase_mlphessianbatchinternal(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t ssize,
|
|
ae_bool naturalerr,
|
|
double* e,
|
|
/* Real */ ae_vector* grad,
|
|
/* Real */ ae_matrix* h,
|
|
ae_state *_state);
|
|
static void mlpbase_mlpinternalcalculategradient(multilayerperceptron* network,
|
|
/* Real */ ae_vector* neurons,
|
|
/* Real */ ae_vector* weights,
|
|
/* Real */ ae_vector* derror,
|
|
/* Real */ ae_vector* grad,
|
|
ae_bool naturalerrorfunc,
|
|
ae_state *_state);
|
|
static void mlpbase_mlpchunkedgradient(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t cstart,
|
|
ae_int_t csize,
|
|
/* Real */ ae_vector* batch4buf,
|
|
/* Real */ ae_vector* hpcbuf,
|
|
double* e,
|
|
ae_bool naturalerrorfunc,
|
|
ae_state *_state);
|
|
static void mlpbase_mlpchunkedprocess(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t cstart,
|
|
ae_int_t csize,
|
|
/* Real */ ae_vector* batch4buf,
|
|
/* Real */ ae_vector* hpcbuf,
|
|
ae_state *_state);
|
|
static double mlpbase_safecrossentropy(double t,
|
|
double z,
|
|
ae_state *_state);
|
|
static void mlpbase_randomizebackwardpass(multilayerperceptron* network,
|
|
ae_int_t neuronidx,
|
|
double v,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_bool ssa_hassomethingtoanalyze(ssamodel* s, ae_state *_state);
|
|
static ae_bool ssa_issequencebigenough(ssamodel* s,
|
|
ae_int_t i,
|
|
ae_state *_state);
|
|
static void ssa_updatebasis(ssamodel* s,
|
|
ae_int_t appendlen,
|
|
double updateits,
|
|
ae_state *_state);
|
|
static void ssa_analyzesequence(ssamodel* s,
|
|
/* Real */ ae_vector* data,
|
|
ae_int_t i0,
|
|
ae_int_t i1,
|
|
/* Real */ ae_vector* trend,
|
|
/* Real */ ae_vector* noise,
|
|
ae_int_t offs,
|
|
ae_state *_state);
|
|
static void ssa_forecastavgsequence(ssamodel* s,
|
|
/* Real */ ae_vector* data,
|
|
ae_int_t i0,
|
|
ae_int_t i1,
|
|
ae_int_t m,
|
|
ae_int_t forecastlen,
|
|
ae_bool smooth,
|
|
/* Real */ ae_vector* trend,
|
|
ae_int_t offs,
|
|
ae_state *_state);
|
|
static void ssa_realtimedequeue(ssamodel* s,
|
|
double beta,
|
|
ae_int_t cnt,
|
|
ae_state *_state);
|
|
static void ssa_updatexxtprepare(ssamodel* s,
|
|
ae_int_t updatesize,
|
|
ae_int_t windowwidth,
|
|
ae_int_t memorylimit,
|
|
ae_state *_state);
|
|
static void ssa_updatexxtsend(ssamodel* s,
|
|
/* Real */ ae_vector* u,
|
|
ae_int_t i0,
|
|
/* Real */ ae_matrix* xxt,
|
|
ae_state *_state);
|
|
static void ssa_updatexxtfinalize(ssamodel* s,
|
|
/* Real */ ae_matrix* xxt,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_int_t linreg_lrvnum = 5;
|
|
static void linreg_lrinternal(/* Real */ ae_matrix* xy,
|
|
/* Real */ ae_vector* s,
|
|
ae_int_t npoints,
|
|
ae_int_t nvars,
|
|
ae_int_t* info,
|
|
linearmodel* lm,
|
|
lrreport* ar,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
|
|
static double logit_xtol = 100*ae_machineepsilon;
|
|
static double logit_ftol = 0.0001;
|
|
static double logit_gtol = 0.3;
|
|
static ae_int_t logit_maxfev = 20;
|
|
static double logit_stpmin = 1.0E-2;
|
|
static double logit_stpmax = 1.0E5;
|
|
static ae_int_t logit_logitvnum = 6;
|
|
static void logit_mnliexp(/* Real */ ae_vector* w,
|
|
/* Real */ ae_vector* x,
|
|
ae_state *_state);
|
|
static void logit_mnlallerrors(logitmodel* lm,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double* relcls,
|
|
double* avgce,
|
|
double* rms,
|
|
double* avg,
|
|
double* avgrel,
|
|
ae_state *_state);
|
|
static void logit_mnlmcsrch(ae_int_t n,
|
|
/* Real */ ae_vector* x,
|
|
double* f,
|
|
/* Real */ ae_vector* g,
|
|
/* Real */ ae_vector* s,
|
|
double* stp,
|
|
ae_int_t* info,
|
|
ae_int_t* nfev,
|
|
/* Real */ ae_vector* wa,
|
|
logitmcstate* state,
|
|
ae_int_t* stage,
|
|
ae_state *_state);
|
|
static void logit_mnlmcstep(double* stx,
|
|
double* fx,
|
|
double* dx,
|
|
double* sty,
|
|
double* fy,
|
|
double* dy,
|
|
double* stp,
|
|
double fp,
|
|
double dp,
|
|
ae_bool* brackt,
|
|
double stmin,
|
|
double stmax,
|
|
ae_int_t* info,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
|
|
static double mcpd_xtol = 1.0E-8;
|
|
static void mcpd_mcpdinit(ae_int_t n,
|
|
ae_int_t entrystate,
|
|
ae_int_t exitstate,
|
|
mcpdstate* s,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_int_t mlpe_mlpefirstversion = 1;
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
|
|
static double mlptrain_mindecay = 0.001;
|
|
static ae_int_t mlptrain_defaultlbfgsfactor = 6;
|
|
static void mlptrain_mlpkfoldcvgeneral(multilayerperceptron* n,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
ae_int_t foldscount,
|
|
ae_bool lmalgorithm,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
mlpcvreport* cvrep,
|
|
ae_state *_state);
|
|
static void mlptrain_mlpkfoldsplit(/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nclasses,
|
|
ae_int_t foldscount,
|
|
ae_bool stratifiedsplits,
|
|
/* Integer */ ae_vector* folds,
|
|
ae_state *_state);
|
|
static void mlptrain_mthreadcv(mlptrainer* s,
|
|
ae_int_t rowsize,
|
|
ae_int_t nrestarts,
|
|
/* Integer */ ae_vector* folds,
|
|
ae_int_t fold,
|
|
ae_int_t dfold,
|
|
/* Real */ ae_matrix* cvy,
|
|
ae_shared_pool* pooldatacv,
|
|
ae_int_t wcount,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_mlptrain_mthreadcv(mlptrainer* s,
|
|
ae_int_t rowsize,
|
|
ae_int_t nrestarts,
|
|
/* Integer */ ae_vector* folds,
|
|
ae_int_t fold,
|
|
ae_int_t dfold,
|
|
/* Real */ ae_matrix* cvy,
|
|
ae_shared_pool* pooldatacv,
|
|
ae_int_t wcount, ae_state *_state);
|
|
static void mlptrain_mlptrainnetworkx(mlptrainer* s,
|
|
ae_int_t nrestarts,
|
|
ae_int_t algokind,
|
|
/* Integer */ ae_vector* trnsubset,
|
|
ae_int_t trnsubsetsize,
|
|
/* Integer */ ae_vector* valsubset,
|
|
ae_int_t valsubsetsize,
|
|
multilayerperceptron* network,
|
|
mlpreport* rep,
|
|
ae_bool isrootcall,
|
|
ae_shared_pool* sessions,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_mlptrain_mlptrainnetworkx(mlptrainer* s,
|
|
ae_int_t nrestarts,
|
|
ae_int_t algokind,
|
|
/* Integer */ ae_vector* trnsubset,
|
|
ae_int_t trnsubsetsize,
|
|
/* Integer */ ae_vector* valsubset,
|
|
ae_int_t valsubsetsize,
|
|
multilayerperceptron* network,
|
|
mlpreport* rep,
|
|
ae_bool isrootcall,
|
|
ae_shared_pool* sessions, ae_state *_state);
|
|
static void mlptrain_mlptrainensemblex(mlptrainer* s,
|
|
mlpensemble* ensemble,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t nrestarts,
|
|
ae_int_t trainingmethod,
|
|
sinteger* ngrad,
|
|
ae_bool isrootcall,
|
|
ae_shared_pool* esessions,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_mlptrain_mlptrainensemblex(mlptrainer* s,
|
|
mlpensemble* ensemble,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t nrestarts,
|
|
ae_int_t trainingmethod,
|
|
sinteger* ngrad,
|
|
ae_bool isrootcall,
|
|
ae_shared_pool* esessions, ae_state *_state);
|
|
static void mlptrain_mlpstarttrainingx(mlptrainer* s,
|
|
ae_bool randomstart,
|
|
ae_int_t algokind,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
smlptrnsession* session,
|
|
ae_state *_state);
|
|
static ae_bool mlptrain_mlpcontinuetrainingx(mlptrainer* s,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
ae_int_t* ngradbatch,
|
|
smlptrnsession* session,
|
|
ae_state *_state);
|
|
static void mlptrain_mlpebagginginternal(mlpensemble* ensemble,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_bool lmalgorithm,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
mlpcvreport* ooberrors,
|
|
ae_state *_state);
|
|
static void mlptrain_initmlptrnsession(multilayerperceptron* networktrained,
|
|
ae_bool randomizenetwork,
|
|
mlptrainer* trainer,
|
|
smlptrnsession* session,
|
|
ae_state *_state);
|
|
static void mlptrain_initmlptrnsessions(multilayerperceptron* networktrained,
|
|
ae_bool randomizenetwork,
|
|
mlptrainer* trainer,
|
|
ae_shared_pool* sessions,
|
|
ae_state *_state);
|
|
static void mlptrain_initmlpetrnsession(multilayerperceptron* individualnetwork,
|
|
mlptrainer* trainer,
|
|
mlpetrnsession* session,
|
|
ae_state *_state);
|
|
static void mlptrain_initmlpetrnsessions(multilayerperceptron* individualnetwork,
|
|
mlptrainer* trainer,
|
|
ae_shared_pool* sessions,
|
|
ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_int_t clustering_kmeansblocksize = 32;
|
|
static ae_int_t clustering_kmeansparalleldim = 8;
|
|
static ae_int_t clustering_kmeansparallelk = 4;
|
|
static double clustering_complexitymultiplier = 1.0;
|
|
static void clustering_selectinitialcenters(/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nvars,
|
|
ae_int_t initalgo,
|
|
hqrndstate* rs,
|
|
ae_int_t k,
|
|
/* Real */ ae_matrix* ct,
|
|
apbuffers* initbuf,
|
|
ae_shared_pool* updatepool,
|
|
ae_state *_state);
|
|
static ae_bool clustering_fixcenters(/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nvars,
|
|
/* Real */ ae_matrix* ct,
|
|
ae_int_t k,
|
|
apbuffers* initbuf,
|
|
ae_shared_pool* updatepool,
|
|
ae_state *_state);
|
|
static void clustering_clusterizerrunahcinternal(clusterizerstate* s,
|
|
/* Real */ ae_matrix* d,
|
|
ahcreport* rep,
|
|
ae_state *_state);
|
|
static void clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
|
|
ae_int_t nfeatures,
|
|
ae_int_t disttype,
|
|
/* Real */ ae_matrix* d,
|
|
ae_int_t i0,
|
|
ae_int_t i1,
|
|
ae_int_t j0,
|
|
ae_int_t j1,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
|
|
ae_int_t nfeatures,
|
|
ae_int_t disttype,
|
|
/* Real */ ae_matrix* d,
|
|
ae_int_t i0,
|
|
ae_int_t i1,
|
|
ae_int_t j0,
|
|
ae_int_t j1, ae_state *_state);
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
|
|
static ae_int_t dforest_innernodewidth = 3;
|
|
static ae_int_t dforest_leafnodewidth = 2;
|
|
static ae_int_t dforest_dfusestrongsplits = 1;
|
|
static ae_int_t dforest_dfuseevs = 2;
|
|
static ae_int_t dforest_dfuncompressedv0 = 0;
|
|
static ae_int_t dforest_dfcompressedv0 = 1;
|
|
static ae_int_t dforest_needtrngini = 1;
|
|
static ae_int_t dforest_needoobgini = 2;
|
|
static ae_int_t dforest_needpermutation = 3;
|
|
static ae_int_t dforest_permutationimportancebatchsize = 512;
|
|
static void dforest_buildrandomtree(decisionforestbuilder* s,
|
|
ae_int_t treeidx0,
|
|
ae_int_t treeidx1,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_dforest_buildrandomtree(decisionforestbuilder* s,
|
|
ae_int_t treeidx0,
|
|
ae_int_t treeidx1, ae_state *_state);
|
|
static void dforest_buildrandomtreerec(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
ae_int_t workingset,
|
|
ae_int_t varstoselect,
|
|
/* Real */ ae_vector* treebuf,
|
|
dfvotebuf* votebuf,
|
|
hqrndstate* rs,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t oobidx0,
|
|
ae_int_t oobidx1,
|
|
double meanloss,
|
|
double topmostmeanloss,
|
|
ae_int_t* treesize,
|
|
ae_state *_state);
|
|
static void dforest_estimatevariableimportance(decisionforestbuilder* s,
|
|
ae_int_t sessionseed,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
dfreport* rep,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_dforest_estimatevariableimportance(decisionforestbuilder* s,
|
|
ae_int_t sessionseed,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
dfreport* rep, ae_state *_state);
|
|
static void dforest_estimatepermutationimportances(decisionforestbuilder* s,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
ae_shared_pool* permpool,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_state *_state);
|
|
ae_bool _trypexec_dforest_estimatepermutationimportances(decisionforestbuilder* s,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
ae_shared_pool* permpool,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1, ae_state *_state);
|
|
static void dforest_cleanreport(decisionforestbuilder* s,
|
|
dfreport* rep,
|
|
ae_state *_state);
|
|
static double dforest_meannrms2(ae_int_t nclasses,
|
|
/* Integer */ ae_vector* trnlabelsi,
|
|
/* Real */ ae_vector* trnlabelsr,
|
|
     ae_int_t trnidx0,
     ae_int_t trnidx1,
     /* Integer */ ae_vector* tstlabelsi,
     /* Real */ ae_vector* tstlabelsr,
     ae_int_t tstidx0,
     ae_int_t tstidx1,
     /* Integer */ ae_vector* tmpi,
     ae_state *_state);
static void dforest_choosecurrentsplitdense(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     ae_int_t* varsinpool,
     ae_int_t varstoselect,
     hqrndstate* rs,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t* varbest,
     double* splitbest,
     ae_state *_state);
static void dforest_evaluatedensesplit(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     hqrndstate* rs,
     ae_int_t splitvar,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t* info,
     double* split,
     double* rms,
     ae_state *_state);
static void dforest_classifiersplit(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     /* Real */ ae_vector* x,
     /* Integer */ ae_vector* c,
     ae_int_t n,
     hqrndstate* rs,
     ae_int_t* info,
     double* threshold,
     double* e,
     /* Real */ ae_vector* sortrbuf,
     /* Integer */ ae_vector* sortibuf,
     ae_state *_state);
static void dforest_regressionsplit(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_int_t n,
     ae_int_t* info,
     double* threshold,
     double* e,
     /* Real */ ae_vector* sortrbuf,
     /* Real */ ae_vector* sortrbuf2,
     ae_state *_state);
static double dforest_getsplit(decisionforestbuilder* s,
     double a,
     double b,
     hqrndstate* rs,
     ae_state *_state);
static void dforest_outputleaf(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     /* Real */ ae_vector* treebuf,
     dfvotebuf* votebuf,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t oobidx0,
     ae_int_t oobidx1,
     ae_int_t* treesize,
     double leafval,
     ae_state *_state);
static void dforest_analyzeandpreprocessdataset(decisionforestbuilder* s,
     ae_state *_state);
static void dforest_mergetrees(decisionforestbuilder* s,
     decisionforest* df,
     ae_state *_state);
static void dforest_processvotingresults(decisionforestbuilder* s,
     ae_int_t ntrees,
     dfvotebuf* buf,
     dfreport* rep,
     ae_state *_state);
static double dforest_binarycompression(decisionforest* df,
     ae_bool usemantissa8,
     ae_state *_state);
static ae_int_t dforest_computecompressedsizerec(decisionforest* df,
     ae_bool usemantissa8,
     ae_int_t treeroot,
     ae_int_t treepos,
     /* Integer */ ae_vector* compressedsizes,
     ae_bool savecompressedsizes,
     ae_state *_state);
static void dforest_compressrec(decisionforest* df,
     ae_bool usemantissa8,
     ae_int_t treeroot,
     ae_int_t treepos,
     /* Integer */ ae_vector* compressedsizes,
     ae_vector* buf,
     ae_int_t* dstoffs,
     ae_state *_state);
static ae_int_t dforest_computecompresseduintsize(ae_int_t v,
     ae_state *_state);
static void dforest_streamuint(ae_vector* buf,
     ae_int_t* offs,
     ae_int_t v,
     ae_state *_state);
static ae_int_t dforest_unstreamuint(ae_vector* buf,
     ae_int_t* offs,
     ae_state *_state);
static void dforest_streamfloat(ae_vector* buf,
     ae_bool usemantissa8,
     ae_int_t* offs,
     double v,
     ae_state *_state);
static double dforest_unstreamfloat(ae_vector* buf,
     ae_bool usemantissa8,
     ae_int_t* offs,
     ae_state *_state);
static ae_int_t dforest_dfclserror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state);
static void dforest_dfprocessinternaluncompressed(decisionforest* df,
     ae_int_t subtreeroot,
     ae_int_t nodeoffs,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state);
static void dforest_dfprocessinternalcompressed(decisionforest* df,
     ae_int_t offs,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state);
static double dforest_xfastpow(double r, ae_int_t n, ae_state *_state);


#endif

#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
static ae_int_t knn_knnfirstversion = 0;
static void knn_clearreport(knnreport* rep, ae_state *_state);
static void knn_processinternal(knnmodel* model,
     knnbuffer* buf,
     ae_state *_state);


#endif

#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)


#endif


#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
Principal components analysis

This function builds orthogonal basis where first axis corresponds to
direction with maximum variance, second axis maximizes variance in the
subspace orthogonal to first axis and so on.

This function builds FULL basis, i.e. returns N vectors corresponding to
ALL directions, no matter how informative. If you need just a few (say,
10 or 50) of the most important directions, you may find it faster to use
one of the reduced versions:
* pcatruncatedsubspace() - for subspace iteration based method

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   dataset, array[0..NPoints-1,0..NVars-1].
                    matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if SVD subroutine hasn't converged
                    * -1, if wrong parameters have been passed (NPoints<0,
                          NVars<1)
                    *  1, if task is solved
    S2          -   array[0..NVars-1]. variance values corresponding
                    to basis vectors.
    V           -   array[0..NVars-1,0..NVars-1]
                    matrix, whose columns store basis vectors.

  -- ALGLIB --
     Copyright 25.08.2008 by Bochkanov Sergey
*************************************************************************/
void pcabuildbasis(/* Real */ ae_matrix* x,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     /* Real */ ae_vector* s2,
     /* Real */ ae_matrix* v,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_matrix u;
    ae_matrix vt;
    ae_vector m;
    ae_vector t;
    ae_int_t i;
    ae_int_t j;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    ae_frame_make(_state, &_frame_block);
    memset(&a, 0, sizeof(a));
    memset(&u, 0, sizeof(u));
    memset(&vt, 0, sizeof(vt));
    memset(&m, 0, sizeof(m));
    memset(&t, 0, sizeof(t));
    *info = 0;
    ae_vector_clear(s2);
    ae_matrix_clear(v);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&u, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&vt, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&m, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    /*
     * Check input data
     */
    if( npoints<0||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    *info = 1;

    /*
     * Special case: NPoints=0
     */
    if( npoints==0 )
    {
        ae_vector_set_length(s2, nvars, _state);
        ae_matrix_set_length(v, nvars, nvars, _state);
        for(i=0; i<=nvars-1; i++)
        {
            s2->ptr.p_double[i] = (double)(0);
        }
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                if( i==j )
                {
                    v->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    v->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * Calculate means
     */
    ae_vector_set_length(&m, nvars, _state);
    ae_vector_set_length(&t, npoints, _state);
    for(j=0; j<=nvars-1; j++)
    {
        ae_v_move(&t.ptr.p_double[0], 1, &x->ptr.pp_double[0][j], x->stride, ae_v_len(0,npoints-1));
        samplemoments(&t, npoints, &mean, &variance, &skewness, &kurtosis, _state);
        m.ptr.p_double[j] = mean;
    }

    /*
     * Center, apply SVD, prepare output
     */
    ae_matrix_set_length(&a, ae_maxint(npoints, nvars, _state), nvars, _state);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&a.ptr.pp_double[i][0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        ae_v_sub(&a.ptr.pp_double[i][0], 1, &m.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
    }
    for(i=npoints; i<=nvars-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            a.ptr.pp_double[i][j] = (double)(0);
        }
    }
    if( !rmatrixsvd(&a, ae_maxint(npoints, nvars, _state), nvars, 0, 1, 2, s2, &u, &vt, _state) )
    {
        *info = -4;
        ae_frame_leave(_state);
        return;
    }
    if( npoints!=1 )
    {
        for(i=0; i<=nvars-1; i++)
        {
            s2->ptr.p_double[i] = ae_sqr(s2->ptr.p_double[i], _state)/(npoints-1);
        }
    }
    ae_matrix_set_length(v, nvars, nvars, _state);
    copyandtranspose(&vt, 0, nvars-1, 0, nvars-1, v, 0, nvars-1, 0, nvars-1, _state);
    ae_frame_leave(_state);
}
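

/*************************************************************************
Usage sketch for pcabuildbasis(). Illustrative only: it assumes the public
C++ wrapper declared in dataanalysis.h, and the 2x3 dataset is
hypothetical.

    alglib::real_2d_array xy = "[[1,2,3],[3,2,1]]";   // 2 points, 3 vars
    alglib::ae_int_t info;
    alglib::real_1d_array s2;                         // variances
    alglib::real_2d_array basis;                      // columns = axes
    alglib::pcabuildbasis(xy, 2, 3, info, s2, basis); // info==1 on success

On success S2[0]>=S2[1]>=S2[2] and the columns of Basis form an
orthonormal set.
*************************************************************************/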


/*************************************************************************
Principal components analysis

This function performs truncated PCA, i.e. returns just a few most important
directions.

Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required. Thus, if you need full basis,
it is better to use pcabuildbasis() function.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   dataset, array[0..NPoints-1,0..NVars-1].
                    matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NNeeded     -   number of requested components, in [1,NVars] range;
                    this function is efficient only for NNeeded<<NVars.
    Eps         -   desired precision of vectors returned; underlying
                    solver will stop iterations as soon as absolute error
                    in corresponding singular values reduces to roughly
                    eps*MAX(lambda[]), with lambda[] being array of eigen
                    values.
                    Zero value means that algorithm performs number of
                    iterations specified by maxits parameter, without
                    paying attention to precision.
    MaxIts      -   number of iterations performed by subspace iteration
                    method. Zero value means that no limit on iteration
                    count is placed (eps-based stopping condition is used).

OUTPUT PARAMETERS:
    S2          -   array[NNeeded]. Variance values corresponding
                    to basis vectors.
    V           -   array[NVars,NNeeded]
                    matrix, whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in small eps being selected as
      stopping condition. Exact value of automatically selected eps is
      version-dependent.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspace(/* Real */ ae_matrix* x,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nneeded,
     double eps,
     ae_int_t maxits,
     /* Real */ ae_vector* s2,
     /* Real */ ae_matrix* v,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_matrix b;
    ae_vector means;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double vv;
    eigsubspacestate solver;
    eigsubspacereport rep;

    ae_frame_make(_state, &_frame_block);
    memset(&a, 0, sizeof(a));
    memset(&b, 0, sizeof(b));
    memset(&means, 0, sizeof(means));
    memset(&solver, 0, sizeof(solver));
    memset(&rep, 0, sizeof(rep));
    ae_vector_clear(s2);
    ae_matrix_clear(v);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&b, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
    _eigsubspacestate_init(&solver, _state, ae_true);
    _eigsubspacereport_init(&rep, _state, ae_true);

    ae_assert(npoints>=0, "PCATruncatedSubspace: npoints<0", _state);
    ae_assert(nvars>=1, "PCATruncatedSubspace: nvars<1", _state);
    ae_assert(nneeded>0, "PCATruncatedSubspace: nneeded<1", _state);
    ae_assert(nneeded<=nvars, "PCATruncatedSubspace: nneeded>nvars", _state);
    ae_assert(maxits>=0, "PCATruncatedSubspace: maxits<0", _state);
    ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "PCATruncatedSubspace: eps<0 or is not finite", _state);
    ae_assert(x->rows>=npoints, "PCATruncatedSubspace: rows(x)<npoints", _state);
    ae_assert(x->cols>=nvars||npoints==0, "PCATruncatedSubspace: cols(x)<nvars", _state);

    /*
     * Special case: NPoints=0
     */
    if( npoints==0 )
    {
        ae_vector_set_length(s2, nneeded, _state);
        ae_matrix_set_length(v, nvars, nneeded, _state);
        for(i=0; i<=nvars-1; i++)
        {
            s2->ptr.p_double[i] = (double)(0);
        }
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nneeded-1; j++)
            {
                if( i==j )
                {
                    v->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    v->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * Center matrix
     */
    ae_vector_set_length(&means, nvars, _state);
    for(i=0; i<=nvars-1; i++)
    {
        means.ptr.p_double[i] = (double)(0);
    }
    vv = (double)1/(double)npoints;
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_addd(&means.ptr.p_double[0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), vv);
    }
    ae_matrix_set_length(&a, npoints, nvars, _state);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&a.ptr.pp_double[i][0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        ae_v_sub(&a.ptr.pp_double[i][0], 1, &means.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
    }

    /*
     * Find eigenvalues with subspace iteration solver
     */
    eigsubspacecreate(nvars, nneeded, &solver, _state);
    eigsubspacesetcond(&solver, eps, maxits, _state);
    eigsubspaceoocstart(&solver, 0, _state);
    while(eigsubspaceooccontinue(&solver, _state))
    {
        ae_assert(solver.requesttype==0, "PCATruncatedSubspace: integrity check failed", _state);
        k = solver.requestsize;
        rmatrixsetlengthatleast(&b, npoints, k, _state);
        rmatrixgemm(npoints, k, nvars, 1.0, &a, 0, 0, 0, &solver.x, 0, 0, 0, 0.0, &b, 0, 0, _state);
        rmatrixgemm(nvars, k, npoints, 1.0, &a, 0, 0, 1, &b, 0, 0, 0, 0.0, &solver.ax, 0, 0, _state);
    }
    eigsubspaceoocstop(&solver, s2, v, &rep, _state);
    if( npoints!=1 )
    {
        for(i=0; i<=nneeded-1; i++)
        {
            s2->ptr.p_double[i] = s2->ptr.p_double[i]/(npoints-1);
        }
    }
    ae_frame_leave(_state);
}
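

/*************************************************************************
Usage sketch for pcatruncatedsubspace(). Illustrative only: it assumes the
public C++ wrapper declared in dataanalysis.h; the dataset dimensions and
contents are hypothetical. Requesting NNeeded=2 of 100 variables with
eps=1e-6 and no explicit iteration limit:

    alglib::real_2d_array xy;
    xy.setlength(1000, 100);                 // ... fill with data ...
    alglib::real_1d_array s2;                // 2 variances on output
    alglib::real_2d_array vsub;              // 100x2 basis on output
    alglib::pcatruncatedsubspace(xy, 1000, 100, 2, 1.0e-6, 0, s2, vsub);

For NNeeded<<NVars this avoids the full SVD used by pcabuildbasis().
*************************************************************************/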


/*************************************************************************
Sparse truncated principal components analysis

This function performs sparse truncated PCA, i.e. returns just a few most
important principal components for a sparse input X.

Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    X           -   sparse dataset, sparse npoints*nvars matrix. It is
                    recommended to use CRS sparse storage format; non-CRS
                    input will be internally converted to CRS.
                    Matrix contains ONLY INDEPENDENT VARIABLES, and must
                    be EXACTLY npoints*nvars.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NNeeded     -   number of requested components, in [1,NVars] range;
                    this function is efficient only for NNeeded<<NVars.
    Eps         -   desired precision of vectors returned; underlying
                    solver will stop iterations as soon as absolute error
                    in corresponding singular values reduces to roughly
                    eps*MAX(lambda[]), with lambda[] being array of eigen
                    values.
                    Zero value means that algorithm performs number of
                    iterations specified by maxits parameter, without
                    paying attention to precision.
    MaxIts      -   number of iterations performed by subspace iteration
                    method. Zero value means that no limit on iteration
                    count is placed (eps-based stopping condition is used).

OUTPUT PARAMETERS:
    S2          -   array[NNeeded]. Variance values corresponding
                    to basis vectors.
    V           -   array[NVars,NNeeded]
                    matrix, whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in small eps being selected as
      a stopping condition. Exact value of automatically selected eps is
      version-dependent.

NOTE: zero MaxIts is silently replaced by some reasonable value which
      prevents eternal loops (possible when inputs are degenerate and too
      stringent stopping criteria are specified). In current version it
      is 50+2*NVars.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspacesparse(sparsematrix* x,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nneeded,
     double eps,
     ae_int_t maxits,
     /* Real */ ae_vector* s2,
     /* Real */ ae_matrix* v,
     ae_state *_state)
{
    ae_frame _frame_block;
    sparsematrix xcrs;
    ae_vector b1;
    ae_vector c1;
    ae_vector z1;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double vv;
    ae_vector means;
    eigsubspacestate solver;
    eigsubspacereport rep;

    ae_frame_make(_state, &_frame_block);
    memset(&xcrs, 0, sizeof(xcrs));
    memset(&b1, 0, sizeof(b1));
    memset(&c1, 0, sizeof(c1));
    memset(&z1, 0, sizeof(z1));
    memset(&means, 0, sizeof(means));
    memset(&solver, 0, sizeof(solver));
    memset(&rep, 0, sizeof(rep));
    ae_vector_clear(s2);
    ae_matrix_clear(v);
    _sparsematrix_init(&xcrs, _state, ae_true);
    ae_vector_init(&b1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&c1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&z1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
    _eigsubspacestate_init(&solver, _state, ae_true);
    _eigsubspacereport_init(&rep, _state, ae_true);

    ae_assert(npoints>=0, "PCATruncatedSubspaceSparse: npoints<0", _state);
    ae_assert(nvars>=1, "PCATruncatedSubspaceSparse: nvars<1", _state);
    ae_assert(nneeded>0, "PCATruncatedSubspaceSparse: nneeded<1", _state);
    ae_assert(nneeded<=nvars, "PCATruncatedSubspaceSparse: nneeded>nvars", _state);
    ae_assert(maxits>=0, "PCATruncatedSubspaceSparse: maxits<0", _state);
    ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "PCATruncatedSubspaceSparse: eps<0 or is not finite", _state);
    if( npoints>0 )
    {
        ae_assert(sparsegetnrows(x, _state)==npoints, "PCATruncatedSubspaceSparse: rows(x)!=npoints", _state);
        ae_assert(sparsegetncols(x, _state)==nvars, "PCATruncatedSubspaceSparse: cols(x)!=nvars", _state);
    }

    /*
     * Special case: NPoints=0
     */
    if( npoints==0 )
    {
        ae_vector_set_length(s2, nneeded, _state);
        ae_matrix_set_length(v, nvars, nneeded, _state);
        for(i=0; i<=nvars-1; i++)
        {
            s2->ptr.p_double[i] = (double)(0);
        }
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nneeded-1; j++)
            {
                if( i==j )
                {
                    v->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    v->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * If input data are not in CRS format, perform conversion to CRS
     */
    if( !sparseiscrs(x, _state) )
    {
        sparsecopytocrs(x, &xcrs, _state);
        pcatruncatedsubspacesparse(&xcrs, npoints, nvars, nneeded, eps, maxits, s2, v, _state);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Initialize parameters, prepare buffers
     */
    ae_vector_set_length(&b1, npoints, _state);
    ae_vector_set_length(&z1, nvars, _state);
    if( ae_fp_eq(eps,(double)(0))&&maxits==0 )
    {
        eps = 1.0E-6;
    }
    if( maxits==0 )
    {
        maxits = 50+2*nvars;
    }

    /*
     * Calculate mean values
     */
    vv = (double)1/(double)npoints;
    for(i=0; i<=npoints-1; i++)
    {
        b1.ptr.p_double[i] = vv;
    }
    sparsemtv(x, &b1, &means, _state);

    /*
     * Find eigenvalues with subspace iteration solver
     */
    eigsubspacecreate(nvars, nneeded, &solver, _state);
    eigsubspacesetcond(&solver, eps, maxits, _state);
    eigsubspaceoocstart(&solver, 0, _state);
    while(eigsubspaceooccontinue(&solver, _state))
    {
        ae_assert(solver.requesttype==0, "PCATruncatedSubspace: integrity check failed", _state);
        for(k=0; k<=solver.requestsize-1; k++)
        {

            /*
             * Calculate B1=(X-meansX)*Zk
             */
            ae_v_move(&z1.ptr.p_double[0], 1, &solver.x.ptr.pp_double[0][k], solver.x.stride, ae_v_len(0,nvars-1));
            sparsemv(x, &z1, &b1, _state);
            vv = ae_v_dotproduct(&solver.x.ptr.pp_double[0][k], solver.x.stride, &means.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
            for(i=0; i<=npoints-1; i++)
            {
                b1.ptr.p_double[i] = b1.ptr.p_double[i]-vv;
            }

            /*
             * Calculate (X-meansX)^T*B1
             */
            sparsemtv(x, &b1, &c1, _state);
            vv = (double)(0);
            for(i=0; i<=npoints-1; i++)
            {
                vv = vv+b1.ptr.p_double[i];
            }
            for(j=0; j<=nvars-1; j++)
            {
                solver.ax.ptr.pp_double[j][k] = c1.ptr.p_double[j]-vv*means.ptr.p_double[j];
            }
        }
    }
    eigsubspaceoocstop(&solver, s2, v, &rep, _state);
    if( npoints!=1 )
    {
        for(i=0; i<=nneeded-1; i++)
        {
            s2->ptr.p_double[i] = s2->ptr.p_double[i]/(npoints-1);
        }
    }
    ae_frame_leave(_state);
}
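

/*************************************************************************
NOTE on the implicit centering used above (stated here for clarity; it is
derived directly from the code). With m = (1/NPoints)*X^T*1 being the
vector of column means, the solver's matrix-vector products are computed
without ever forming the dense centered matrix:

    (X - 1*m^T)*z     = X*z   - (m^T*z)*1
    (X - 1*m^T)^T*b   = X^T*b - sum(b)*m

so only the sparse products X*z (sparsemv) and X^T*b (sparsemtv) are
needed, which preserves the sparsity of X.
*************************************************************************/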


#endif

#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This set of routines (DSErrAllocate, DSErrAccumulate, DSErrFinish)
calculates different error functions (classification error, cross-entropy,
rms, avg, avg.rel errors).

1. DSErrAllocate prepares buffer.
2. DSErrAccumulate accumulates individual errors:
   * Y contains predicted output (posterior probabilities for classification)
   * DesiredY contains desired output (class number for classification)
3. DSErrFinish outputs results:
   * Buf[0] contains relative classification error (zero for regression tasks)
   * Buf[1] contains avg. cross-entropy (zero for regression tasks)
   * Buf[2] contains rms error (regression, classification)
   * Buf[3] contains average error (regression, classification)
   * Buf[4] contains average relative error (regression, classification)

NOTES(1):
    "NClasses>0" means that we have classification task.
    "NClasses<0" means regression task with -NClasses real outputs.

NOTES(2):
    rms, avg, avg.rel errors for classification tasks are interpreted as
    errors in posterior probabilities with respect to probabilities given
    by training/test set.

  -- ALGLIB --
     Copyright 11.01.2009 by Bochkanov Sergey
*************************************************************************/
void dserrallocate(ae_int_t nclasses,
     /* Real */ ae_vector* buf,
     ae_state *_state)
{

    ae_vector_clear(buf);

    ae_vector_set_length(buf, 7+1, _state);
    buf->ptr.p_double[0] = (double)(0);
    buf->ptr.p_double[1] = (double)(0);
    buf->ptr.p_double[2] = (double)(0);
    buf->ptr.p_double[3] = (double)(0);
    buf->ptr.p_double[4] = (double)(0);
    buf->ptr.p_double[5] = (double)(nclasses);
    buf->ptr.p_double[6] = (double)(0);
    buf->ptr.p_double[7] = (double)(0);
}


/*************************************************************************
See DSErrAllocate for comments on this routine.

  -- ALGLIB --
     Copyright 11.01.2009 by Bochkanov Sergey
*************************************************************************/
void dserraccumulate(/* Real */ ae_vector* buf,
     /* Real */ ae_vector* y,
     /* Real */ ae_vector* desiredy,
     ae_state *_state)
{
    ae_int_t nclasses;
    ae_int_t nout;
    ae_int_t offs;
    ae_int_t mmax;
    ae_int_t rmax;
    ae_int_t j;
    double v;
    double ev;

    offs = 5;
    nclasses = ae_round(buf->ptr.p_double[offs], _state);
    if( nclasses>0 )
    {

        /*
         * Classification
         */
        rmax = ae_round(desiredy->ptr.p_double[0], _state);
        mmax = 0;
        for(j=1; j<=nclasses-1; j++)
        {
            if( ae_fp_greater(y->ptr.p_double[j],y->ptr.p_double[mmax]) )
            {
                mmax = j;
            }
        }
        if( mmax!=rmax )
        {
            buf->ptr.p_double[0] = buf->ptr.p_double[0]+1;
        }
        if( ae_fp_greater(y->ptr.p_double[rmax],(double)(0)) )
        {
            buf->ptr.p_double[1] = buf->ptr.p_double[1]-ae_log(y->ptr.p_double[rmax], _state);
        }
        else
        {
            buf->ptr.p_double[1] = buf->ptr.p_double[1]+ae_log(ae_maxrealnumber, _state);
        }
        for(j=0; j<=nclasses-1; j++)
        {
            v = y->ptr.p_double[j];
            if( j==rmax )
            {
                ev = (double)(1);
            }
            else
            {
                ev = (double)(0);
            }
            buf->ptr.p_double[2] = buf->ptr.p_double[2]+ae_sqr(v-ev, _state);
            buf->ptr.p_double[3] = buf->ptr.p_double[3]+ae_fabs(v-ev, _state);
            if( ae_fp_neq(ev,(double)(0)) )
            {
                buf->ptr.p_double[4] = buf->ptr.p_double[4]+ae_fabs((v-ev)/ev, _state);
                buf->ptr.p_double[offs+2] = buf->ptr.p_double[offs+2]+1;
            }
        }
        buf->ptr.p_double[offs+1] = buf->ptr.p_double[offs+1]+1;
    }
    else
    {

        /*
         * Regression
         */
        nout = -nclasses;
        rmax = 0;
        for(j=1; j<=nout-1; j++)
        {
            if( ae_fp_greater(desiredy->ptr.p_double[j],desiredy->ptr.p_double[rmax]) )
            {
                rmax = j;
            }
        }
        mmax = 0;
        for(j=1; j<=nout-1; j++)
        {
            if( ae_fp_greater(y->ptr.p_double[j],y->ptr.p_double[mmax]) )
            {
                mmax = j;
            }
        }
        if( mmax!=rmax )
        {
            buf->ptr.p_double[0] = buf->ptr.p_double[0]+1;
        }
        for(j=0; j<=nout-1; j++)
        {
            v = y->ptr.p_double[j];
            ev = desiredy->ptr.p_double[j];
            buf->ptr.p_double[2] = buf->ptr.p_double[2]+ae_sqr(v-ev, _state);
            buf->ptr.p_double[3] = buf->ptr.p_double[3]+ae_fabs(v-ev, _state);
            if( ae_fp_neq(ev,(double)(0)) )
            {
                buf->ptr.p_double[4] = buf->ptr.p_double[4]+ae_fabs((v-ev)/ev, _state);
                buf->ptr.p_double[offs+2] = buf->ptr.p_double[offs+2]+1;
            }
        }
        buf->ptr.p_double[offs+1] = buf->ptr.p_double[offs+1]+1;
    }
}


/*************************************************************************
See DSErrAllocate for comments on this routine.

  -- ALGLIB --
     Copyright 11.01.2009 by Bochkanov Sergey
*************************************************************************/
void dserrfinish(/* Real */ ae_vector* buf, ae_state *_state)
{
    ae_int_t nout;
    ae_int_t offs;

    offs = 5;
    nout = ae_iabs(ae_round(buf->ptr.p_double[offs], _state), _state);
    if( ae_fp_neq(buf->ptr.p_double[offs+1],(double)(0)) )
    {
        buf->ptr.p_double[0] = buf->ptr.p_double[0]/buf->ptr.p_double[offs+1];
        buf->ptr.p_double[1] = buf->ptr.p_double[1]/buf->ptr.p_double[offs+1];
        buf->ptr.p_double[2] = ae_sqrt(buf->ptr.p_double[2]/(nout*buf->ptr.p_double[offs+1]), _state);
        buf->ptr.p_double[3] = buf->ptr.p_double[3]/(nout*buf->ptr.p_double[offs+1]);
    }
    if( ae_fp_neq(buf->ptr.p_double[offs+2],(double)(0)) )
    {
        buf->ptr.p_double[4] = buf->ptr.p_double[4]/buf->ptr.p_double[offs+2];
    }
}
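

/*************************************************************************
NOTE: a minimal sketch of how the three routines above work together,
using the internal C API of this file. Buffer names and the 3-class
setting are illustrative only; initialization and filling of Y/DesiredY
are omitted.

    ae_vector buf, y, desiredy;              // allocated/filled elsewhere
    dserrallocate(3, &buf, _state);          // NClasses>0 => classification
    for(i=0; i<=npoints-1; i++)
    {
        // y[] = posterior probabilities, desiredy[0] = true class index
        dserraccumulate(&buf, &y, &desiredy, _state);
    }
    dserrfinish(&buf, _state);
    // buf[0]=rel.cls.error, buf[1]=avg.CE, buf[2]=rms,
    // buf[3]=avg, buf[4]=avg.rel

Buf[5..7] hold internal state (NClasses and sample counters), which is
why dserrallocate() sizes the buffer to 8 elements.
*************************************************************************/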


/*************************************************************************

  -- ALGLIB --
     Copyright 19.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsnormalize(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     /* Real */ ae_vector* means,
     /* Real */ ae_vector* sigmas,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    ae_vector_clear(means);
    ae_vector_clear(sigmas);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);

    /*
     * Test parameters
     */
    if( npoints<=0||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    *info = 1;

    /*
     * Standardization
     */
    ae_vector_set_length(means, nvars-1+1, _state);
    ae_vector_set_length(sigmas, nvars-1+1, _state);
    ae_vector_set_length(&tmp, npoints-1+1, _state);
    for(j=0; j<=nvars-1; j++)
    {
        ae_v_move(&tmp.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
        samplemoments(&tmp, npoints, &mean, &variance, &skewness, &kurtosis, _state);
        means->ptr.p_double[j] = mean;
        sigmas->ptr.p_double[j] = ae_sqrt(variance, _state);
        if( ae_fp_eq(sigmas->ptr.p_double[j],(double)(0)) )
        {
            sigmas->ptr.p_double[j] = (double)(1);
        }
        for(i=0; i<=npoints-1; i++)
        {
            xy->ptr.pp_double[i][j] = (xy->ptr.pp_double[i][j]-means->ptr.p_double[j])/sigmas->ptr.p_double[j];
        }
    }
    ae_frame_leave(_state);
}
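

/*************************************************************************
NOTE: the standardization above replaces each column J of XY by

    XY[I,J] := (XY[I,J] - Means[J]) / Sigmas[J]

with Sigmas[J] forced to 1 when the sample deviation is zero, so constant
columns map to all-zero columns instead of producing division by zero.
dsnormalizec() below computes the same Means/Sigmas without modifying XY.
*************************************************************************/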


/*************************************************************************

  -- ALGLIB --
     Copyright 19.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsnormalizec(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     /* Real */ ae_vector* means,
     /* Real */ ae_vector* sigmas,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t j;
    ae_vector tmp;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    ae_vector_clear(means);
    ae_vector_clear(sigmas);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);

    /*
     * Test parameters
     */
    if( npoints<=0||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    *info = 1;

    /*
     * Standardization
     */
    ae_vector_set_length(means, nvars-1+1, _state);
    ae_vector_set_length(sigmas, nvars-1+1, _state);
    ae_vector_set_length(&tmp, npoints-1+1, _state);
    for(j=0; j<=nvars-1; j++)
    {
        ae_v_move(&tmp.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
        samplemoments(&tmp, npoints, &mean, &variance, &skewness, &kurtosis, _state);
        means->ptr.p_double[j] = mean;
        sigmas->ptr.p_double[j] = ae_sqrt(variance, _state);
        if( ae_fp_eq(sigmas->ptr.p_double[j],(double)(0)) )
        {
            sigmas->ptr.p_double[j] = (double)(1);
        }
    }
    ae_frame_leave(_state);
}


/*************************************************************************

  -- ALGLIB --
     Copyright 19.05.2008 by Bochkanov Sergey
*************************************************************************/
double dsgetmeanmindistance(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    ae_vector tmp2;
    double v;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    memset(&tmp2, 0, sizeof(tmp2));
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmp2, 0, DT_REAL, _state, ae_true);

    /*
     * Test parameters
     */
    if( npoints<=0||nvars<1 )
    {
        result = (double)(0);
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Process
     */
    ae_vector_set_length(&tmp, npoints-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        tmp.ptr.p_double[i] = ae_maxrealnumber;
    }
    ae_vector_set_length(&tmp2, nvars-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=i+1; j<=npoints-1; j++)
        {
            ae_v_move(&tmp2.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
            ae_v_sub(&tmp2.ptr.p_double[0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
            v = ae_v_dotproduct(&tmp2.ptr.p_double[0], 1, &tmp2.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
            v = ae_sqrt(v, _state);
            tmp.ptr.p_double[i] = ae_minreal(tmp.ptr.p_double[i], v, _state);
            tmp.ptr.p_double[j] = ae_minreal(tmp.ptr.p_double[j], v, _state);
        }
    }
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        result = result+tmp.ptr.p_double[i]/npoints;
    }
    ae_frame_leave(_state);
    return result;
}
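

/*************************************************************************
NOTE: the function above returns

    (1/NPoints) * sum_i min_{j<>i} ||XY[i,:] - XY[j,:]||

computed from all NPoints*(NPoints-1)/2 pairwise Euclidean distances,
i.e. O(NPoints^2*NVars) time with only O(NPoints+NVars) extra memory.
*************************************************************************/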


/*************************************************************************

  -- ALGLIB --
     Copyright 19.05.2008 by Bochkanov Sergey
*************************************************************************/
void dstie(/* Real */ ae_vector* a,
     ae_int_t n,
     /* Integer */ ae_vector* ties,
     ae_int_t* tiecount,
     /* Integer */ ae_vector* p1,
     /* Integer */ ae_vector* p2,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t k;
    ae_vector tmp;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    ae_vector_clear(ties);
    *tiecount = 0;
    ae_vector_clear(p1);
    ae_vector_clear(p2);
    ae_vector_init(&tmp, 0, DT_INT, _state, ae_true);

    /*
     * Special case
     */
    if( n<=0 )
    {
        *tiecount = 0;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Sort A
     */
    tagsort(a, n, p1, p2, _state);

    /*
     * Process ties
     */
    *tiecount = 1;
    for(i=1; i<=n-1; i++)
    {
        if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
        {
            *tiecount = *tiecount+1;
        }
    }
    ae_vector_set_length(ties, *tiecount+1, _state);
    ties->ptr.p_int[0] = 0;
    k = 1;
    for(i=1; i<=n-1; i++)
    {
        if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
        {
            ties->ptr.p_int[k] = i;
            k = k+1;
        }
    }
    ties->ptr.p_int[*tiecount] = n;
    ae_frame_leave(_state);
}
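

/*************************************************************************
NOTE: a worked example of dstie() output (values are illustrative only).
For A = [3, 1, 1, 2, 3, 3] the routine sorts A in place to [1, 1, 2, 3,
3, 3] (P1/P2 receive the sorting permutation) and returns

    TieCount = 3
    Ties     = [0, 2, 3, 6]

so the K-th group of equal values occupies positions
Ties[K]..Ties[K+1]-1 of the sorted array.
*************************************************************************/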


/*************************************************************************

  -- ALGLIB --
     Copyright 11.12.2008 by Bochkanov Sergey
*************************************************************************/
void dstiefasti(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* b,
     ae_int_t n,
     /* Integer */ ae_vector* ties,
     ae_int_t* tiecount,
     /* Real */ ae_vector* bufr,
     /* Integer */ ae_vector* bufi,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t k;
    ae_vector tmp;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *tiecount = 0;
    ae_vector_init(&tmp, 0, DT_INT, _state, ae_true);

    /*
     * Special case
     */
    if( n<=0 )
    {
        *tiecount = 0;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Sort A
     */
    tagsortfasti(a, b, bufr, bufi, n, _state);

    /*
     * Process ties
     */
    ties->ptr.p_int[0] = 0;
    k = 1;
    for(i=1; i<=n-1; i++)
    {
        if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
        {
            ties->ptr.p_int[k] = i;
            k = k+1;
        }
    }
    ties->ptr.p_int[k] = n;
    *tiecount = k;
    ae_frame_leave(_state);
}


/*************************************************************************
Optimal binary classification

Algorithm finds optimal (=with minimal cross-entropy) binary partition.
Internal subroutine.

INPUT PARAMETERS:
    A       -   array[0..N-1], variable
    C       -   array[0..N-1], class numbers (0 or 1).
    N       -   array size

OUTPUT PARAMETERS:
    Info    -   completion code:
                * -3, all values of A[] are the same (partition is impossible)
                * -2, one of C[] is incorrect (<0, >1)
                * -1, incorrect parameters were passed (N<=0).
                *  1, OK
    Threshold-  partition boundary. Left part contains values which are
                strictly less than Threshold. Right part contains values
                which are greater than or equal to Threshold.
    PAL, PBL-   probabilities P(0|v<Threshold) and P(1|v<Threshold)
    PAR, PBR-   probabilities P(0|v>=Threshold) and P(1|v>=Threshold)
    CVE     -   cross-validation estimate of cross-entropy

  -- ALGLIB --
     Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* c,
     ae_int_t n,
     ae_int_t* info,
     double* threshold,
     double* pal,
     double* pbl,
     double* par,
     double* pbr,
     double* cve,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _a;
    ae_vector _c;
    ae_int_t i;
    ae_int_t t;
    double s;
    ae_vector ties;
    ae_int_t tiecount;
    ae_vector p1;
    ae_vector p2;
    ae_int_t k;
    ae_int_t koptimal;
    double pak;
    double pbk;
    double cvoptimal;
    double cv;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&_c, 0, sizeof(_c));
    memset(&ties, 0, sizeof(ties));
    memset(&p1, 0, sizeof(p1));
    memset(&p2, 0, sizeof(p2));
    ae_vector_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init_copy(&_c, c, _state, ae_true);
    c = &_c;
    *info = 0;
    *threshold = 0;
    *pal = 0;
    *pbl = 0;
    *par = 0;
    *pbr = 0;
    *cve = 0;
    ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p2, 0, DT_INT, _state, ae_true);

    /*
     * Test for errors in inputs
     */
    if( n<=0 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=n-1; i++)
    {
        if( c->ptr.p_int[i]!=0&&c->ptr.p_int[i]!=1 )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;

    /*
     * Tie
     */
    dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
    for(i=0; i<=n-1; i++)
    {
        if( p2.ptr.p_int[i]!=i )
        {
            t = c->ptr.p_int[i];
            c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
            c->ptr.p_int[p2.ptr.p_int[i]] = t;
        }
    }

    /*
     * Special case: number of ties is 1.
     *
     * NOTE: we assume that P[i,j] equals to 0 or 1,
     *       intermediate values are not allowed.
     */
    if( tiecount==1 )
    {
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case, number of ties > 1
     *
     * NOTE: we assume that P[i,j] equals to 0 or 1,
     *       intermediate values are not allowed.
     */
    *pal = (double)(0);
    *pbl = (double)(0);
    *par = (double)(0);
    *pbr = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        if( c->ptr.p_int[i]==0 )
        {
            *par = *par+1;
        }
        if( c->ptr.p_int[i]==1 )
        {
            *pbr = *pbr+1;
        }
    }
    koptimal = -1;
    cvoptimal = ae_maxrealnumber;
    for(k=0; k<=tiecount-2; k++)
    {

        /*
         * first, obtain information about K-th tie which is
         * moved from R-part to L-part
         */
        pak = (double)(0);
        pbk = (double)(0);
        for(i=ties.ptr.p_int[k]; i<=ties.ptr.p_int[k+1]-1; i++)
        {
            if( c->ptr.p_int[i]==0 )
            {
                pak = pak+1;
            }
            if( c->ptr.p_int[i]==1 )
            {
                pbk = pbk+1;
            }
        }

        /*
         * Calculate cross-validation CE
         */
        cv = (double)(0);
        cv = cv-bdss_xlny(*pal+pak, (*pal+pak)/(*pal+pak+(*pbl)+pbk+1), _state);
        cv = cv-bdss_xlny(*pbl+pbk, (*pbl+pbk)/(*pal+pak+1+(*pbl)+pbk), _state);
        cv = cv-bdss_xlny(*par-pak, (*par-pak)/(*par-pak+(*pbr)-pbk+1), _state);
        cv = cv-bdss_xlny(*pbr-pbk, (*pbr-pbk)/(*par-pak+1+(*pbr)-pbk), _state);

        /*
         * Compare with best
         */
        if( ae_fp_less(cv,cvoptimal) )
        {
            cvoptimal = cv;
            koptimal = k;
        }

        /*
         * update
         */
        *pal = *pal+pak;
        *pbl = *pbl+pbk;
        *par = *par-pak;
        *pbr = *pbr-pbk;
    }
    *cve = cvoptimal;
    *threshold = 0.5*(a->ptr.p_double[ties.ptr.p_int[koptimal]]+a->ptr.p_double[ties.ptr.p_int[koptimal+1]]);
    *pal = (double)(0);
    *pbl = (double)(0);
    *par = (double)(0);
    *pbr = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        if( ae_fp_less(a->ptr.p_double[i],*threshold) )
        {
            if( c->ptr.p_int[i]==0 )
            {
                *pal = *pal+1;
            }
            else
            {
                *pbl = *pbl+1;
            }
        }
        else
        {
            if( c->ptr.p_int[i]==0 )
            {
                *par = *par+1;
            }
            else
            {
                *pbr = *pbr+1;
            }
        }
    }
    s = *pal+(*pbl);
    *pal = *pal/s;
    *pbl = *pbl/s;
    s = *par+(*pbr);
    *par = *par/s;
    *pbr = *pbr/s;
    ae_frame_leave(_state);
}
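

/*************************************************************************
NOTE: in the loop above each candidate split moves one tie group from the
right part to the left part; with PAL' = PAL+PAK (and similarly PBL',
PAR', PBR' after the move) the candidate's quality is the smoothed
cross-entropy

    CV = - xlny(PAL', PAL'/(PAL'+PBL'+1)) - xlny(PBL', PBL'/(PAL'+PBL'+1))
         - xlny(PAR', PAR'/(PAR'+PBR'+1)) - xlny(PBR', PBR'/(PAR'+PBR'+1))

where the "+1" in each denominator acts as a regularizer for empty parts.
The split with minimal CV wins.
*************************************************************************/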


/*************************************************************************
Optimal partition, internal subroutine. Fast version.

Accepts:
    A       array[0..N-1]       array of attributes
    C       array[0..N-1]       array of class labels
    TiesBuf array[0..N]         temporaries (ties)
    CntBuf  array[0..2*NC-1]    temporaries (counts)
    Alpha   centering factor (0<=alpha<=1, recommended value - 0.05)
    BufR    array[0..N-1]       temporaries
    BufI    array[0..N-1]       temporaries

Output:
    Info    error code (">0"=OK, "<0"=bad)
    RMS     training set RMS error
    CVRMS   leave-one-out RMS error

Note:
    content of all arrays is changed by subroutine;
    it doesn't allocate temporaries.

  -- ALGLIB --
     Copyright 11.12.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2fast(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* c,
     /* Integer */ ae_vector* tiesbuf,
     /* Integer */ ae_vector* cntbuf,
     /* Real */ ae_vector* bufr,
     /* Integer */ ae_vector* bufi,
     ae_int_t n,
     ae_int_t nc,
     double alpha,
     ae_int_t* info,
     double* threshold,
     double* rms,
     double* cvrms,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t cl;
    ae_int_t tiecount;
    double cbest;
    double cc;
    ae_int_t koptimal;
    ae_int_t sl;
    ae_int_t sr;
    double v;
    double w;
    double x;

    *info = 0;
    *threshold = 0;
    *rms = 0;
    *cvrms = 0;

    /*
     * Test for errors in inputs
     */
    if( n<=0||nc<2 )
    {
        *info = -1;
        return;
    }
    for(i=0; i<=n-1; i++)
    {
        if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
        {
            *info = -2;
            return;
        }
    }
    *info = 1;

    /*
     * Tie
     */
    dstiefasti(a, c, n, tiesbuf, &tiecount, bufr, bufi, _state);

    /*
     * Special case: number of ties is 1.
     */
    if( tiecount==1 )
    {
        *info = -3;
        return;
    }

    /*
     * General case, number of ties > 1
     */
    for(i=0; i<=2*nc-1; i++)
    {
        cntbuf->ptr.p_int[i] = 0;
    }
    for(i=0; i<=n-1; i++)
    {
        cntbuf->ptr.p_int[nc+c->ptr.p_int[i]] = cntbuf->ptr.p_int[nc+c->ptr.p_int[i]]+1;
    }
    koptimal = -1;
    *threshold = a->ptr.p_double[n-1];
    cbest = ae_maxrealnumber;
    sl = 0;
    sr = n;
    for(k=0; k<=tiecount-2; k++)
    {

        /*
         * first, move Kth tie from right to left
         */
        for(i=tiesbuf->ptr.p_int[k]; i<=tiesbuf->ptr.p_int[k+1]-1; i++)
        {
            cl = c->ptr.p_int[i];
            cntbuf->ptr.p_int[cl] = cntbuf->ptr.p_int[cl]+1;
            cntbuf->ptr.p_int[nc+cl] = cntbuf->ptr.p_int[nc+cl]-1;
        }
        sl = sl+(tiesbuf->ptr.p_int[k+1]-tiesbuf->ptr.p_int[k]);
        sr = sr-(tiesbuf->ptr.p_int[k+1]-tiesbuf->ptr.p_int[k]);

        /*
         * Calculate RMS error
         */
        v = (double)(0);
        for(i=0; i<=nc-1; i++)
        {
            w = (double)(cntbuf->ptr.p_int[i]);
            v = v+w*ae_sqr(w/sl-1, _state);
            v = v+(sl-w)*ae_sqr(w/sl, _state);
            w = (double)(cntbuf->ptr.p_int[nc+i]);
            v = v+w*ae_sqr(w/sr-1, _state);
            v = v+(sr-w)*ae_sqr(w/sr, _state);
        }
        v = ae_sqrt(v/(nc*n), _state);

        /*
         * Compare with best
         */
        x = (double)(2*sl)/(double)(sl+sr)-1;
        cc = v*(1-alpha+alpha*ae_sqr(x, _state));
        if( ae_fp_less(cc,cbest) )
        {

            /*
             * store split
             */
            *rms = v;
            koptimal = k;
            cbest = cc;

            /*
             * calculate CVRMS error
             */
            *cvrms = (double)(0);
            for(i=0; i<=nc-1; i++)
            {
                if( sl>1 )
                {
                    w = (double)(cntbuf->ptr.p_int[i]);
                    *cvrms = *cvrms+w*ae_sqr((w-1)/(sl-1)-1, _state);
                    *cvrms = *cvrms+(sl-w)*ae_sqr(w/(sl-1), _state);
                }
                else
                {
                    w = (double)(cntbuf->ptr.p_int[i]);
                    *cvrms = *cvrms+w*ae_sqr((double)1/(double)nc-1, _state);
                    *cvrms = *cvrms+(sl-w)*ae_sqr((double)1/(double)nc, _state);
                }
                if( sr>1 )
                {
                    w = (double)(cntbuf->ptr.p_int[nc+i]);
                    *cvrms = *cvrms+w*ae_sqr((w-1)/(sr-1)-1, _state);
                    *cvrms = *cvrms+(sr-w)*ae_sqr(w/(sr-1), _state);
                }
                else
                {
                    w = (double)(cntbuf->ptr.p_int[nc+i]);
                    *cvrms = *cvrms+w*ae_sqr((double)1/(double)nc-1, _state);
                    *cvrms = *cvrms+(sr-w)*ae_sqr((double)1/(double)nc, _state);
                }
            }
            *cvrms = ae_sqrt(*cvrms/(nc*n), _state);
        }
    }

    /*
     * Calculate threshold.
     * Code is a bit complicated because there can be such
     * numbers that 0.5(A+B) equals to A or B (if A-B=epsilon)
     */
    *threshold = 0.5*(a->ptr.p_double[tiesbuf->ptr.p_int[koptimal]]+a->ptr.p_double[tiesbuf->ptr.p_int[koptimal+1]]);
    if( ae_fp_less_eq(*threshold,a->ptr.p_double[tiesbuf->ptr.p_int[koptimal]]) )
    {
        *threshold = a->ptr.p_double[tiesbuf->ptr.p_int[koptimal+1]];
    }
}
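

/*************************************************************************
NOTE: the selection criterion used above is

    X  = 2*SL/(SL+SR) - 1              (signed left-part imbalance)
    CC = RMS * (1 - Alpha + Alpha*X^2)

so with Alpha=0 the split with minimal RMS error wins, while Alpha>0
penalizes strongly unbalanced splits; the recommended Alpha=0.05 keeps
the penalty mild. The final threshold is rounded up to the next distinct
value of A when 0.5*(A+B) would not fall strictly between A and B.
*************************************************************************/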


/*************************************************************************
Automatic non-optimal discretization, internal subroutine.

  -- ALGLIB --
     Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dssplitk(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* c,
     ae_int_t n,
     ae_int_t nc,
     ae_int_t kmax,
     ae_int_t* info,
     /* Real */ ae_vector* thresholds,
     ae_int_t* ni,
     double* cve,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _a;
    ae_vector _c;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t k;
    ae_vector ties;
    ae_int_t tiecount;
    ae_vector p1;
    ae_vector p2;
    ae_vector cnt;
    double v2;
    ae_int_t bestk;
    double bestcve;
    ae_vector bestsizes;
    double curcve;
    ae_vector cursizes;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&_c, 0, sizeof(_c));
    memset(&ties, 0, sizeof(ties));
    memset(&p1, 0, sizeof(p1));
    memset(&p2, 0, sizeof(p2));
    memset(&cnt, 0, sizeof(cnt));
    memset(&bestsizes, 0, sizeof(bestsizes));
    memset(&cursizes, 0, sizeof(cursizes));
    ae_vector_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init_copy(&_c, c, _state, ae_true);
    c = &_c;
    *info = 0;
    ae_vector_clear(thresholds);
    *ni = 0;
    *cve = 0;
    ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
    ae_vector_init(&cnt, 0, DT_INT, _state, ae_true);
    ae_vector_init(&bestsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&cursizes, 0, DT_INT, _state, ae_true);

    /*
     * Test for errors in inputs
     */
    if( (n<=0||nc<2)||kmax<2 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=n-1; i++)
    {
        if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;

    /*
     * Tie
     */
    dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
    for(i=0; i<=n-1; i++)
    {
        if( p2.ptr.p_int[i]!=i )
        {
            k = c->ptr.p_int[i];
            c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
            c->ptr.p_int[p2.ptr.p_int[i]] = k;
        }
    }

    /*
     * Special cases
     */
    if( tiecount==1 )
    {
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case:
     * 0. allocate arrays
     */
    kmax = ae_minint(kmax, tiecount, _state);
    ae_vector_set_length(&bestsizes, kmax-1+1, _state);
    ae_vector_set_length(&cursizes, kmax-1+1, _state);
    ae_vector_set_length(&cnt, nc-1+1, _state);

    /*
     * General case:
     * 1. prepare "weak" solution (two subintervals, divided at median)
     */
    v2 = ae_maxrealnumber;
    j = -1;
    for(i=1; i<=tiecount-1; i++)
    {
        if( ae_fp_less(ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state),v2) )
        {
            v2 = ae_fabs(ties.ptr.p_int[i]-0.5*n, _state);
            j = i;
        }
    }
    ae_assert(j>0, "DSSplitK: internal error #1!", _state);
    bestk = 2;
    bestsizes.ptr.p_int[0] = ties.ptr.p_int[j];
    bestsizes.ptr.p_int[1] = n-j;
    bestcve = (double)(0);
    for(i=0; i<=nc-1; i++)
    {
        cnt.ptr.p_int[i] = 0;
    }
    for(i=0; i<=j-1; i++)
    {
        bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
    }
    bestcve = bestcve+bdss_getcv(&cnt, nc, _state);
    for(i=0; i<=nc-1; i++)
    {
        cnt.ptr.p_int[i] = 0;
    }
    for(i=j; i<=tiecount-1; i++)
    {
        bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
    }
    bestcve = bestcve+bdss_getcv(&cnt, nc, _state);

    /*
     * General case:
     * 2. Use greedy algorithm to find sub-optimal split in O(KMax*N) time
     */
    for(k=2; k<=kmax; k++)
    {

        /*
         * Prepare greedy K-interval split
         */
        for(i=0; i<=k-1; i++)
        {
            cursizes.ptr.p_int[i] = 0;
        }
        i = 0;
        j = 0;
        while(j<=tiecount-1&&i<=k-1)
        {

            /*
             * Rule: I-th bin is empty, fill it
             */
            if( cursizes.ptr.p_int[i]==0 )
            {
                cursizes.ptr.p_int[i] = ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
                j = j+1;
                continue;
            }

            /*
             * Rule: (K-1-I) bins left, (K-1-I) ties left (1 tie per bin); next bin
             */
            if( tiecount-j==k-1-i )
            {
                i = i+1;
                continue;
            }

            /*
             * Rule: last bin, always place in current
             */
            if( i==k-1 )
            {
                cursizes.ptr.p_int[i] = cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
                j = j+1;
                continue;
            }

            /*
             * Place J-th tie in I-th bin, or leave for I+1-th bin.
             */
            if( ae_fp_less(ae_fabs(cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j]-(double)n/(double)k, _state),ae_fabs(cursizes.ptr.p_int[i]-(double)n/(double)k, _state)) )
            {
                cursizes.ptr.p_int[i] = cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
                j = j+1;
            }
            else
            {
                i = i+1;
            }
        }
        ae_assert(cursizes.ptr.p_int[k-1]!=0&&j==tiecount, "DSSplitK: internal error #1", _state);

        /*
         * Calculate CVE
         */
        curcve = (double)(0);
        j = 0;
        for(i=0; i<=k-1; i++)
        {
            for(j1=0; j1<=nc-1; j1++)
            {
                cnt.ptr.p_int[j1] = 0;
            }
            for(j1=j; j1<=j+cursizes.ptr.p_int[i]-1; j1++)
            {
                cnt.ptr.p_int[c->ptr.p_int[j1]] = cnt.ptr.p_int[c->ptr.p_int[j1]]+1;
            }
            curcve = curcve+bdss_getcv(&cnt, nc, _state);
            j = j+cursizes.ptr.p_int[i];
        }

        /*
         * Choose best variant
         */
        if( ae_fp_less(curcve,bestcve) )
        {
            for(i=0; i<=k-1; i++)
            {
                bestsizes.ptr.p_int[i] = cursizes.ptr.p_int[i];
            }
            bestcve = curcve;
            bestk = k;
        }
    }

    /*
     * Transform from sizes to thresholds
     */
    *cve = bestcve;
    *ni = bestk;
    ae_vector_set_length(thresholds, *ni-2+1, _state);
    j = bestsizes.ptr.p_int[0];
    for(i=1; i<=bestk-1; i++)
    {
        thresholds->ptr.p_double[i-1] = 0.5*(a->ptr.p_double[j-1]+a->ptr.p_double[j]);
        j = j+bestsizes.ptr.p_int[i];
    }
    ae_frame_leave(_state);
}
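

/*************************************************************************
NOTE: the greedy pass above fills K bins left to right with whole tie
groups, each bin targeting roughly N/K elements: a tie group is added to
the current bin when doing so brings the bin size closer to N/K than
leaving it out, subject to the rules that no bin stays empty, the last
bin absorbs all remaining groups, and trailing bins always receive at
least one tie group each. The best K (by the CVE measure of bdss_getcv)
over 2..KMax is returned.
*************************************************************************/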


/*************************************************************************
Automatic optimal discretization, internal subroutine.

  -- ALGLIB --
     Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplitk(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* c,
     ae_int_t n,
     ae_int_t nc,
     ae_int_t kmax,
     ae_int_t* info,
     /* Real */ ae_vector* thresholds,
     ae_int_t* ni,
     double* cve,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _a;
    ae_vector _c;
    ae_int_t i;
    ae_int_t j;
    ae_int_t s;
    ae_int_t jl;
    ae_int_t jr;
    double v2;
    ae_vector ties;
    ae_int_t tiecount;
    ae_vector p1;
    ae_vector p2;
    double cvtemp;
    ae_vector cnt;
    ae_vector cnt2;
    ae_matrix cv;
    ae_matrix splits;
    ae_int_t k;
    ae_int_t koptimal;
    double cvoptimal;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&_c, 0, sizeof(_c));
    memset(&ties, 0, sizeof(ties));
    memset(&p1, 0, sizeof(p1));
    memset(&p2, 0, sizeof(p2));
    memset(&cnt, 0, sizeof(cnt));
    memset(&cnt2, 0, sizeof(cnt2));
    memset(&cv, 0, sizeof(cv));
    memset(&splits, 0, sizeof(splits));
    ae_vector_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init_copy(&_c, c, _state, ae_true);
    c = &_c;
    *info = 0;
    ae_vector_clear(thresholds);
    *ni = 0;
    *cve = 0;
    ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
    ae_vector_init(&cnt, 0, DT_INT, _state, ae_true);
    ae_vector_init(&cnt2, 0, DT_INT, _state, ae_true);
    ae_matrix_init(&cv, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&splits, 0, 0, DT_INT, _state, ae_true);

    /*
     * Test for errors in inputs
     */
    if( (n<=0||nc<2)||kmax<2 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=n-1; i++)
    {
        if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;

    /*
     * Tie
     */
    dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
    for(i=0; i<=n-1; i++)
    {
        if( p2.ptr.p_int[i]!=i )
        {
            k = c->ptr.p_int[i];
            c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
            c->ptr.p_int[p2.ptr.p_int[i]] = k;
        }
    }

    /*
     * Special cases
     */
    if( tiecount==1 )
    {
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case
     * Use dynamic programming to find best split in O(KMax*NC*TieCount^2) time
     */
    kmax = ae_minint(kmax, tiecount, _state);
    ae_matrix_set_length(&cv, kmax-1+1, tiecount-1+1, _state);
    ae_matrix_set_length(&splits, kmax-1+1, tiecount-1+1, _state);
    ae_vector_set_length(&cnt, nc-1+1, _state);
    ae_vector_set_length(&cnt2, nc-1+1, _state);
    for(j=0; j<=nc-1; j++)
    {
        cnt.ptr.p_int[j] = 0;
    }
    for(j=0; j<=tiecount-1; j++)
    {
        bdss_tieaddc(c, &ties, j, nc, &cnt, _state);
        splits.ptr.pp_int[0][j] = 0;
        cv.ptr.pp_double[0][j] = bdss_getcv(&cnt, nc, _state);
    }
    for(k=1; k<=kmax-1; k++)
    {
        for(j=0; j<=nc-1; j++)
        {
            cnt.ptr.p_int[j] = 0;
        }

        /*
         * Subtask size J in [K..TieCount-1]:
         * optimal K-splitting on ties from 0-th to J-th.
         */
        for(j=k; j<=tiecount-1; j++)
        {

            /*
             * Update Cnt - let it contain classes of ties from K-th to J-th
             */
            bdss_tieaddc(c, &ties, j, nc, &cnt, _state);

            /*
             * Search for optimal split point S in [K..J]
             */
            for(i=0; i<=nc-1; i++)
            {
                cnt2.ptr.p_int[i] = cnt.ptr.p_int[i];
            }
            cv.ptr.pp_double[k][j] = cv.ptr.pp_double[k-1][j-1]+bdss_getcv(&cnt2, nc, _state);
            splits.ptr.pp_int[k][j] = j;
            for(s=k+1; s<=j; s++)
            {

                /*
                 * Update Cnt2 - let it contain classes of ties from S-th to J-th
                 */
                bdss_tiesubc(c, &ties, s-1, nc, &cnt2, _state);

                /*
                 * Calculate CVE
                 */
                cvtemp = cv.ptr.pp_double[k-1][s-1]+bdss_getcv(&cnt2, nc, _state);
                if( ae_fp_less(cvtemp,cv.ptr.pp_double[k][j]) )
                {
                    cv.ptr.pp_double[k][j] = cvtemp;
                    splits.ptr.pp_int[k][j] = s;
                }
            }
        }
    }

    /*
     * Choose best partition, output result
     */
    koptimal = -1;
    cvoptimal = ae_maxrealnumber;
    for(k=0; k<=kmax-1; k++)
    {
        if( ae_fp_less(cv.ptr.pp_double[k][tiecount-1],cvoptimal) )
        {
            cvoptimal = cv.ptr.pp_double[k][tiecount-1];
            koptimal = k;
        }
    }
    ae_assert(koptimal>=0, "DSOptimalSplitK: internal error #1!", _state);
    if( koptimal==0 )
    {

        /*
         * Special case: best partition is one big interval.
         * Even 2-partition is not better.
         * This is possible when dealing with "weak" predictor variables.
         *
         * Make binary split as close to the median as possible.
         */
        v2 = ae_maxrealnumber;
        j = -1;
        for(i=1; i<=tiecount-1; i++)
        {
            if( ae_fp_less(ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state),v2) )
            {
                v2 = ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state);
                j = i;
            }
        }
        ae_assert(j>0, "DSOptimalSplitK: internal error #2!", _state);
        ae_vector_set_length(thresholds, 0+1, _state);
        thresholds->ptr.p_double[0] = 0.5*(a->ptr.p_double[ties.ptr.p_int[j-1]]+a->ptr.p_double[ties.ptr.p_int[j]]);
        *ni = 2;
        *cve = (double)(0);
        for(i=0; i<=nc-1; i++)
        {
            cnt.ptr.p_int[i] = 0;
        }
        for(i=0; i<=j-1; i++)
        {
            bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
        }
        *cve = *cve+bdss_getcv(&cnt, nc, _state);
        for(i=0; i<=nc-1; i++)
        {
            cnt.ptr.p_int[i] = 0;
        }
        for(i=j; i<=tiecount-1; i++)
        {
            bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
        }
        *cve = *cve+bdss_getcv(&cnt, nc, _state);
    }
    else
    {

        /*
         * General case: 2 or more intervals
         *
         * NOTE: we initialize both JL and JR (left and right bounds),
         *       although the algorithm needs only JL.
         */
        ae_vector_set_length(thresholds, koptimal-1+1, _state);
        *ni = koptimal+1;
        *cve = cv.ptr.pp_double[koptimal][tiecount-1];
        jl = splits.ptr.pp_int[koptimal][tiecount-1];
        jr = tiecount-1;
        for(k=koptimal; k>=1; k--)
        {
            thresholds->ptr.p_double[k-1] = 0.5*(a->ptr.p_double[ties.ptr.p_int[jl-1]]+a->ptr.p_double[ties.ptr.p_int[jl]]);
            jr = jl-1;
            jl = splits.ptr.pp_int[k-1][jl-1];
        }
        touchint(&jr, _state);
    }
    ae_frame_leave(_state);
}
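

/*************************************************************************
NOTE: the dynamic program above fills

    CV[0][J] = CVE(ties 0..J)
    CV[K][J] = min over S in [K+1..J] of ( CV[K-1][S-1] + CVE(ties S..J) )

where CVE(...) is bdss_getcv() applied to the class counts of the given
tie groups, and Splits[K][J] records the minimizing S, so the thresholds
are recovered by walking backwards from Splits[KOptimal][TieCount-1].
*************************************************************************/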


/*************************************************************************
Internal function
*************************************************************************/
static double bdss_xlny(double x, double y, ae_state *_state)
{
    double result;

    if( ae_fp_eq(x,(double)(0)) )
    {
        result = (double)(0);
    }
    else
    {
        result = x*ae_log(y, _state);
    }
    return result;
}


/*************************************************************************
Internal function: given class counts Cnt[0..NC-1] for one cell of the
partition, returns the cross-validation estimate

    CVE = -SUM(Cnt[I]*ln(Cnt[I]/(S+NC-1))),  S = SUM(Cnt[I])

i.e. minus the log-likelihood of the counts under smoothed class
frequencies.
*************************************************************************/
static double bdss_getcv(/* Integer */ ae_vector* cnt,
     ae_int_t nc,
     ae_state *_state)
{
    ae_int_t i;
    double s;
    double result;

    s = (double)(0);
    for(i=0; i<=nc-1; i++)
    {
        s = s+cnt->ptr.p_int[i];
    }
    result = (double)(0);
    for(i=0; i<=nc-1; i++)
    {
        result = result-bdss_xlny((double)(cnt->ptr.p_int[i]), cnt->ptr.p_int[i]/(s+nc-1), _state);
    }
    return result;
}
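

/*
 * NOTE: a short worked instance of the CVE formula above (illustrative
 *       values only). With NC=2 classes and counts Cnt=[3,1] we have S=4,
 *       so
 *
 *           CVE = -(3*ln(3/5)+1*ln(1/5)) = 3.14...
 *
 *       Purer cells give smaller CVE, which is why the dynamic programming
 *       loop in DSOptimalSplitK keeps the split S minimizing the sum of
 *       per-cell CVE values.
 */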


/*************************************************************************
Internal function, adds number of samples of class I in tie NTie to Cnt[I]
*************************************************************************/
static void bdss_tieaddc(/* Integer */ ae_vector* c,
     /* Integer */ ae_vector* ties,
     ae_int_t ntie,
     ae_int_t nc,
     /* Integer */ ae_vector* cnt,
     ae_state *_state)
{
    ae_int_t i;

    for(i=ties->ptr.p_int[ntie]; i<=ties->ptr.p_int[ntie+1]-1; i++)
    {
        cnt->ptr.p_int[c->ptr.p_int[i]] = cnt->ptr.p_int[c->ptr.p_int[i]]+1;
    }
}


/*************************************************************************
Internal function, subtracts number of samples of class I in tie NTie from
Cnt[I]
*************************************************************************/
static void bdss_tiesubc(/* Integer */ ae_vector* c,
     /* Integer */ ae_vector* ties,
     ae_int_t ntie,
     ae_int_t nc,
     /* Integer */ ae_vector* cnt,
     ae_state *_state)
{
    ae_int_t i;

    for(i=ties->ptr.p_int[ntie]; i<=ties->ptr.p_int[ntie+1]-1; i++)
    {
        cnt->ptr.p_int[c->ptr.p_int[i]] = cnt->ptr.p_int[c->ptr.p_int[i]]-1;
    }
}


void _cvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    cvreport *p = (cvreport*)_p;
    ae_touch_ptr((void*)p);
}


void _cvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    cvreport *dst = (cvreport*)_dst;
    cvreport *src = (cvreport*)_src;
    dst->relclserror = src->relclserror;
    dst->avgce = src->avgce;
    dst->rmserror = src->rmserror;
    dst->avgerror = src->avgerror;
    dst->avgrelerror = src->avgrelerror;
}


void _cvreport_clear(void* _p)
{
    cvreport *p = (cvreport*)_p;
    ae_touch_ptr((void*)p);
}


void _cvreport_destroy(void* _p)
{
    cvreport *p = (cvreport*)_p;
    ae_touch_ptr((void*)p);
}


#endif

#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This function returns the number of weight updates which is required for a
gradient calculation problem to be split.
*************************************************************************/
ae_int_t mlpgradsplitcost(ae_state *_state)
{
    ae_int_t result;

    result = mlpbase_gradbasecasecost;
    return result;
}


/*************************************************************************
This function returns the number of elements in a dataset subset which is
required for a gradient calculation problem to be split.
*************************************************************************/
ae_int_t mlpgradsplitsize(ae_state *_state)
{
    ae_int_t result;

    result = mlpbase_microbatchsize;
    return result;
}


/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with linear output layer. Network weights are filled with small
random values.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate0(ae_int_t nin,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_true, _state);
    ae_frame_leave(_state);
}
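

/*************************************************************************
Usage sketch for the creation function above (illustrative addition, not
part of the generated ALGLIB sources). It relies only on entry points
defined or referenced in this file: _multilayerperceptron_init/_destroy,
mlpcreate0 and mlpproperties.
*************************************************************************/
static void mlpbase_sketch_create0(ae_state *_state)
{
    multilayerperceptron net;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    memset(&net, 0, sizeof(net));
    _multilayerperceptron_init(&net, _state, ae_false);
    
    /*
     * 3 inputs, 2 linear outputs, no hidden layers; weights are filled
     * with small random values by mlpcreate0() itself.
     */
    mlpcreate0(3, 2, &net, _state);
    
    /*
     * Each output is a biased summator over NIn inputs, so
     * WCount = (NIn+1)*NOut = (3+1)*2 = 8 here.
     */
    mlpproperties(&net, &nin, &nout, &wcount, _state);
    _multilayerperceptron_destroy(&net);
}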


/*************************************************************************
Same as MLPCreate0, but with one hidden layer (NHid neurons) with
non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_true, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons)
with non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_true, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with non-linear output layer. Network weights are filled with
small random values.

Activation function of the output layer takes values:

    (B, +INF), if D>=0

or

    (-INF, B), if D<0.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb0(ae_int_t nin,
     ae_int_t nout,
     double b,
     double d,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3;
    if( ae_fp_greater_eq(d,(double)(0)) )
    {
        d = (double)(1);
    }
    else
    {
        d = (double)(-1);
    }
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = b;
        network->columnsigmas.ptr.p_double[i] = d;
    }
    ae_frame_leave(_state);
}
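

/*
 * NOTE: the half-interval range documented above is produced by the output
 *       postprocessing step set up at the end of MLPCreateB0: unscaling is
 *
 *           y = ColumnMeans[i] + ColumnSigmas[i]*a = B + sign(D)*a
 *
 *       where a is the raw output of the type-3 activation. Given the
 *       documented ranges (B,+INF) and (-INF,B), that activation must be
 *       one-sided, so B acts as the bound and the sign of D selects the
 *       side (this reading is inferred from the code above, not stated in
 *       the original comment).
 */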


/*************************************************************************
Same as MLPCreateB0 but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     double b,
     double d,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3;
    if( ae_fp_greater_eq(d,(double)(0)) )
    {
        d = (double)(1);
    }
    else
    {
        d = (double)(-1);
    }
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = b;
        network->columnsigmas.ptr.p_double[i] = d;
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Same as MLPCreateB0 but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     double b,
     double d,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3+3;
    if( ae_fp_greater_eq(d,(double)(0)) )
    {
        d = (double)(1);
    }
    else
    {
        d = (double)(-1);
    }
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = b;
        network->columnsigmas.ptr.p_double[i] = d;
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with non-linear output layer. Network weights are filled with
small random values. Activation function of the output layer takes values
[A,B].

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater0(ae_int_t nin,
     ae_int_t nout,
     double a,
     double b,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
        network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
    }
    ae_frame_leave(_state);
}
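

/*
 * NOTE: a worked check of the range claim above. With ColumnMeans=0.5*(A+B)
 *       and ColumnSigmas=0.5*(A-B), as set by the loop above, the unscaled
 *       output is
 *
 *           y = 0.5*(A+B) + 0.5*(A-B)*t
 *
 *       where t is the raw output of the type-1 (tanh-like) activation with
 *       values in [-1,1]. Then t=+1 gives y=A and t=-1 gives y=B, so y
 *       sweeps exactly the documented interval between A and B.
 */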


/*************************************************************************
Same as MLPCreateR0, but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     double a,
     double b,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
        network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Same as MLPCreateR0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     double a,
     double b,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    layerscount = 1+3+3+3;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_false, _state);
    
    /*
     * Turn on outputs shift/scaling.
     */
    for(i=nin; i<=nin+nout-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
        network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Creates classifier network with NIn inputs and NOut possible classes.
Network contains no hidden layers and linear output layer with SOFTMAX-
normalization (so outputs sum up to 1.0 and converge to posterior
probabilities).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec0(ae_int_t nin,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    ae_assert(nout>=2, "MLPCreateC0: NOut<2!", _state);
    layerscount = 1+2+1;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_true, ae_true, _state);
    ae_frame_leave(_state);
}
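

/*************************************************************************
Usage sketch for the classifier constructor above (illustrative addition,
not part of the generated ALGLIB sources; relies only on entry points
defined or referenced in this file).
*************************************************************************/
static void mlpbase_sketch_createc0(ae_state *_state)
{
    multilayerperceptron net;

    memset(&net, 0, sizeof(net));
    _multilayerperceptron_init(&net, _state, ae_false);
    
    /*
     * 4 inputs, 3 classes: a biased summator with NOut-1=2 tunable rows
     * plus a zero layer, followed by SOFTMAX normalization of 3 outputs.
     */
    mlpcreatec0(4, 3, &net, _state);
    ae_assert(mlpissoftmax(&net, _state), "sketch: classifier network expected", _state);
    _multilayerperceptron_destroy(&net);
}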


/*************************************************************************
Same as MLPCreateC0, but with one non-linear hidden layer.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    ae_assert(nout>=2, "MLPCreateC1: NOut<2!", _state);
    layerscount = 1+3+2+1;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_true, ae_true, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Same as MLPCreateC0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    ae_frame_make(_state, &_frame_block);
    memset(&lsizes, 0, sizeof(lsizes));
    memset(&ltypes, 0, sizeof(ltypes));
    memset(&lconnfirst, 0, sizeof(lconnfirst));
    memset(&lconnlast, 0, sizeof(lconnlast));
    _multilayerperceptron_clear(network);
    ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);

    ae_assert(nout>=2, "MLPCreateC2: NOut<2!", _state);
    layerscount = 1+3+3+2+1;
    
    /*
     * Allocate arrays
     */
    ae_vector_set_length(&lsizes, layerscount-1+1, _state);
    ae_vector_set_length(&ltypes, layerscount-1+1, _state);
    ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
    ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
    
    /*
     * Layers
     */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    
    /*
     * Create
     */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
    mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_true, ae_true, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Copying of neural network

INPUT PARAMETERS:
    Network1 -   original

OUTPUT PARAMETERS:
    Network2 -   copy

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcopy(multilayerperceptron* network1,
     multilayerperceptron* network2,
     ae_state *_state)
{

    _multilayerperceptron_clear(network2);

    mlpcopyshared(network1, network2, _state);
}


/*************************************************************************
Copying of neural network (second parameter is passed as shared object).

INPUT PARAMETERS:
    Network1 -   original

OUTPUT PARAMETERS:
    Network2 -   copy

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcopyshared(multilayerperceptron* network1,
     multilayerperceptron* network2,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t wcount;
    ae_int_t i;
    mlpbuffers buf;
    smlpgrad sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&buf, 0, sizeof(buf));
    memset(&sgrad, 0, sizeof(sgrad));
    _mlpbuffers_init(&buf, _state, ae_true);
    _smlpgrad_init(&sgrad, _state, ae_true);
    
    /*
     * Copy scalar and array fields
     */
    network2->hlnetworktype = network1->hlnetworktype;
    network2->hlnormtype = network1->hlnormtype;
    copyintegerarray(&network1->hllayersizes, &network2->hllayersizes, _state);
    copyintegerarray(&network1->hlconnections, &network2->hlconnections, _state);
    copyintegerarray(&network1->hlneurons, &network2->hlneurons, _state);
    copyintegerarray(&network1->structinfo, &network2->structinfo, _state);
    copyrealarray(&network1->weights, &network2->weights, _state);
    copyrealarray(&network1->columnmeans, &network2->columnmeans, _state);
    copyrealarray(&network1->columnsigmas, &network2->columnsigmas, _state);
    copyrealarray(&network1->neurons, &network2->neurons, _state);
    copyrealarray(&network1->dfdnet, &network2->dfdnet, _state);
    copyrealarray(&network1->derror, &network2->derror, _state);
    copyrealarray(&network1->x, &network2->x, _state);
    copyrealarray(&network1->y, &network2->y, _state);
    copyrealarray(&network1->nwbuf, &network2->nwbuf, _state);
    copyintegerarray(&network1->integerbuf, &network2->integerbuf, _state);
    
    /*
     * Copy buffers
     */
    wcount = mlpgetweightscount(network1, _state);
    ae_shared_pool_set_seed(&network2->buf, &buf, sizeof(buf), _mlpbuffers_init, _mlpbuffers_init_copy, _mlpbuffers_destroy, _state);
    ae_vector_set_length(&sgrad.g, wcount, _state);
    sgrad.f = 0.0;
    for(i=0; i<=wcount-1; i++)
    {
        sgrad.g.ptr.p_double[i] = 0.0;
    }
    ae_shared_pool_set_seed(&network2->gradbuf, &sgrad, sizeof(sgrad), _smlpgrad_init, _smlpgrad_init_copy, _smlpgrad_destroy, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
This function compares architectures of neural networks. Only geometries
are compared, weights and other parameters are not tested.

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
ae_bool mlpsamearchitecture(multilayerperceptron* network1,
     multilayerperceptron* network2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ninfo;
    ae_bool result;

    ae_assert(network1->structinfo.cnt>0&&network1->structinfo.cnt>=network1->structinfo.ptr.p_int[0], "MLPSameArchitecture: Network1 is uninitialized", _state);
    ae_assert(network2->structinfo.cnt>0&&network2->structinfo.cnt>=network2->structinfo.ptr.p_int[0], "MLPSameArchitecture: Network2 is uninitialized", _state);
    result = ae_false;
    if( network1->structinfo.ptr.p_int[0]!=network2->structinfo.ptr.p_int[0] )
    {
        return result;
    }
    ninfo = network1->structinfo.ptr.p_int[0];
    for(i=0; i<=ninfo-1; i++)
    {
        if( network1->structinfo.ptr.p_int[i]!=network2->structinfo.ptr.p_int[i] )
        {
            return result;
        }
    }
    result = ae_true;
    return result;
}


/*************************************************************************
This function copies tunable parameters (weights/means/sigmas) from one
network to another with the same architecture. It performs rudimentary
checks that the architectures are the same, and throws an exception if the
check fails.

It is intended for fast copying of states between two networks which are
known to have the same geometry.

INPUT PARAMETERS:
    Network1 -   source, must be correctly initialized
    Network2 -   target, must have same architecture

OUTPUT PARAMETERS:
    Network2 -   network state is copied from source to target

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpcopytunableparameters(multilayerperceptron* network1,
     multilayerperceptron* network2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ninfo;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    ae_assert(network1->structinfo.cnt>0&&network1->structinfo.cnt>=network1->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network1 is uninitialized", _state);
    ae_assert(network2->structinfo.cnt>0&&network2->structinfo.cnt>=network2->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network2 is uninitialized", _state);
    ae_assert(network1->structinfo.ptr.p_int[0]==network2->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network1 geometry differs from that of Network2", _state);
    ninfo = network1->structinfo.ptr.p_int[0];
    for(i=0; i<=ninfo-1; i++)
    {
        ae_assert(network1->structinfo.ptr.p_int[i]==network2->structinfo.ptr.p_int[i], "MLPCopyTunableParameters: Network1 geometry differs from that of Network2", _state);
    }
    mlpproperties(network1, &nin, &nout, &wcount, _state);
    for(i=0; i<=wcount-1; i++)
    {
        network2->weights.ptr.p_double[i] = network1->weights.ptr.p_double[i];
    }
    if( mlpissoftmax(network1, _state) )
    {
        for(i=0; i<=nin-1; i++)
        {
            network2->columnmeans.ptr.p_double[i] = network1->columnmeans.ptr.p_double[i];
            network2->columnsigmas.ptr.p_double[i] = network1->columnsigmas.ptr.p_double[i];
        }
    }
    else
    {
        for(i=0; i<=nin+nout-1; i++)
        {
            network2->columnmeans.ptr.p_double[i] = network1->columnmeans.ptr.p_double[i];
            network2->columnsigmas.ptr.p_double[i] = network1->columnsigmas.ptr.p_double[i];
        }
    }
}


/*************************************************************************
This function exports tunable parameters (weights/means/sigmas) from a
network to a contiguous array. Nothing is guaranteed about the array
format; the only thing you can count on is that MLPImportTunableParameters()
will be able to parse it.

It is intended for fast copying of states between a network and a backup
array.

INPUT PARAMETERS:
    Network -   source, must be correctly initialized
    P       -   array to use. If its size is enough to store data, it is
                reused.

OUTPUT PARAMETERS:
    P       -   array which stores network parameters, resized if needed
    PCount  -   number of parameters stored in array.

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpexporttunableparameters(multilayerperceptron* network,
     /* Real */ ae_vector* p,
     ae_int_t* pcount,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    *pcount = 0;

    ae_assert(network->structinfo.cnt>0&&network->structinfo.cnt>=network->structinfo.ptr.p_int[0], "MLPExportTunableParameters: Network is uninitialized", _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    if( mlpissoftmax(network, _state) )
    {
        *pcount = wcount+2*nin;
        rvectorsetlengthatleast(p, *pcount, _state);
        k = 0;
        for(i=0; i<=wcount-1; i++)
        {
            p->ptr.p_double[k] = network->weights.ptr.p_double[i];
            k = k+1;
        }
        for(i=0; i<=nin-1; i++)
        {
            p->ptr.p_double[k] = network->columnmeans.ptr.p_double[i];
            k = k+1;
            p->ptr.p_double[k] = network->columnsigmas.ptr.p_double[i];
            k = k+1;
        }
    }
    else
    {
        *pcount = wcount+2*(nin+nout);
        rvectorsetlengthatleast(p, *pcount, _state);
        k = 0;
        for(i=0; i<=wcount-1; i++)
        {
            p->ptr.p_double[k] = network->weights.ptr.p_double[i];
            k = k+1;
        }
        for(i=0; i<=nin+nout-1; i++)
        {
            p->ptr.p_double[k] = network->columnmeans.ptr.p_double[i];
            k = k+1;
            p->ptr.p_double[k] = network->columnsigmas.ptr.p_double[i];
            k = k+1;
        }
    }
}
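

/*************************************************************************
Round-trip sketch for the export/import pair (illustrative addition, not
part of the generated ALGLIB sources). It backs up the tunable parameters,
perturbs the network, then restores the saved state; all calls are
functions defined in this file.
*************************************************************************/
static void mlpbase_sketch_tunableparams(multilayerperceptron* network,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector p;
    ae_int_t pcount;

    ae_frame_make(_state, &_frame_block);
    memset(&p, 0, sizeof(p));
    ae_vector_init(&p, 0, DT_REAL, _state, ae_true);
    
    /*
     * Back up weights/means/sigmas into P, randomize the network, then
     * restore the original state from the backup array.
     */
    mlpexporttunableparameters(network, &p, &pcount, _state);
    mlprandomize(network, _state);
    mlpimporttunableparameters(network, &p, _state);
    ae_frame_leave(_state);
}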


/*************************************************************************
This function imports tunable parameters (weights/means/sigmas) which were
exported by MLPExportTunableParameters().

It is intended for fast copying of states between a network and a backup
array.

INPUT PARAMETERS:
    Network -   target:
                * must be correctly initialized
                * must have same geometry as network used to export params
    P       -   array with parameters

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpimporttunableparameters(multilayerperceptron* network,
     /* Real */ ae_vector* p,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    ae_assert(network->structinfo.cnt>0&&network->structinfo.cnt>=network->structinfo.ptr.p_int[0], "MLPImportTunableParameters: Network is uninitialized", _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    if( mlpissoftmax(network, _state) )
    {
        k = 0;
        for(i=0; i<=wcount-1; i++)
        {
            network->weights.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
        }
        for(i=0; i<=nin-1; i++)
        {
            network->columnmeans.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
            network->columnsigmas.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
        }
    }
    else
    {
        k = 0;
        for(i=0; i<=wcount-1; i++)
        {
            network->weights.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
        }
        for(i=0; i<=nin+nout-1; i++)
        {
            network->columnmeans.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
            network->columnsigmas.ptr.p_double[i] = p->ptr.p_double[k];
            k = k+1;
        }
    }
}


/*************************************************************************
Serialization of MultiLayerPerceptron structure

INPUT PARAMETERS:
    Network -   original

OUTPUT PARAMETERS:
    RA      -   array of real numbers which stores network,
                array[0..RLen-1]
    RLen    -   RA length

  -- ALGLIB --
     Copyright 29.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpserializeold(multilayerperceptron* network,
     /* Real */ ae_vector* ra,
     ae_int_t* rlen,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ssize;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t sigmalen;
    ae_int_t offs;

    ae_vector_clear(ra);
    *rlen = 0;
    
    /*
     * Unload info
     */
    ssize = network->structinfo.ptr.p_int[0];
    nin = network->structinfo.ptr.p_int[1];
    nout = network->structinfo.ptr.p_int[2];
    wcount = network->structinfo.ptr.p_int[4];
    if( mlpissoftmax(network, _state) )
    {
        sigmalen = nin;
    }
    else
    {
        sigmalen = nin+nout;
    }
    
    /*
     * RA format:
     * LEN       DESCRIPTION
     * 1         RLen
     * 1         version (MLPVNum)
     * 1         StructInfo size
     * SSize     StructInfo
     * WCount    Weights
     * SigmaLen  ColumnMeans
     * SigmaLen  ColumnSigmas
     */
    *rlen = 3+ssize+wcount+2*sigmalen;
    ae_vector_set_length(ra, *rlen-1+1, _state);
    ra->ptr.p_double[0] = (double)(*rlen);
    ra->ptr.p_double[1] = (double)(mlpbase_mlpvnum);
    ra->ptr.p_double[2] = (double)(ssize);
    offs = 3;
    for(i=0; i<=ssize-1; i++)
    {
        ra->ptr.p_double[offs+i] = (double)(network->structinfo.ptr.p_int[i]);
    }
    offs = offs+ssize;
    ae_v_move(&ra->ptr.p_double[offs], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(offs,offs+wcount-1));
    offs = offs+wcount;
    ae_v_move(&ra->ptr.p_double[offs], 1, &network->columnmeans.ptr.p_double[0], 1, ae_v_len(offs,offs+sigmalen-1));
    offs = offs+sigmalen;
    ae_v_move(&ra->ptr.p_double[offs], 1, &network->columnsigmas.ptr.p_double[0], 1, ae_v_len(offs,offs+sigmalen-1));
    offs = offs+sigmalen;
}
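

/*
 * NOTE: a worked instance of the RA layout above (illustrative numbers
 *       only). For a network with SSize=30, WCount=8 and SigmaLen=5 the
 *       serialized array has RLen = 3+30+8+2*5 = 51 entries:
 *
 *           RA[0]      = 51         (RLen)
 *           RA[1]      = version    (MLPVNum)
 *           RA[2]      = 30         (StructInfo size)
 *           RA[3..32]  = StructInfo
 *           RA[33..40] = Weights
 *           RA[41..45] = ColumnMeans
 *           RA[46..50] = ColumnSigmas
 */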


/*************************************************************************
Unserialization of MultiLayerPerceptron structure

INPUT PARAMETERS:
    RA      -   real array which stores network

OUTPUT PARAMETERS:
    Network -   restored network

  -- ALGLIB --
     Copyright 29.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpunserializeold(/* Real */ ae_vector* ra,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ssize;
    ae_int_t ntotal;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t sigmalen;
    ae_int_t offs;

    _multilayerperceptron_clear(network);

    ae_assert(ae_round(ra->ptr.p_double[1], _state)==mlpbase_mlpvnum, "MLPUnserialize: incorrect array!", _state);
    
    /*
     * Unload StructInfo from RA
     */
    offs = 3;
    ssize = ae_round(ra->ptr.p_double[2], _state);
    ae_vector_set_length(&network->structinfo, ssize-1+1, _state);
    for(i=0; i<=ssize-1; i++)
    {
        network->structinfo.ptr.p_int[i] = ae_round(ra->ptr.p_double[offs+i], _state);
    }
    offs = offs+ssize;
    
    /*
     * Unload info from StructInfo
     */
    ssize = network->structinfo.ptr.p_int[0];
    nin = network->structinfo.ptr.p_int[1];
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    wcount = network->structinfo.ptr.p_int[4];
    if( network->structinfo.ptr.p_int[6]==0 )
    {
        sigmalen = nin+nout;
    }
    else
    {
        sigmalen = nin;
    }
    
    /*
     * Allocate space for other fields
     */
    ae_vector_set_length(&network->weights, wcount-1+1, _state);
    ae_vector_set_length(&network->columnmeans, sigmalen-1+1, _state);
    ae_vector_set_length(&network->columnsigmas, sigmalen-1+1, _state);
    ae_vector_set_length(&network->neurons, ntotal-1+1, _state);
    ae_vector_set_length(&network->nwbuf, ae_maxint(wcount, 2*nout, _state)-1+1, _state);
    ae_vector_set_length(&network->dfdnet, ntotal-1+1, _state);
    ae_vector_set_length(&network->x, nin-1+1, _state);
    ae_vector_set_length(&network->y, nout-1+1, _state);
    ae_vector_set_length(&network->derror, ntotal-1+1, _state);
    
    /*
     * Copy parameters from RA
     */
    ae_v_move(&network->weights.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,wcount-1));
    offs = offs+wcount;
    ae_v_move(&network->columnmeans.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,sigmalen-1));
    offs = offs+sigmalen;
    ae_v_move(&network->columnsigmas.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,sigmalen-1));
    offs = offs+sigmalen;
}


/*************************************************************************
Randomization of neural network weights

  -- ALGLIB --
     Copyright 06.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlprandomize(multilayerperceptron* network, ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t ntotal;
    ae_int_t istart;
    hqrndstate r;
    ae_int_t entrysize;
    ae_int_t entryoffs;
    ae_int_t neuronidx;
    ae_int_t neurontype;
    double vmean;
    double vvar;
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    double desiredsigma;
    ae_int_t montecarlocnt;
    double ef;
    double ef2;
    double v;
    double wscale;

    ae_frame_make(_state, &_frame_block);
    memset(&r, 0, sizeof(r));
    _hqrndstate_init(&r, _state, ae_true);

    hqrndrandomize(&r, _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    ntotal = network->structinfo.ptr.p_int[3];
    istart = network->structinfo.ptr.p_int[5];
    desiredsigma = 0.5;
    montecarlocnt = 20;
    
    /*
     * Stage 1:
     * * Network.Weights is filled by standard deviation of weights
     * * default values: sigma=1
     */
    for(i=0; i<=wcount-1; i++)
    {
        network->weights.ptr.p_double[i] = 1.0;
    }
    
    /*
     * Stage 2:
     * * assume that input neurons have zero mean and unit standard deviation
     * * assume that constant neurons have zero standard deviation
     * * perform forward pass along neurons
     * * for each non-input non-constant neuron:
     *   * calculate mean and standard deviation of neuron's output
     *     assuming that we know means/deviations of neurons which feed it
     *     and assuming that weights have unit variance and zero mean.
     * * for each nonlinear neuron additionally we perform backward pass:
     *   * scale variances of weights which feed it in such way that neuron's
     *     input has unit standard deviation
     *
     * NOTE: this algorithm assumes that each connection feeds at most one
     *       non-linear neuron. This assumption can be incorrect in upcoming
     *       architectures with strong neurons. However, the algorithm should
     *       work smoothly even in this case.
     *
     * During this stage we use Network.RndBuf, which is grouped into NTotal
     * entries, each of them having the following format:
     *
     * Buf[Offset+0]        mean value of neuron's output
     * Buf[Offset+1]        standard deviation of neuron's output
     */
    entrysize = 2;
    rvectorsetlengthatleast(&network->rndbuf, entrysize*ntotal, _state);
    for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
    {
        neurontype = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+0];
        entryoffs = entrysize*neuronidx;
        if( neurontype==-2 )
        {
            
            /*
             * Input neuron: zero mean, unit variance.
             */
            network->rndbuf.ptr.p_double[entryoffs+0] = 0.0;
            network->rndbuf.ptr.p_double[entryoffs+1] = 1.0;
            continue;
        }
        if( neurontype==-3 )
        {
            
            /*
             * "-1" neuron: mean=-1, zero variance.
             */
            network->rndbuf.ptr.p_double[entryoffs+0] = -1.0;
            network->rndbuf.ptr.p_double[entryoffs+1] = 0.0;
            continue;
        }
        if( neurontype==-4 )
        {
            
            /*
             * "0" neuron: mean=0, zero variance.
             */
            network->rndbuf.ptr.p_double[entryoffs+0] = 0.0;
            network->rndbuf.ptr.p_double[entryoffs+1] = 0.0;
            continue;
        }
        if( neurontype==0 )
        {
            
            /*
             * Adaptive summator neuron:
             * * calculate its mean and variance.
             * * we assume that weights of this neuron have unit variance and zero mean.
             * * thus, the neuron's output always has zero mean
             * * as for variance, it is a bit more interesting:
             *   * let n[i] be the i-th input neuron
             *   * let w[i] be the i-th weight
             *   * we assume that n[i] and w[i] are independently distributed
             *   * Var(n0*w0+n1*w1+...) = Var(n0*w0)+Var(n1*w1)+...
             *   * Var(X*Y) = mean(X)^2*Var(Y) + mean(Y)^2*Var(X) + Var(X)*Var(Y)
             *   * mean(w[i])=0, var(w[i])=1
             *   * Var(n[i]*w[i]) = mean(n[i])^2 + Var(n[i])
             */
            n1 = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
            n2 = n1+network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+1]-1;
            vmean = 0.0;
            vvar = 0.0;
            for(i=n1; i<=n2; i++)
            {
                vvar = vvar+ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+0], _state)+ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
            }
            network->rndbuf.ptr.p_double[entryoffs+0] = vmean;
            network->rndbuf.ptr.p_double[entryoffs+1] = ae_sqrt(vvar, _state);
            continue;
        }
        if( neurontype==-5 )
        {
            
            /*
             * Linear activation function
             */
            i = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
            vmean = network->rndbuf.ptr.p_double[entrysize*i+0];
            vvar = ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
            if( ae_fp_greater(vvar,(double)(0)) )
            {
                wscale = desiredsigma/ae_sqrt(vvar, _state);
            }
            else
            {
                wscale = 1.0;
            }
            mlpbase_randomizebackwardpass(network, i, wscale, _state);
            network->rndbuf.ptr.p_double[entryoffs+0] = vmean*wscale;
            network->rndbuf.ptr.p_double[entryoffs+1] = desiredsigma;
            continue;
        }
        if( neurontype>0 )
        {
            
            /*
             * Nonlinear activation function:
             * * scale its inputs
             * * estimate mean/sigma of its output using the Monte-Carlo method
             *   (we simulate different inputs with unit deviation and
             *   sample activation function output on such inputs)
             */
            i = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
            vmean = network->rndbuf.ptr.p_double[entrysize*i+0];
            vvar = ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
            if( ae_fp_greater(vvar,(double)(0)) )
            {
                wscale = desiredsigma/ae_sqrt(vvar, _state);
            }
            else
            {
                wscale = 1.0;
            }
            mlpbase_randomizebackwardpass(network, i, wscale, _state);
            ef = 0.0;
            ef2 = 0.0;
            vmean = vmean*wscale;
            for(i=0; i<=montecarlocnt-1; i++)
            {
                v = vmean+desiredsigma*hqrndnormal(&r, _state);
                ef = ef+v;
                ef2 = ef2+v*v;
            }
            ef = ef/montecarlocnt;
            ef2 = ef2/montecarlocnt;
            network->rndbuf.ptr.p_double[entryoffs+0] = ef;
            network->rndbuf.ptr.p_double[entryoffs+1] = ae_maxreal(ef2-ef*ef, 0.0, _state);
            continue;
        }
        ae_assert(ae_false, "MLPRandomize: unexpected neuron type", _state);
    }
    
    /*
     * Stage 3: generate weights.
     */
    for(i=0; i<=wcount-1; i++)
    {
        network->weights.ptr.p_double[i] = network->weights.ptr.p_double[i]*hqrndnormal(&r, _state);
    }
    ae_frame_leave(_state);
}
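

/*
 * NOTE: the Var(X*Y) identity used in Stage 2 above follows, for
 *       independent X and Y, from
 *
 *           Var(X*Y) = E[X^2]*E[Y^2] - (E[X]*E[Y])^2
 *                    = (Var(X)+E[X]^2)*(Var(Y)+E[Y]^2) - E[X]^2*E[Y]^2
 *                    = E[X]^2*Var(Y) + E[Y]^2*Var(X) + Var(X)*Var(Y)
 *
 *       With E[w]=0 and Var(w)=1 this reduces to the per-connection term
 *       mean(n[i])^2+Var(n[i]) accumulated by the summator branch.
 */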


/*************************************************************************
Randomization of neural network weights and standardizer

  -- ALGLIB --
     Copyright 10.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlprandomizefull(multilayerperceptron* network, ae_state *_state)
{
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t ntotal;
    ae_int_t istart;
    ae_int_t offs;
    ae_int_t ntype;

    mlpproperties(network, &nin, &nout, &wcount, _state);
    ntotal = network->structinfo.ptr.p_int[3];
    istart = network->structinfo.ptr.p_int[5];
    
    /*
     * Process network
     */
    mlprandomize(network, _state);
    for(i=0; i<=nin-1; i++)
    {
        network->columnmeans.ptr.p_double[i] = ae_randomreal(_state)-0.5;
        network->columnsigmas.ptr.p_double[i] = ae_randomreal(_state)+0.5;
    }
    if( !mlpissoftmax(network, _state) )
    {
        for(i=0; i<=nout-1; i++)
        {
            offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
            ntype = network->structinfo.ptr.p_int[offs+0];
            if( ntype==0 )
            {
                
                /*
                 * Shifts are changed only for linear output neurons
                 */
                network->columnmeans.ptr.p_double[nin+i] = 2*ae_randomreal(_state)-1;
            }
            if( ntype==0||ntype==3 )
            {
                
                /*
                 * Scales are changed only for linear or bounded output neurons.
                 * Note that scale randomization preserves sign.
                 */
                network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*(1.5*ae_randomreal(_state)+0.5);
            }
        }
    }
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine.
|
|
|
|
-- ALGLIB --
|
|
Copyright 30.03.2008 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpinitpreprocessor(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t ssize,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t jmax;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntotal;
|
|
ae_int_t istart;
|
|
ae_int_t offs;
|
|
ae_int_t ntype;
|
|
ae_vector means;
|
|
ae_vector sigmas;
|
|
double s;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&means, 0, sizeof(means));
|
|
memset(&sigmas, 0, sizeof(sigmas));
|
|
ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
|
|
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ntotal = network->structinfo.ptr.p_int[3];
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* Means/Sigmas
|
|
*/
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
jmax = nin-1;
|
|
}
|
|
else
|
|
{
|
|
jmax = nin+nout-1;
|
|
}
|
|
ae_vector_set_length(&means, jmax+1, _state);
|
|
ae_vector_set_length(&sigmas, jmax+1, _state);
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = (double)(0);
|
|
sigmas.ptr.p_double[i] = (double)(0);
|
|
}
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
means.ptr.p_double[j] = means.ptr.p_double[j]+xy->ptr.pp_double[i][j];
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = means.ptr.p_double[i]/ssize;
|
|
}
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(xy->ptr.pp_double[i][j]-means.ptr.p_double[j], _state);
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/ssize, _state);
|
|
}
|
|
|
|
/*
|
|
* Inputs
|
|
*/
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
|
|
network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Outputs
|
|
*/
|
|
if( !mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
|
|
ntype = network->structinfo.ptr.p_int[offs+0];
|
|
|
|
/*
|
|
* Linear outputs
|
|
*/
|
|
if( ntype==0 )
|
|
{
|
|
network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
|
|
network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Bounded outputs (half-interval)
|
|
*/
|
|
if( ntype==3 )
|
|
{
|
|
s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
|
|
}
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = 1.0;
|
|
}
|
|
network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
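/*************************************************************************
Illustration (not a library routine): the preprocessor above is driven by
per-column means and sigmas, with a zero sigma replaced by 1 so that the
downstream transform (x-mean)/sigma stays well-defined. A hedged
standalone sketch of that computation over a dense dataset (assumes at
least one row):

    #include <cmath>
    #include <vector>

    // Per-column mean and sigma; zero sigmas are replaced by 1,
    // mirroring the ColumnSigmas guard used above.
    void column_stats(const std::vector<std::vector<double> > &xy,
                      std::vector<double> &means,
                      std::vector<double> &sigmas)
    {
        size_t rows = xy.size(), cols = xy[0].size();
        means.assign(cols, 0.0);
        sigmas.assign(cols, 0.0);
        for(size_t i = 0; i < rows; i++)
            for(size_t j = 0; j < cols; j++)
                means[j] += xy[i][j];
        for(size_t j = 0; j < cols; j++)
            means[j] /= rows;
        for(size_t i = 0; i < rows; i++)
            for(size_t j = 0; j < cols; j++)
                sigmas[j] += (xy[i][j]-means[j])*(xy[i][j]-means[j]);
        for(size_t j = 0; j < cols; j++)
        {
            sigmas[j] = std::sqrt(sigmas[j]/rows);
            if( sigmas[j]==0.0 )
                sigmas[j] = 1.0;
        }
    }
*************************************************************************/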
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine.
|
|
Initialization for preprocessor based on a sample.
|
|
|
|
INPUT
|
|
Network - initialized neural network;
|
|
XY - sample, given by sparse matrix;
|
|
SSize - sample size.
|
|
|
|
OUTPUT
|
|
Network - neural network with initialised preprocessor.
|
|
|
|
-- ALGLIB --
|
|
Copyright 26.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpinitpreprocessorsparse(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t ssize,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t jmax;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntotal;
|
|
ae_int_t istart;
|
|
ae_int_t offs;
|
|
ae_int_t ntype;
|
|
ae_vector means;
|
|
ae_vector sigmas;
|
|
double s;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&means, 0, sizeof(means));
|
|
memset(&sigmas, 0, sizeof(sigmas));
|
|
ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
|
|
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ntotal = network->structinfo.ptr.p_int[3];
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* Means/Sigmas
|
|
*/
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
jmax = nin-1;
|
|
}
|
|
else
|
|
{
|
|
jmax = nin+nout-1;
|
|
}
|
|
ae_vector_set_length(&means, jmax+1, _state);
|
|
ae_vector_set_length(&sigmas, jmax+1, _state);
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = (double)(0);
|
|
sigmas.ptr.p_double[i] = (double)(0);
|
|
}
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
sparsegetrow(xy, i, &network->xyrow, _state);
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
means.ptr.p_double[j] = means.ptr.p_double[j]+network->xyrow.ptr.p_double[j];
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = means.ptr.p_double[i]/ssize;
|
|
}
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
sparsegetrow(xy, i, &network->xyrow, _state);
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(network->xyrow.ptr.p_double[j]-means.ptr.p_double[j], _state);
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/ssize, _state);
|
|
}
|
|
|
|
/*
|
|
* Inputs
|
|
*/
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
|
|
network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Outputs
|
|
*/
|
|
if( !mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
|
|
ntype = network->structinfo.ptr.p_int[offs+0];
|
|
|
|
/*
|
|
* Linear outputs
|
|
*/
|
|
if( ntype==0 )
|
|
{
|
|
network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
|
|
network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Bounded outputs (half-interval)
|
|
*/
|
|
if( ntype==3 )
|
|
{
|
|
s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
|
|
}
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = 1.0;
|
|
}
|
|
network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine.
|
|
Initialization for preprocessor based on a subsample.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network initialized with one of the network creation funcs
|
|
XY - original dataset; one sample = one row;
|
|
first NIn columns contain inputs,
|
|
next NOut columns - desired outputs.
|
|
SetSize - real size of XY, SetSize>=0;
|
|
Idx - subset of SubsetSize elements, array[SubsetSize]:
|
|
* Idx[I] stores row index in the original dataset which is
  given by XY. The preprocessor is initialized over rows
  whose indexes are stored in Idx[].
|
|
* Idx[] must store correct indexes; this function throws
|
|
an exception in case incorrect index (less than 0 or
|
|
larger than rows(XY)) is given
|
|
* Idx[] may store indexes in any order and even with
|
|
repetitions.
|
|
SubsetSize- number of elements in Idx[] array.
|
|
|
|
OUTPUT:
|
|
Network - neural network with initialised preprocessor.
|
|
|
|
NOTE: when SubsetSize<0, the full dataset is processed via a call to the
      MLPInitPreprocessor function.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.08.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpinitpreprocessorsubset(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* idx,
|
|
ae_int_t subsetsize,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t jmax;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntotal;
|
|
ae_int_t istart;
|
|
ae_int_t offs;
|
|
ae_int_t ntype;
|
|
ae_vector means;
|
|
ae_vector sigmas;
|
|
double s;
|
|
ae_int_t npoints;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&means, 0, sizeof(means));
|
|
memset(&sigmas, 0, sizeof(sigmas));
|
|
ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
|
|
|
|
ae_assert(setsize>=0, "MLPInitPreprocessorSubset: SetSize<0", _state);
|
|
if( subsetsize<0 )
|
|
{
|
|
mlpinitpreprocessor(network, xy, setsize, _state);
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
ae_assert(subsetsize<=idx->cnt, "MLPInitPreprocessorSubset: SubsetSize>Length(Idx)", _state);
|
|
npoints = setsize;
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
ae_assert(idx->ptr.p_int[i]>=0, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]<0)", _state);
|
|
ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
|
|
}
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ntotal = network->structinfo.ptr.p_int[3];
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* Means/Sigmas
|
|
*/
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
jmax = nin-1;
|
|
}
|
|
else
|
|
{
|
|
jmax = nin+nout-1;
|
|
}
|
|
ae_vector_set_length(&means, jmax+1, _state);
|
|
ae_vector_set_length(&sigmas, jmax+1, _state);
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = (double)(0);
|
|
sigmas.ptr.p_double[i] = (double)(0);
|
|
}
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
means.ptr.p_double[j] = means.ptr.p_double[j]+xy->ptr.pp_double[idx->ptr.p_int[i]][j];
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = means.ptr.p_double[i]/subsetsize;
|
|
}
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(xy->ptr.pp_double[idx->ptr.p_int[i]][j]-means.ptr.p_double[j], _state);
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/subsetsize, _state);
|
|
}
|
|
|
|
/*
|
|
* Inputs
|
|
*/
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
|
|
network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Outputs
|
|
*/
|
|
if( !mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
|
|
ntype = network->structinfo.ptr.p_int[offs+0];
|
|
|
|
/*
|
|
* Linear outputs
|
|
*/
|
|
if( ntype==0 )
|
|
{
|
|
network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
|
|
network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Bounded outputs (half-interval)
|
|
*/
|
|
if( ntype==3 )
|
|
{
|
|
s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
|
|
}
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = 1.0;
|
|
}
|
|
network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine.
|
|
Initialization for preprocessor based on a subsample.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network initialized with one of the network creation funcs
|
|
XY - original dataset, given by sparse matrix;
|
|
one sample = one row;
|
|
first NIn columns contain inputs,
|
|
next NOut columns - desired outputs.
|
|
SetSize - real size of XY, SetSize>=0;
|
|
Idx - subset of SubsetSize elements, array[SubsetSize]:
|
|
* Idx[I] stores row index in the original dataset which is
  given by XY. The preprocessor is initialized over rows
  whose indexes are stored in Idx[].
|
|
* Idx[] must store correct indexes; this function throws
|
|
an exception in case incorrect index (less than 0 or
|
|
larger than rows(XY)) is given
|
|
* Idx[] may store indexes in any order and even with
|
|
repetitions.
|
|
SubsetSize- number of elements in Idx[] array.
|
|
|
|
OUTPUT:
|
|
Network - neural network with initialised preprocessor.
|
|
|
|
NOTE: when SubsetSize<0, the full dataset is processed via a call to the
      MLPInitPreprocessorSparse function.
|
|
|
|
-- ALGLIB --
|
|
Copyright 26.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpinitpreprocessorsparsesubset(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* idx,
|
|
ae_int_t subsetsize,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t jmax;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntotal;
|
|
ae_int_t istart;
|
|
ae_int_t offs;
|
|
ae_int_t ntype;
|
|
ae_vector means;
|
|
ae_vector sigmas;
|
|
double s;
|
|
ae_int_t npoints;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&means, 0, sizeof(means));
|
|
memset(&sigmas, 0, sizeof(sigmas));
|
|
ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
|
|
|
|
ae_assert(setsize>=0, "MLPInitPreprocessorSparseSubset: SetSize<0", _state);
|
|
if( subsetsize<0 )
|
|
{
|
|
mlpinitpreprocessorsparse(network, xy, setsize, _state);
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
ae_assert(subsetsize<=idx->cnt, "MLPInitPreprocessorSparseSubset: SubsetSize>Length(Idx)", _state);
|
|
npoints = setsize;
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
ae_assert(idx->ptr.p_int[i]>=0, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]<0)", _state);
|
|
ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
|
|
}
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ntotal = network->structinfo.ptr.p_int[3];
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* Means/Sigmas
|
|
*/
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
jmax = nin-1;
|
|
}
|
|
else
|
|
{
|
|
jmax = nin+nout-1;
|
|
}
|
|
ae_vector_set_length(&means, jmax+1, _state);
|
|
ae_vector_set_length(&sigmas, jmax+1, _state);
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = (double)(0);
|
|
sigmas.ptr.p_double[i] = (double)(0);
|
|
}
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
sparsegetrow(xy, idx->ptr.p_int[i], &network->xyrow, _state);
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
means.ptr.p_double[j] = means.ptr.p_double[j]+network->xyrow.ptr.p_double[j];
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
means.ptr.p_double[i] = means.ptr.p_double[i]/subsetsize;
|
|
}
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
sparsegetrow(xy, idx->ptr.p_int[i], &network->xyrow, _state);
|
|
for(j=0; j<=jmax; j++)
|
|
{
|
|
sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(network->xyrow.ptr.p_double[j]-means.ptr.p_double[j], _state);
|
|
}
|
|
}
|
|
for(i=0; i<=jmax; i++)
|
|
{
|
|
sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/subsetsize, _state);
|
|
}
|
|
|
|
/*
|
|
* Inputs
|
|
*/
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
|
|
network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Outputs
|
|
*/
|
|
if( !mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
|
|
ntype = network->structinfo.ptr.p_int[offs+0];
|
|
|
|
/*
|
|
* Linear outputs
|
|
*/
|
|
if( ntype==0 )
|
|
{
|
|
network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
|
|
network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Bounded outputs (half-interval)
|
|
*/
|
|
if( ntype==3 )
|
|
{
|
|
s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
|
|
}
|
|
if( ae_fp_eq(s,(double)(0)) )
|
|
{
|
|
s = 1.0;
|
|
}
|
|
network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
|
|
if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
|
|
{
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Returns information about initialized network: number of inputs, outputs,
|
|
weights.
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpproperties(multilayerperceptron* network,
|
|
ae_int_t* nin,
|
|
ae_int_t* nout,
|
|
ae_int_t* wcount,
|
|
ae_state *_state)
|
|
{
|
|
|
|
*nin = 0;
|
|
*nout = 0;
|
|
*wcount = 0;
|
|
|
|
*nin = network->structinfo.ptr.p_int[1];
|
|
*nout = network->structinfo.ptr.p_int[2];
|
|
*wcount = network->structinfo.ptr.p_int[4];
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Returns number of "internal", low-level neurons in the network (ones which
are stored in StructInfo).
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpntotal(multilayerperceptron* network, ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
result = network->structinfo.ptr.p_int[3];
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Returns number of inputs.
|
|
|
|
-- ALGLIB --
|
|
Copyright 19.10.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpgetinputscount(multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
result = network->structinfo.ptr.p_int[1];
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Returns number of outputs.
|
|
|
|
-- ALGLIB --
|
|
Copyright 19.10.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpgetoutputscount(multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
result = network->structinfo.ptr.p_int[2];
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Returns number of weights.
|
|
|
|
-- ALGLIB --
|
|
Copyright 19.10.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpgetweightscount(multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
result = network->structinfo.ptr.p_int[4];
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Tells whether network is SOFTMAX-normalized (i.e. classifier) or not.
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_bool mlpissoftmax(multilayerperceptron* network, ae_state *_state)
|
|
{
|
|
ae_bool result;
|
|
|
|
|
|
result = network->structinfo.ptr.p_int[6]==1;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns total number of layers (including input, hidden and
|
|
output layers).
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpgetlayerscount(multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
result = network->hllayersizes.cnt;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns size of K-th layer.
|
|
|
|
K=0 corresponds to input layer, K=CNT-1 corresponds to output layer.
|
|
|
|
Size of the output layer is always equal to the number of outputs, although
in a softmax-normalized network the last neuron doesn't have any
connections - it is identically zero.
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpgetlayersize(multilayerperceptron* network,
|
|
ae_int_t k,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
ae_assert(k>=0&&k<network->hllayersizes.cnt, "MLPGetLayerSize: incorrect layer index", _state);
|
|
result = network->hllayersizes.ptr.p_int[k];
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns offset/scaling coefficients for I-th input of the
|
|
network.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
I - input index
|
|
|
|
OUTPUT PARAMETERS:
|
|
Mean - mean term
|
|
Sigma - sigma term, guaranteed to be nonzero.
|
|
|
|
I-th input is passed through linear transformation
|
|
IN[i] = (IN[i]-Mean)/Sigma
|
|
before feeding to the network
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpgetinputscaling(multilayerperceptron* network,
|
|
ae_int_t i,
|
|
double* mean,
|
|
double* sigma,
|
|
ae_state *_state)
|
|
{
|
|
|
|
*mean = 0;
|
|
*sigma = 0;
|
|
|
|
ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[0], "MLPGetInputScaling: incorrect (nonexistent) I", _state);
|
|
*mean = network->columnmeans.ptr.p_double[i];
|
|
*sigma = network->columnsigmas.ptr.p_double[i];
|
|
if( ae_fp_eq(*sigma,(double)(0)) )
|
|
{
|
|
*sigma = (double)(1);
|
|
}
|
|
}
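/*************************************************************************
For reference, the linear transforms documented above written out as
plain helpers (illustrative only; Sigma is assumed nonzero, which this
function guarantees):

    // Standardize an input before it is fed to the network, and undo
    // the output scaling on the way back.
    inline double scale_input(double x, double mean, double sigma)
    {
        return (x-mean)/sigma;      // IN[i] = (IN[i]-Mean)/Sigma
    }
    inline double unscale_output(double y, double mean, double sigma)
    {
        return y*sigma+mean;        // OUT[i] = OUT[i]*Sigma+Mean
    }
*************************************************************************/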
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns offset/scaling coefficients for I-th output of the
|
|
network.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
I - output index
|
|
|
|
OUTPUT PARAMETERS:
|
|
Mean - mean term
|
|
Sigma - sigma term, guaranteed to be nonzero.
|
|
|
|
I-th output is passed through linear transformation
|
|
OUT[i] = OUT[i]*Sigma+Mean
|
|
before returning it to user. In case we have SOFTMAX-normalized network,
|
|
we return (Mean,Sigma)=(0.0,1.0).
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpgetoutputscaling(multilayerperceptron* network,
|
|
ae_int_t i,
|
|
double* mean,
|
|
double* sigma,
|
|
ae_state *_state)
|
|
{
|
|
|
|
*mean = 0;
|
|
*sigma = 0;
|
|
|
|
ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1], "MLPGetOutputScaling: incorrect (nonexistent) I", _state);
|
|
if( network->structinfo.ptr.p_int[6]==1 )
|
|
{
|
|
*mean = (double)(0);
|
|
*sigma = (double)(1);
|
|
}
|
|
else
|
|
{
|
|
*mean = network->columnmeans.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i];
|
|
*sigma = network->columnsigmas.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i];
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns information about Ith neuron of Kth layer
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
K - layer index
|
|
I - neuron index (within layer)
|
|
|
|
OUTPUT PARAMETERS:
|
|
FKind - activation function type (used by MLPActivationFunction())
|
|
this value is zero for input or linear neurons
|
|
Threshold - also called offset, bias
|
|
zero for input neurons
|
|
|
|
NOTE: this function throws exception if layer or neuron with given index
      does not exist.
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpgetneuroninfo(multilayerperceptron* network,
|
|
ae_int_t k,
|
|
ae_int_t i,
|
|
ae_int_t* fkind,
|
|
double* threshold,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t ncnt;
|
|
ae_int_t istart;
|
|
ae_int_t highlevelidx;
|
|
ae_int_t activationoffset;
|
|
|
|
*fkind = 0;
|
|
*threshold = 0;
|
|
|
|
ncnt = network->hlneurons.cnt/mlpbase_hlnfieldwidth;
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* search
|
|
*/
|
|
network->integerbuf.ptr.p_int[0] = k;
|
|
network->integerbuf.ptr.p_int[1] = i;
|
|
highlevelidx = recsearch(&network->hlneurons, mlpbase_hlnfieldwidth, 2, 0, ncnt, &network->integerbuf, _state);
|
|
ae_assert(highlevelidx>=0, "MLPGetNeuronInfo: incorrect (nonexistent) layer or neuron index", _state);
|
|
|
|
/*
|
|
* 1. find offset of the activation function record in the StructInfo array
|
|
*/
|
|
if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]>=0 )
|
|
{
|
|
activationoffset = istart+network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]*mlpbase_nfieldwidth;
|
|
*fkind = network->structinfo.ptr.p_int[activationoffset+0];
|
|
}
|
|
else
|
|
{
|
|
*fkind = 0;
|
|
}
|
|
if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]>=0 )
|
|
{
|
|
*threshold = network->weights.ptr.p_double[network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]];
|
|
}
|
|
else
|
|
{
|
|
*threshold = (double)(0);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns information about connection from I0-th neuron of
|
|
K0-th layer to I1-th neuron of K1-th layer.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
K0 - layer index
|
|
I0 - neuron index (within layer)
|
|
K1 - layer index
|
|
I1 - neuron index (within layer)
|
|
|
|
RESULT:
|
|
connection weight (zero for non-existent connections)
|
|
|
|
This function:
|
|
1. throws exception if layer or neuron with given index does not exist.
|
|
2. returns zero if neurons exist, but there is no connection between them
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlpgetweight(multilayerperceptron* network,
|
|
ae_int_t k0,
|
|
ae_int_t i0,
|
|
ae_int_t k1,
|
|
ae_int_t i1,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t ccnt;
|
|
ae_int_t highlevelidx;
|
|
double result;
|
|
|
|
|
|
ccnt = network->hlconnections.cnt/mlpbase_hlconnfieldwidth;
|
|
|
|
/*
|
|
* check params
|
|
*/
|
|
ae_assert(k0>=0&&k0<network->hllayersizes.cnt, "MLPGetWeight: incorrect (nonexistent) K0", _state);
|
|
ae_assert(i0>=0&&i0<network->hllayersizes.ptr.p_int[k0], "MLPGetWeight: incorrect (nonexistent) I0", _state);
|
|
ae_assert(k1>=0&&k1<network->hllayersizes.cnt, "MLPGetWeight: incorrect (nonexistent) K1", _state);
|
|
ae_assert(i1>=0&&i1<network->hllayersizes.ptr.p_int[k1], "MLPGetWeight: incorrect (nonexistent) I1", _state);
|
|
|
|
/*
|
|
* search
|
|
*/
|
|
network->integerbuf.ptr.p_int[0] = k0;
|
|
network->integerbuf.ptr.p_int[1] = i0;
|
|
network->integerbuf.ptr.p_int[2] = k1;
|
|
network->integerbuf.ptr.p_int[3] = i1;
|
|
highlevelidx = recsearch(&network->hlconnections, mlpbase_hlconnfieldwidth, 4, 0, ccnt, &network->integerbuf, _state);
|
|
if( highlevelidx>=0 )
|
|
{
|
|
result = network->weights.ptr.p_double[network->hlconnections.ptr.p_int[highlevelidx*mlpbase_hlconnfieldwidth+4]];
|
|
}
|
|
else
|
|
{
|
|
result = (double)(0);
|
|
}
|
|
return result;
|
|
}
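/*************************************************************************
The lookup above delegates to recsearch(), which locates a fixed-width
integer record by its leading key fields (the table being kept in sorted
order). As an illustration of the record-matching idea only - not of
recsearch() itself - here is a plain linear-scan sketch:

    #include <vector>

    // Find a record whose first nkey fields equal key[0..nkey-1] in a
    // flat array of nrec records of `width` integers each; returns the
    // record index, or -1 if there is no match.
    int find_record(const std::vector<int> &table, int width, int nkey,
                    const std::vector<int> &key, int nrec)
    {
        for(int r = 0; r < nrec; r++)
        {
            bool match = true;
            for(int k = 0; k < nkey; k++)
                if( table[r*width+k]!=key[k] )
                {
                    match = false;
                    break;
                }
            if( match )
                return r;
        }
        return -1;
    }
*************************************************************************/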
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets offset/scaling coefficients for I-th input of the
|
|
network.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
I - input index
|
|
Mean - mean term
|
|
Sigma - sigma term (if zero, will be replaced by 1.0)
|
|
|
|
NOTE: I-th input is passed through linear transformation
|
|
IN[i] = (IN[i]-Mean)/Sigma
|
|
before feeding to the network. This function sets Mean and Sigma.
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetinputscaling(multilayerperceptron* network,
|
|
ae_int_t i,
|
|
double mean,
|
|
double sigma,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[0], "MLPSetInputScaling: incorrect (nonexistent) I", _state);
|
|
ae_assert(ae_isfinite(mean, _state), "MLPSetInputScaling: infinite or NAN Mean", _state);
|
|
ae_assert(ae_isfinite(sigma, _state), "MLPSetInputScaling: infinite or NAN Sigma", _state);
|
|
if( ae_fp_eq(sigma,(double)(0)) )
|
|
{
|
|
sigma = (double)(1);
|
|
}
|
|
network->columnmeans.ptr.p_double[i] = mean;
|
|
network->columnsigmas.ptr.p_double[i] = sigma;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets offset/scaling coefficients for I-th output of the
|
|
network.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
I - output index
|
|
Mean - mean term
|
|
Sigma - sigma term (if zero, will be replaced by 1.0)
|
|
|
|
OUTPUT PARAMETERS:
|
|
|
|
NOTE: I-th output is passed through linear transformation
|
|
OUT[i] = OUT[i]*Sigma+Mean
|
|
before returning it to user. This function sets Sigma/Mean. In case we
|
|
have SOFTMAX-normalized network, you cannot set (Sigma,Mean) to anything
other than (0.0,1.0) - this function will throw an exception.
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetoutputscaling(multilayerperceptron* network,
|
|
ae_int_t i,
|
|
double mean,
|
|
double sigma,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1], "MLPSetOutputScaling: incorrect (nonexistent) I", _state);
|
|
ae_assert(ae_isfinite(mean, _state), "MLPSetOutputScaling: infinite or NAN Mean", _state);
|
|
ae_assert(ae_isfinite(sigma, _state), "MLPSetOutputScaling: infinite or NAN Sigma", _state);
|
|
if( network->structinfo.ptr.p_int[6]==1 )
|
|
{
|
|
ae_assert(ae_fp_eq(mean,(double)(0)), "MLPSetOutputScaling: you can not set non-zero Mean term for classifier network", _state);
|
|
ae_assert(ae_fp_eq(sigma,(double)(1)), "MLPSetOutputScaling: you can not set non-unit Sigma term for classifier network", _state);
|
|
}
|
|
else
|
|
{
|
|
if( ae_fp_eq(sigma,(double)(0)) )
|
|
{
|
|
sigma = (double)(1);
|
|
}
|
|
network->columnmeans.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i] = mean;
|
|
network->columnsigmas.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i] = sigma;
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function modifies information about Ith neuron of Kth layer
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
K - layer index
|
|
I - neuron index (within layer)
|
|
FKind - activation function type (used by MLPActivationFunction())
|
|
this value must be zero for input neurons
|
|
(you can not set activation function for input neurons)
|
|
Threshold - also called offset, bias
|
|
this value must be zero for input neurons
|
|
(you can not set threshold for input neurons)
|
|
|
|
NOTES:
|
|
1. this function throws exception if layer or neuron with given index does
   not exist.
|
|
2. this function also throws exception when you try to set non-linear
|
|
activation function for input neurons (any kind of network) or for output
|
|
neurons of classifier network.
|
|
3. this function throws exception when you try to set non-zero threshold for
|
|
input neurons (any kind of network).
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetneuroninfo(multilayerperceptron* network,
|
|
ae_int_t k,
|
|
ae_int_t i,
|
|
ae_int_t fkind,
|
|
double threshold,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t ncnt;
|
|
ae_int_t istart;
|
|
ae_int_t highlevelidx;
|
|
ae_int_t activationoffset;
|
|
|
|
|
|
ae_assert(ae_isfinite(threshold, _state), "MLPSetNeuronInfo: infinite or NAN Threshold", _state);
|
|
|
|
/*
|
|
* convenience vars
|
|
*/
|
|
ncnt = network->hlneurons.cnt/mlpbase_hlnfieldwidth;
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* search
|
|
*/
|
|
network->integerbuf.ptr.p_int[0] = k;
|
|
network->integerbuf.ptr.p_int[1] = i;
|
|
highlevelidx = recsearch(&network->hlneurons, mlpbase_hlnfieldwidth, 2, 0, ncnt, &network->integerbuf, _state);
|
|
ae_assert(highlevelidx>=0, "MLPSetNeuronInfo: incorrect (nonexistent) layer or neuron index", _state);
|
|
|
|
/*
|
|
* activation function
|
|
*/
|
|
if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]>=0 )
|
|
{
|
|
activationoffset = istart+network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]*mlpbase_nfieldwidth;
|
|
network->structinfo.ptr.p_int[activationoffset+0] = fkind;
|
|
}
|
|
else
|
|
{
|
|
ae_assert(fkind==0, "MLPSetNeuronInfo: you try to set activation function for neuron which can not have one", _state);
|
|
}
|
|
|
|
/*
|
|
* Threshold
|
|
*/
|
|
if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]>=0 )
|
|
{
|
|
network->weights.ptr.p_double[network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]] = threshold;
|
|
}
|
|
else
|
|
{
|
|
ae_assert(ae_fp_eq(threshold,(double)(0)), "MLPSetNeuronInfo: you try to set non-zero threshold for neuron which can not have one", _state);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function modifies information about connection from I0-th neuron of
|
|
K0-th layer to I1-th neuron of K1-th layer.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - network
|
|
K0 - layer index
|
|
I0 - neuron index (within layer)
|
|
K1 - layer index
|
|
I1 - neuron index (within layer)
|
|
W - connection weight (must be zero for non-existent
|
|
connections)
|
|
|
|
This function:
|
|
1. throws exception if layer or neuron with given index does not exist.
|
|
2. throws exception if you try to set non-zero weight for non-existent
|
|
connection
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetweight(multilayerperceptron* network,
|
|
ae_int_t k0,
|
|
ae_int_t i0,
|
|
ae_int_t k1,
|
|
ae_int_t i1,
|
|
double w,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t ccnt;
|
|
ae_int_t highlevelidx;
|
|
|
|
|
|
ccnt = network->hlconnections.cnt/mlpbase_hlconnfieldwidth;
|
|
|
|
/*
|
|
* check params
|
|
*/
|
|
ae_assert(k0>=0&&k0<network->hllayersizes.cnt, "MLPSetWeight: incorrect (nonexistent) K0", _state);
|
|
ae_assert(i0>=0&&i0<network->hllayersizes.ptr.p_int[k0], "MLPSetWeight: incorrect (nonexistent) I0", _state);
|
|
ae_assert(k1>=0&&k1<network->hllayersizes.cnt, "MLPSetWeight: incorrect (nonexistent) K1", _state);
|
|
ae_assert(i1>=0&&i1<network->hllayersizes.ptr.p_int[k1], "MLPSetWeight: incorrect (nonexistent) I1", _state);
|
|
ae_assert(ae_isfinite(w, _state), "MLPSetWeight: infinite or NAN weight", _state);
|
|
|
|
/*
|
|
* search
|
|
*/
|
|
network->integerbuf.ptr.p_int[0] = k0;
|
|
network->integerbuf.ptr.p_int[1] = i0;
|
|
network->integerbuf.ptr.p_int[2] = k1;
|
|
network->integerbuf.ptr.p_int[3] = i1;
|
|
highlevelidx = recsearch(&network->hlconnections, mlpbase_hlconnfieldwidth, 4, 0, ccnt, &network->integerbuf, _state);
|
|
if( highlevelidx>=0 )
|
|
{
|
|
network->weights.ptr.p_double[network->hlconnections.ptr.p_int[highlevelidx*mlpbase_hlconnfieldwidth+4]] = w;
|
|
}
|
|
else
|
|
{
|
|
ae_assert(ae_fp_eq(w,(double)(0)), "MLPSetWeight: you try to set non-zero weight for non-existent connection", _state);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Neural network activation function
|
|
|
|
INPUT PARAMETERS:
|
|
NET - neuron input
|
|
K - function index (zero for linear function)
|
|
|
|
OUTPUT PARAMETERS:
|
|
F - function
|
|
DF - its derivative
|
|
D2F - its second derivative
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpactivationfunction(double net,
|
|
ae_int_t k,
|
|
double* f,
|
|
double* df,
|
|
double* d2f,
|
|
ae_state *_state)
|
|
{
|
|
double net2;
|
|
double arg;
|
|
double root;
|
|
double r;
|
|
|
|
*f = 0;
|
|
*df = 0;
|
|
*d2f = 0;
|
|
|
|
if( k==0||k==-5 )
|
|
{
|
|
*f = net;
|
|
*df = (double)(1);
|
|
*d2f = (double)(0);
|
|
return;
|
|
}
|
|
if( k==1 )
|
|
{
|
|
|
|
/*
|
|
* TanH activation function
|
|
*/
|
|
if( ae_fp_less(ae_fabs(net, _state),(double)(100)) )
|
|
{
|
|
*f = ae_tanh(net, _state);
|
|
}
|
|
else
|
|
{
|
|
*f = (double)(ae_sign(net, _state));
|
|
}
|
|
*df = 1-*f*(*f);
|
|
*d2f = -2*(*f)*(*df);
|
|
return;
|
|
}
|
|
if( k==3 )
|
|
{
|
|
|
|
/*
|
|
* EX activation function
|
|
*/
|
|
if( ae_fp_greater_eq(net,(double)(0)) )
|
|
{
|
|
net2 = net*net;
|
|
arg = net2+1;
|
|
root = ae_sqrt(arg, _state);
|
|
*f = net+root;
|
|
r = net/root;
|
|
*df = 1+r;
|
|
*d2f = (root-net*r)/arg;
|
|
}
|
|
else
|
|
{
|
|
*f = ae_exp(net, _state);
|
|
*df = *f;
|
|
*d2f = *f;
|
|
}
|
|
return;
|
|
}
|
|
if( k==2 )
|
|
{
|
|
*f = ae_exp(-ae_sqr(net, _state), _state);
|
|
*df = -2*net*(*f);
|
|
*d2f = -2*(*f+*df*net);
|
|
return;
|
|
}
|
|
*f = (double)(0);
|
|
*df = (double)(0);
|
|
*d2f = (double)(0);
|
|
}
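/*************************************************************************
Standalone check (illustrative only) of the TanH branch above: f=tanh(net),
df=1-f^2, d2f=-2*f*df, with df verified against a central finite
difference:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double net = 0.7, h = 1.0e-6;
        double f    = std::tanh(net);
        double df   = 1.0-f*f;              // analytic first derivative
        double d2f  = -2.0*f*df;            // analytic second derivative
        double dfn  = (std::tanh(net+h)-std::tanh(net-h))/(2.0*h);
        std::printf("df=%.9f numeric=%.9f d2f=%.9f\n", df, dfn, d2f);
        return 0;
    }
*************************************************************************/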
|
|
|
|
|
|
/*************************************************************************
|
|
Processing
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network
|
|
X - input vector, array[0..NIn-1].
|
|
|
|
OUTPUT PARAMETERS:
|
|
Y - result. Regression estimate when solving regression task,
|
|
vector of posterior probabilities for classification task.
|
|
|
|
See also MLPProcessI
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpprocess(multilayerperceptron* network,
|
|
/* Real */ ae_vector* x,
|
|
/* Real */ ae_vector* y,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
if( y->cnt<network->structinfo.ptr.p_int[2] )
|
|
{
|
|
ae_vector_set_length(y, network->structinfo.ptr.p_int[2], _state);
|
|
}
|
|
mlpinternalprocessvector(&network->structinfo, &network->weights, &network->columnmeans, &network->columnsigmas, &network->neurons, &network->dfdnet, x, y, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
'interactive' variant of MLPProcess for languages like Python which
|
|
support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the
|
|
interpreter
|
|
|
|
This function allocates new array on each call, so it is significantly
|
|
slower than its 'non-interactive' counterpart, but it is more convenient
|
|
when you call it from command line.
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.09.2010 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpprocessi(multilayerperceptron* network,
|
|
/* Real */ ae_vector* x,
|
|
/* Real */ ae_vector* y,
|
|
ae_state *_state)
|
|
{
|
|
|
|
ae_vector_clear(y);
|
|
|
|
mlpprocess(network, x, y, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Error of the neural network on dataset.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network;
|
|
XY - training set, see below for information on the
|
|
training set format;
|
|
NPoints - points count.
|
|
|
|
RESULT:
|
|
sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlperror(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
double result;
|
|
|
|
|
|
ae_assert(xy->rows>=npoints, "MLPError: XY has less than NPoints rows", _state);
|
|
if( npoints>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPError: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPError: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
|
|
result = ae_sqr(network->err.rmserror, _state)*npoints*mlpgetoutputscount(network, _state)/2;
|
|
return result;
|
|
}
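/*************************************************************************
The conversion above recovers SUM(sqr(y[i]-desired_y[i])/2) from the RMS
error via rmserror^2*NPoints*NOut/2, since rmserror is the root of the
mean squared per-component deviation. For reference, the same quantity
computed directly from stored outputs and targets (illustrative sketch):

    #include <vector>

    // Sum-of-squares error SUM(sqr(y-t)/2) over all samples/components.
    double sse_half(const std::vector<std::vector<double> > &y,
                    const std::vector<std::vector<double> > &t)
    {
        double e = 0.0;
        for(size_t i = 0; i < y.size(); i++)
            for(size_t j = 0; j < y[i].size(); j++)
            {
                double d = y[i][j]-t[i][j];
                e += d*d/2;
            }
        return e;
    }
*************************************************************************/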
|
|
|
|
|
|
/*************************************************************************
|
|
Error of the neural network on dataset given by sparse matrix.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network
|
|
XY - training set, see below for information on the
|
|
training set format. This function checks correctness
|
|
of the dataset (no NANs/INFs, class numbers are
|
|
correct) and throws exception when incorrect dataset
|
|
is passed. Sparse matrix must use CRS format for
|
|
storage.
|
|
NPoints - points count, >=0
|
|
|
|
RESULT:
|
|
sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlperrorsparse(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
double result;
|
|
|
|
|
|
ae_assert(sparseiscrs(xy, _state), "MLPErrorSparse: XY is not in CRS format.", _state);
|
|
ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPErrorSparse: XY has less than NPoints rows", _state);
|
|
if( npoints>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPErrorSparse: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSparse: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
|
|
result = ae_sqr(network->err.rmserror, _state)*npoints*mlpgetoutputscount(network, _state)/2;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Natural error function for neural network, internal subroutine.
|
|
|
|
NOTE: this function is single-threaded. Unlike other error functions, it
|
|
receives no speed-up from being executed in SMP mode.
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlperrorn(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t ssize,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t k;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
double e;
|
|
double result;
|
|
|
|
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
result = (double)(0);
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
|
|
/*
|
|
* Process vector
|
|
*/
|
|
ae_v_move(&network->x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
|
|
mlpprocess(network, &network->x, &network->y, _state);
|
|
|
|
/*
|
|
* Update error function
|
|
*/
|
|
if( network->structinfo.ptr.p_int[6]==0 )
|
|
{
|
|
|
|
/*
|
|
* Least squares error function
|
|
*/
|
|
ae_v_sub(&network->y.ptr.p_double[0], 1, &xy->ptr.pp_double[i][nin], 1, ae_v_len(0,nout-1));
|
|
e = ae_v_dotproduct(&network->y.ptr.p_double[0], 1, &network->y.ptr.p_double[0], 1, ae_v_len(0,nout-1));
|
|
result = result+e/2;
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Cross-entropy error function
|
|
*/
|
|
k = ae_round(xy->ptr.pp_double[i][nin], _state);
|
|
if( k>=0&&k<nout )
|
|
{
|
|
result = result+mlpbase_safecrossentropy((double)(1), network->y.ptr.p_double[k], _state);
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Classification error of the neural network on dataset.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network;
|
|
XY - training set, see below for information on the
|
|
training set format;
|
|
NPoints - points count.
|
|
|
|
RESULT:
|
|
classification error (number of misclassified cases)
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
ae_int_t mlpclserror(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
ae_assert(xy->rows>=npoints, "MLPClsError: XY has less than NPoints rows", _state);
|
|
if( npoints>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPClsError: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPClsError: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
|
|
result = ae_round(npoints*network->err.relclserror, _state);
|
|
return result;
|
|
}
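/*************************************************************************
The classification error counted above is the number of samples whose
predicted class - the argmax of the network's posterior vector - differs
from the stored label. A standalone sketch (illustrative only):

    #include <vector>

    // Count samples where argmax(posteriors[i]) != labels[i].
    int count_misclassified(
        const std::vector<std::vector<double> > &posteriors,
        const std::vector<int> &labels)
    {
        int errors = 0;
        for(size_t i = 0; i < posteriors.size(); i++)
        {
            size_t best = 0;
            for(size_t j = 1; j < posteriors[i].size(); j++)
                if( posteriors[i][j]>posteriors[i][best] )
                    best = j;
            if( (int)best!=labels[i] )
                errors++;
        }
        return errors;
    }
*************************************************************************/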
|
|
|
|
|
|
/*************************************************************************
|
|
Relative classification error on the test set.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network;
|
|
XY - training set, see below for information on the
|
|
training set format;
|
|
NPoints - points count.
|
|
|
|
RESULT:
|
|
Percent of incorrectly classified cases. Works both for classifier
|
|
networks and general purpose networks used as classifiers.
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 25.12.2008 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlprelclserror(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
double result;
|
|
|
|
|
|
ae_assert(xy->rows>=npoints, "MLPRelClsError: XY has less than NPoints rows", _state);
|
|
if( npoints>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPRelClsError: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRelClsError: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
if( npoints>0 )
|
|
{
|
|
result = (double)mlpclserror(network, xy, npoints, _state)/(double)npoints;
|
|
}
|
|
else
|
|
{
|
|
result = 0.0;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Relative classification error on the test set given by sparse matrix.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network;
|
|
XY - training set, see below for information on the
|
|
training set format. Sparse matrix must use CRS format
|
|
for storage.
|
|
NPoints - points count, >=0.
|
|
|
|
RESULT:
|
|
Percent of incorrectly classified cases. Works both for classifier
|
|
networks and general purpose networks used as classifiers.
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 09.08.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlprelclserrorsparse(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
double result;
|
|
|
|
|
|
ae_assert(sparseiscrs(xy, _state), "MLPRelClsErrorSparse: sparse matrix XY is not in CRS format.", _state);
|
|
ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPRelClsErrorSparse: sparse matrix XY has less than NPoints rows", _state);
|
|
if( npoints>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPRelClsErrorSparse: sparse matrix XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRelClsErrorSparse: sparse matrix XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
|
|
result = network->err.relclserror;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
Average cross-entropy (in bits per element) on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    NPoints -   points count.

RESULT:
CrossEntropy/(NPoints*LN(2)).
Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 08.01.2009 by Bochkanov Sergey
*************************************************************************/
double mlpavgce(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(xy->rows>=npoints, "MLPAvgCE: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgCE: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgCE: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgce;
    return result;
}


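/*************************************************************************
NOTE (worked equation): for a classification network evaluated against a
one-hot dataset, the accumulated cross-entropy in the formula above is

    CrossEntropy = -SUM(ln(p[k(i)]), i=0..NPoints-1)

where p[k(i)] is the predicted probability of the correct class k(i) of
sample i. Dividing by NPoints*LN(2) converts nats to bits per element, so
a network that always assigns probability 0.5 to the correct class scores
exactly 1.0 bit. This is a reading of the formula documented above, not
additional library behavior.
*************************************************************************/

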
/*************************************************************************
Average cross-entropy (in bits per element) on the test set given by
sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are
                correct) and throws an exception when an incorrect
                dataset is passed. Sparse matrix must use CRS format
                for storage.
    NPoints -   points count, >=0.

RESULT:
CrossEntropy/(NPoints*LN(2)).
Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgcesparse(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(sparseiscrs(xy, _state), "MLPAvgCESparse: sparse matrix XY is not in CRS format.", _state);
    ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgCESparse: sparse matrix XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgCESparse: sparse matrix XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgCESparse: sparse matrix XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgce;
    return result;
}


/*************************************************************************
RMS error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    NPoints -   points count.

RESULT:
Root mean square error. Its meaning for regression task is obvious. As for
classification task, RMS error means error when estimating posterior
probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlprmserror(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(xy->rows>=npoints, "MLPRMSError: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPRMSError: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRMSError: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.rmserror;
    return result;
}


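/*************************************************************************
NOTE (usage sketch): a minimal dense-matrix evaluation through the public
C++ wrappers from dataanalysis.h. It assumes the standard ALGLIB
interface (alglib::mlpcreate0, alglib::real_2d_array, alglib::mlprmserror)
and is illustrative only.

    #include "dataanalysis.h"
    using namespace alglib;

    multilayerperceptron net;
    mlpcreate0(1, 1, net);                    // 1 input, 1 linear output

    real_2d_array xy = "[[0,0],[1,1],[2,2]]"; // column 0: input, 1: target
    double rms = mlprmserror(net, xy, 3);     // RMS over all 3 rows
*************************************************************************/

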
/*************************************************************************
RMS error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are
                correct) and throws an exception when an incorrect
                dataset is passed. Sparse matrix must use CRS format
                for storage.
    NPoints -   points count, >=0.

RESULT:
Root mean square error. Its meaning for regression task is obvious. As for
classification task, RMS error means error when estimating posterior
probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlprmserrorsparse(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(sparseiscrs(xy, _state), "MLPRMSErrorSparse: sparse matrix XY is not in CRS format.", _state);
    ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPRMSErrorSparse: sparse matrix XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPRMSErrorSparse: sparse matrix XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRMSErrorSparse: sparse matrix XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.rmserror;
    return result;
}


/*************************************************************************
Average absolute error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    NPoints -   points count.

RESULT:
Its meaning for regression task is obvious. As for classification task, it
means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgerror(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(xy->rows>=npoints, "MLPAvgError: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgError: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgError: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgerror;
    return result;
}


/*************************************************************************
Average absolute error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are
                correct) and throws an exception when an incorrect
                dataset is passed. Sparse matrix must use CRS format
                for storage.
    NPoints -   points count, >=0.

RESULT:
Its meaning for regression task is obvious. As for classification task, it
means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgerrorsparse(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(sparseiscrs(xy, _state), "MLPAvgErrorSparse: XY is not in CRS format.", _state);
    ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgErrorSparse: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgErrorSparse: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgErrorSparse: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgerror;
    return result;
}


/*************************************************************************
Average relative error on the test set.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format;
    NPoints -   points count.

RESULT:
Its meaning for regression task is obvious. As for classification task, it
means average relative error when estimating posterior probability of
belonging to the correct class.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerror(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(xy->rows>=npoints, "MLPAvgRelError: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgRelError: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgRelError: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgrelerror;
    return result;
}


/*************************************************************************
Average relative error on the test set given by sparse matrix.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   neural network;
    XY      -   training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are
                correct) and throws an exception when an incorrect
                dataset is passed. Sparse matrix must use CRS format
                for storage.
    NPoints -   points count, >=0.

RESULT:
Its meaning for regression task is obvious. As for classification task, it
means average relative error when estimating posterior probability of
belonging to the correct class.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerrorsparse(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    ae_assert(sparseiscrs(xy, _state), "MLPAvgRelErrorSparse: XY is not in CRS format.", _state);
    ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgRelErrorSparse: XY has less than NPoints rows", _state);
    if( npoints>0 )
    {
        if( mlpissoftmax(network, _state) )
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgRelErrorSparse: XY has less than NIn+1 columns", _state);
        }
        else
        {
            ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgRelErrorSparse: XY has less than NIn+NOut columns", _state);
        }
    }
    mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
    result = network->err.avgrelerror;
    return result;
}


/*************************************************************************
Gradient calculation

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    X       -   input vector, length of array must be at least NIn
    DesiredY-   desired outputs, length of array must be at least NOut
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network, array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgrad(multilayerperceptron* network,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* desiredy,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nout;
    ae_int_t ntotal;

    *e = 0;


    /*
     * Alloc
     */
    rvectorsetlengthatleast(grad, network->structinfo.ptr.p_int[4], _state);

    /*
     * Prepare dError/dOut, internal structures
     */
    mlpprocess(network, x, &network->y, _state);
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    *e = (double)(0);
    for(i=0; i<=ntotal-1; i++)
    {
        network->derror.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=nout-1; i++)
    {
        network->derror.ptr.p_double[ntotal-nout+i] = network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
        *e = *e+ae_sqr(network->y.ptr.p_double[i]-desiredy->ptr.p_double[i], _state)/2;
    }

    /*
     * gradient
     */
    mlpbase_mlpinternalcalculategradient(network, &network->neurons, &network->weights, &network->derror, grad, ae_false, _state);
}


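/*************************************************************************
NOTE (usage sketch): single-sample gradient through the public C++ wrapper
alglib::mlpgrad from dataanalysis.h; the wrapper names and hidden-layer
size below are assumptions for illustration. As the header above notes,
Grad may be reused across calls to avoid reallocation.

    #include "dataanalysis.h"
    using namespace alglib;

    multilayerperceptron net;
    mlpcreate1(2, 3, 1, net);            // 2 inputs, 3 hidden, 1 output

    real_1d_array x = "[0.5, -0.5]";
    real_1d_array desiredy = "[1.0]";
    real_1d_array grad;
    double e;
    mlpgrad(net, x, desiredy, e, grad);  // e = SUM(sqr(y-desiredy)/2)
*************************************************************************/

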
/*************************************************************************
Gradient calculation (natural error function is used)

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    X       -   input vector, length of array must be at least NIn
    DesiredY-   desired outputs, length of array must be at least NOut
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, sum-of-squares for regression networks,
                cross-entropy for classification networks.
    Grad    -   gradient of E with respect to weights of network, array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradn(multilayerperceptron* network,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* desiredy,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    double s;
    ae_int_t i;
    ae_int_t nout;
    ae_int_t ntotal;

    *e = 0;


    /*
     * Alloc
     */
    rvectorsetlengthatleast(grad, network->structinfo.ptr.p_int[4], _state);

    /*
     * Prepare dError/dOut, internal structures
     */
    mlpprocess(network, x, &network->y, _state);
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    for(i=0; i<=ntotal-1; i++)
    {
        network->derror.ptr.p_double[i] = (double)(0);
    }
    *e = (double)(0);
    if( network->structinfo.ptr.p_int[6]==0 )
    {

        /*
         * Regression network, least squares
         */
        for(i=0; i<=nout-1; i++)
        {
            network->derror.ptr.p_double[ntotal-nout+i] = network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
            *e = *e+ae_sqr(network->y.ptr.p_double[i]-desiredy->ptr.p_double[i], _state)/2;
        }
    }
    else
    {

        /*
         * Classification network, cross-entropy
         */
        s = (double)(0);
        for(i=0; i<=nout-1; i++)
        {
            s = s+desiredy->ptr.p_double[i];
        }
        for(i=0; i<=nout-1; i++)
        {
            network->derror.ptr.p_double[ntotal-nout+i] = s*network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
            *e = *e+mlpbase_safecrossentropy(desiredy->ptr.p_double[i], network->y.ptr.p_double[i], _state);
        }
    }

    /*
     * gradient
     */
    mlpbase_mlpinternalcalculategradient(network, &network->neurons, &network->weights, &network->derror, grad, ae_true, _state);
}


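/*************************************************************************
NOTE (worked equation): in the classification branch above, with softmax
outputs y=softmax(a) and cross-entropy E = -SUM(t[i]*ln(y[i]), i), the
derivative with respect to the pre-softmax activations is

    dE/da[i] = (SUM(t[j], j))*y[i] - t[i]

which is exactly what the loop stores into DError (S being the sum of the
targets). For one-hot targets S=1 and the seed reduces to the familiar
y[i]-t[i].
*************************************************************************/

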
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in dense format; one sample = one row:
                * first NIn columns contain inputs,
                * for regression problem, next NOut columns store
                  desired outputs.
                * for classification problem, next column (just one!)
                  stores class number.
    SSize   -   number of elements in XY
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network, array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatch(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t ssize,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t subset0;
    ae_int_t subset1;
    ae_int_t subsettype;
    smlpgrad *sgrad;
    ae_smart_ptr _sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&_sgrad, 0, sizeof(_sgrad));
    *e = 0;
    ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);

    ae_assert(ssize>=0, "MLPGradBatch: SSize<0", _state);
    subset0 = 0;
    subset1 = ssize;
    subsettype = 0;
    mlpproperties(network, &nin, &nout, &wcount, _state);
    rvectorsetlengthatleast(grad, wcount, _state);
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        sgrad->f = 0.0;
        for(i=0; i<=wcount-1; i++)
        {
            sgrad->g.ptr.p_double[i] = 0.0;
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    mlpgradbatchx(network, xy, &network->dummysxy, ssize, 0, &network->dummyidx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
    *e = 0.0;
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = 0.0;
    }
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        *e = *e+sgrad->f;
        for(i=0; i<=wcount-1; i++)
        {
            grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    ae_frame_leave(_state);
}


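/*************************************************************************
NOTE (usage sketch): batch gradient over a small dense dataset through
the public C++ wrapper alglib::mlpgradbatch. This is a toy illustration
under assumed wrapper names from dataanalysis.h; real training should use
the mlptrain unit rather than hand-rolled updates.

    #include "dataanalysis.h"
    using namespace alglib;

    multilayerperceptron net;
    mlpcreate0(1, 1, net);
    real_2d_array xy = "[[0,0],[1,2],[2,4]]";   // input, target pairs
    real_1d_array grad;
    double e;
    mlpgradbatch(net, xy, 3, e, grad);          // E and dE/dw over rows
    // grad now holds dE/dw for all WCount weights; a trainer would feed
    // it to an optimizer
*************************************************************************/

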
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs given by sparse
matrices

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in sparse format; one sample = one row:
                * MATRIX MUST BE STORED IN CRS FORMAT
                * first NIn columns contain inputs.
                * for regression problem, next NOut columns store
                  desired outputs.
                * for classification problem, next column (just one!)
                  stores class number.
    SSize   -   number of elements in XY
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network, array[WCount]

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparse(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t ssize,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t subset0;
    ae_int_t subset1;
    ae_int_t subsettype;
    smlpgrad *sgrad;
    ae_smart_ptr _sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&_sgrad, 0, sizeof(_sgrad));
    *e = 0;
    ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);

    ae_assert(ssize>=0, "MLPGradBatchSparse: SSize<0", _state);
    ae_assert(sparseiscrs(xy, _state), "MLPGradBatchSparse: sparse matrix XY must be in CRS format.", _state);
    subset0 = 0;
    subset1 = ssize;
    subsettype = 0;
    mlpproperties(network, &nin, &nout, &wcount, _state);
    rvectorsetlengthatleast(grad, wcount, _state);
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        sgrad->f = 0.0;
        for(i=0; i<=wcount-1; i++)
        {
            sgrad->g.ptr.p_double[i] = 0.0;
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    mlpgradbatchx(network, &network->dummydxy, xy, ssize, 1, &network->dummyidx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
    *e = 0.0;
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = 0.0;
    }
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        *e = *e+sgrad->f;
        for(i=0; i<=wcount-1; i++)
        {
            grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Batch gradient calculation for a subset of dataset

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in dense format; one sample = one row:
                * first NIn columns contain inputs,
                * for regression problem, next NOut columns store
                  desired outputs.
                * for classification problem, next column (just one!)
                  stores class number.
    SetSize -   real size of XY, SetSize>=0;
    Idx     -   subset of SubsetSize elements, array[SubsetSize]:
                * Idx[I] stores row index in the original dataset which is
                  given by XY. Gradient is calculated with respect to rows
                  whose indexes are stored in Idx[].
                * Idx[] must store correct indexes; this function throws
                  an exception in case incorrect index (less than 0 or
                  larger than rows(XY)) is given
                * Idx[] may store indexes in any order and even with
                  repetitions.
    SubsetSize- number of elements in Idx[] array:
                * positive value means that subset given by Idx[] is processed
                * zero value results in zero gradient
                * negative value means that full dataset is processed
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsubset(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t setsize,
     /* Integer */ ae_vector* idx,
     ae_int_t subsetsize,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t npoints;
    ae_int_t subset0;
    ae_int_t subset1;
    ae_int_t subsettype;
    smlpgrad *sgrad;
    ae_smart_ptr _sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&_sgrad, 0, sizeof(_sgrad));
    *e = 0;
    ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);

    ae_assert(setsize>=0, "MLPGradBatchSubset: SetSize<0", _state);
    ae_assert(subsetsize<=idx->cnt, "MLPGradBatchSubset: SubsetSize>Length(Idx)", _state);
    npoints = setsize;
    if( subsetsize<0 )
    {
        subset0 = 0;
        subset1 = setsize;
        subsettype = 0;
    }
    else
    {
        subset0 = 0;
        subset1 = subsetsize;
        subsettype = 1;
        for(i=0; i<=subsetsize-1; i++)
        {
            ae_assert(idx->ptr.p_int[i]>=0, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]<0)", _state);
            ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
        }
    }
    mlpproperties(network, &nin, &nout, &wcount, _state);
    rvectorsetlengthatleast(grad, wcount, _state);
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        sgrad->f = 0.0;
        for(i=0; i<=wcount-1; i++)
        {
            sgrad->g.ptr.p_double[i] = 0.0;
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    mlpgradbatchx(network, xy, &network->dummysxy, setsize, 0, idx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
    *e = 0.0;
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = 0.0;
    }
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        *e = *e+sgrad->f;
        for(i=0; i<=wcount-1; i++)
        {
            grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    ae_frame_leave(_state);
}


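/*************************************************************************
NOTE (usage sketch): a mini-batch gradient via the public C++ wrapper
alglib::mlpgradbatchsubset, assuming the standard wrapper names from
dataanalysis.h; indexes may repeat and come in any order, as documented
above.

    #include "dataanalysis.h"
    using namespace alglib;

    multilayerperceptron net;
    mlpcreate0(2, 1, net);
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    integer_1d_array idx = "[0,3,3]";    // rows 0, 3 and 3 again
    real_1d_array grad;
    double e;
    mlpgradbatchsubset(net, xy, 4, idx, 3, e, grad); // 3 of 4 rows used
*************************************************************************/

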
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs for a subset of
dataset given by set of indexes.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   original dataset in sparse format; one sample = one row:
                * MATRIX MUST BE STORED IN CRS FORMAT
                * first NIn columns contain inputs,
                * for regression problem, next NOut columns store
                  desired outputs.
                * for classification problem, next column (just one!)
                  stores class number.
    SetSize -   real size of XY, SetSize>=0;
    Idx     -   subset of SubsetSize elements, array[SubsetSize]:
                * Idx[I] stores row index in the original dataset which is
                  given by XY. Gradient is calculated with respect to rows
                  whose indexes are stored in Idx[].
                * Idx[] must store correct indexes; this function throws
                  an exception in case incorrect index (less than 0 or
                  larger than rows(XY)) is given
                * Idx[] may store indexes in any order and even with
                  repetitions.
    SubsetSize- number of elements in Idx[] array:
                * positive value means that subset given by Idx[] is processed
                * zero value results in zero gradient
                * negative value means that full dataset is processed
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, SUM(sqr(y[i]-desiredy[i])/2,i)
    Grad    -   gradient of E with respect to weights of network,
                array[WCount]

NOTE: when SubsetSize<0, the full dataset is processed; this is
      equivalent to calling the MLPGradBatchSparse function.

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparsesubset(multilayerperceptron* network,
     sparsematrix* xy,
     ae_int_t setsize,
     /* Integer */ ae_vector* idx,
     ae_int_t subsetsize,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t npoints;
    ae_int_t subset0;
    ae_int_t subset1;
    ae_int_t subsettype;
    smlpgrad *sgrad;
    ae_smart_ptr _sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&_sgrad, 0, sizeof(_sgrad));
    *e = 0;
    ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);

    ae_assert(setsize>=0, "MLPGradBatchSparseSubset: SetSize<0", _state);
    ae_assert(subsetsize<=idx->cnt, "MLPGradBatchSparseSubset: SubsetSize>Length(Idx)", _state);
    ae_assert(sparseiscrs(xy, _state), "MLPGradBatchSparseSubset: sparse matrix XY must be in CRS format.", _state);
    npoints = setsize;
    if( subsetsize<0 )
    {
        subset0 = 0;
        subset1 = setsize;
        subsettype = 0;
    }
    else
    {
        subset0 = 0;
        subset1 = subsetsize;
        subsettype = 1;
        for(i=0; i<=subsetsize-1; i++)
        {
            ae_assert(idx->ptr.p_int[i]>=0, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]<0)", _state);
            ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
        }
    }
    mlpproperties(network, &nin, &nout, &wcount, _state);
    rvectorsetlengthatleast(grad, wcount, _state);
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        sgrad->f = 0.0;
        for(i=0; i<=wcount-1; i++)
        {
            sgrad->g.ptr.p_double[i] = 0.0;
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    mlpgradbatchx(network, &network->dummydxy, xy, setsize, 1, idx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
    *e = 0.0;
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = 0.0;
    }
    ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
    while(sgrad!=NULL)
    {
        *e = *e+sgrad->f;
        for(i=0; i<=wcount-1; i++)
        {
            grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
        }
        ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Internal function which actually calculates batch gradient for a subset or
full dataset, which can be represented in different formats.

THIS FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchx(multilayerperceptron* network,
     /* Real */ ae_matrix* densexy,
     sparsematrix* sparsexy,
     ae_int_t datasetsize,
     ae_int_t datasettype,
     /* Integer */ ae_vector* idx,
     ae_int_t subset0,
     ae_int_t subset1,
     ae_int_t subsettype,
     ae_shared_pool* buf,
     ae_shared_pool* gradbuf,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t rowsize;
    ae_int_t srcidx;
    ae_int_t cstart;
    ae_int_t csize;
    ae_int_t j;
    double problemcost;
    mlpbuffers *buf2;
    ae_smart_ptr _buf2;
    ae_int_t len0;
    ae_int_t len1;
    mlpbuffers *pbuf;
    ae_smart_ptr _pbuf;
    smlpgrad *sgrad;
    ae_smart_ptr _sgrad;

    ae_frame_make(_state, &_frame_block);
    memset(&_buf2, 0, sizeof(_buf2));
    memset(&_pbuf, 0, sizeof(_pbuf));
    memset(&_sgrad, 0, sizeof(_sgrad));
    ae_smart_ptr_init(&_buf2, (void**)&buf2, _state, ae_true);
    ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
    ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);

    ae_assert(datasetsize>=0, "MLPGradBatchX: SetSize<0", _state);
    ae_assert(datasettype==0||datasettype==1, "MLPGradBatchX: DatasetType is incorrect", _state);
    ae_assert(subsettype==0||subsettype==1, "MLPGradBatchX: SubsetType is incorrect", _state);

    /*
     * Determine network and dataset properties
     */
    mlpproperties(network, &nin, &nout, &wcount, _state);
    if( mlpissoftmax(network, _state) )
    {
        rowsize = nin+1;
    }
    else
    {
        rowsize = nin+nout;
    }

    /*
     * Split problem.
     *
     * Splitting the problem allows us to reduce the effect of single-precision
     * arithmetic (the SSE-optimized version of MLPChunkedGradient uses single
     * precision internally, but converts results to double precision after
     * they are exported from the HPC buffer to the network). Small batches are
     * calculated in single precision, results are aggregated in double
     * precision, which allows us to avoid accumulation of errors when
     * we process very large batches (tens of thousands of items).
     *
     * NOTE: it is important to use real arithmetic for ProblemCost
     *       because ProblemCost may be larger than MAXINT.
     */
    problemcost = (double)(subset1-subset0);
    problemcost = problemcost*wcount*2;
    if( ae_fp_greater_eq(problemcost,smpactivationlevel(_state))&&subset1-subset0>=2*mlpbase_microbatchsize )
    {
        if( _trypexec_mlpgradbatchx(network,densexy,sparsexy,datasetsize,datasettype,idx,subset0,subset1,subsettype,buf,gradbuf, _state) )
        {
            ae_frame_leave(_state);
            return;
        }
    }
    if( subset1-subset0>=2*mlpbase_microbatchsize&&ae_fp_greater(problemcost,spawnlevel(_state)) )
    {
        splitlength(subset1-subset0, mlpbase_microbatchsize, &len0, &len1, _state);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0+len0, subsettype, buf, gradbuf, _state);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0+len0, subset1, subsettype, buf, gradbuf, _state);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Chunked processing
     */
    ae_shared_pool_retrieve(gradbuf, &_sgrad, _state);
    ae_shared_pool_retrieve(buf, &_pbuf, _state);
    hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
    cstart = subset0;
    while(cstart<subset1)
    {

        /*
         * Determine size of current chunk and copy it to PBuf.XY
         */
        csize = ae_minint(subset1, cstart+pbuf->chunksize, _state)-cstart;
        for(j=0; j<=csize-1; j++)
        {
            srcidx = -1;
            if( subsettype==0 )
            {
                srcidx = cstart+j;
            }
            if( subsettype==1 )
            {
                srcidx = idx->ptr.p_int[cstart+j];
            }
            ae_assert(srcidx>=0, "MLPGradBatchX: internal error", _state);
            if( datasettype==0 )
            {
                ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,rowsize-1));
            }
            if( datasettype==1 )
            {
                sparsegetrow(sparsexy, srcidx, &pbuf->xyrow, _state);
                ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &pbuf->xyrow.ptr.p_double[0], 1, ae_v_len(0,rowsize-1));
            }
        }

        /*
         * Process chunk and advance line pointer
         */
        mlpbase_mlpchunkedgradient(network, &pbuf->xy, 0, csize, &pbuf->batch4buf, &pbuf->hpcbuf, &sgrad->f, ae_false, _state);
        cstart = cstart+pbuf->chunksize;
    }
    hpcfinalizechunkedgradient(pbuf, &sgrad->g, _state);
    ae_shared_pool_recycle(buf, &_pbuf, _state);
    ae_shared_pool_recycle(gradbuf, &_sgrad, _state);
    ae_frame_leave(_state);
}


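/*************************************************************************
NOTE (reading of the splitting logic above): MLPGradBatchX first offers
the whole range to the parallel executor, then, while the range still
holds at least two micro-batches and its estimated cost exceeds the spawn
threshold, halves [Subset0,Subset1) via SplitLength and recurses; leaves
are processed in chunks of PBuf.ChunkSize rows, with per-leaf results
accumulated in the double-precision SMLPGrad pool entries. As a rough
illustration, with a micro-batch limit of 64 (the actual value of the
internal constant MLPBase_MicroBatchSize is an assumption here), a
1000-row subset would be recursively split until each piece is under 128
rows before chunked processing begins.
*************************************************************************/

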
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_mlpgradbatchx(multilayerperceptron* network,
     /* Real */ ae_matrix* densexy,
     sparsematrix* sparsexy,
     ae_int_t datasetsize,
     ae_int_t datasettype,
     /* Integer */ ae_vector* idx,
     ae_int_t subset0,
     ae_int_t subset1,
     ae_int_t subsettype,
     ae_shared_pool* buf,
     ae_shared_pool* gradbuf,
     ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
Batch gradient calculation for a set of inputs/outputs
(natural error function is used)

INPUT PARAMETERS:
    Network -   network initialized with one of the network creation funcs
    XY      -   set of inputs/outputs; one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SSize   -   number of elements in XY
    Grad    -   possibly preallocated array. If size of array is smaller
                than WCount, it will be reallocated. It is recommended to
                reuse previously allocated array to reduce allocation
                overhead.

OUTPUT PARAMETERS:
    E       -   error function, sum-of-squares for regression networks,
                cross-entropy for classification networks.
    Grad    -   gradient of E with respect to weights of network, array[WCount]

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradnbatch(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t ssize,
     double* e,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    mlpbuffers *pbuf;
    ae_smart_ptr _pbuf;

    ae_frame_make(_state, &_frame_block);
    memset(&_pbuf, 0, sizeof(_pbuf));
    *e = 0;
    ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);


    /*
     * Alloc
     */
    mlpproperties(network, &nin, &nout, &wcount, _state);
    ae_shared_pool_retrieve(&network->buf, &_pbuf, _state);
    hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
    rvectorsetlengthatleast(grad, wcount, _state);
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = (double)(0);
    }
    *e = (double)(0);
    i = 0;
    while(i<=ssize-1)
    {
        mlpbase_mlpchunkedgradient(network, xy, i, ae_minint(ssize, i+pbuf->chunksize, _state)-i, &pbuf->batch4buf, &pbuf->hpcbuf, e, ae_true, _state);
        i = i+pbuf->chunksize;
    }
    hpcfinalizechunkedgradient(pbuf, grad, _state);
    ae_shared_pool_recycle(&network->buf, &_pbuf, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Batch Hessian calculation (natural error function) using R-algorithm.
Internal subroutine.

  -- ALGLIB --
     Copyright 26.01.2008 by Bochkanov Sergey.

     Hessian calculation based on R-algorithm described in
     "Fast Exact Multiplication by the Hessian",
     B. A. Pearlmutter,
     Neural Computation, 1994.
*************************************************************************/
void mlphessiannbatch(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t ssize,
     double* e,
     /* Real */ ae_vector* grad,
     /* Real */ ae_matrix* h,
     ae_state *_state)
{

    *e = 0;

    mlpbase_mlphessianbatchinternal(network, xy, ssize, ae_true, e, grad, h, _state);
}


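/*************************************************************************
NOTE (worked equation): the R-algorithm cited above (Pearlmutter, 1994)
computes exact Hessian-vector products at roughly the cost of one extra
forward/backward pass, by differentiating the gradient along a direction
v:

    H*v = d/dr [ grad E(w + r*v) ] evaluated at r=0

A full WCount-by-WCount Hessian can then be assembled from WCount such
products, which is why batch Hessian evaluation costs on the order of
WCount gradient evaluations; this describes the general method from the
cited paper, not internals beyond what the header states.
*************************************************************************/

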
/*************************************************************************
Batch Hessian calculation using R-algorithm.
Internal subroutine.

  -- ALGLIB --
     Copyright 26.01.2008 by Bochkanov Sergey.

     Hessian calculation based on R-algorithm described in
     "Fast Exact Multiplication by the Hessian",
     B. A. Pearlmutter,
     Neural Computation, 1994.
*************************************************************************/
void mlphessianbatch(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t ssize,
     double* e,
     /* Real */ ae_vector* grad,
     /* Real */ ae_matrix* h,
     ae_state *_state)
{

    *e = 0;

    mlpbase_mlphessianbatchinternal(network, xy, ssize, ae_false, e, grad, h, _state);
}


/*************************************************************************
Internal subroutine, shouldn't be called by user.
*************************************************************************/
void mlpinternalprocessvector(/* Integer */ ae_vector* structinfo,
     /* Real */ ae_vector* weights,
     /* Real */ ae_vector* columnmeans,
     /* Real */ ae_vector* columnsigmas,
     /* Real */ ae_vector* neurons,
     /* Real */ ae_vector* dfdnet,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t w1;
    ae_int_t w2;
    ae_int_t ntotal;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t istart;
    ae_int_t offs;
    double net;
    double f;
    double df;
    double d2f;
    double mx;
    ae_bool perr;


    /*
     * Read network geometry
     */
    nin = structinfo->ptr.p_int[1];
    nout = structinfo->ptr.p_int[2];
    ntotal = structinfo->ptr.p_int[3];
    istart = structinfo->ptr.p_int[5];

    /*
     * Input standardisation and loading into the network
     */
    for(i=0; i<=nin-1; i++)
    {
        if( ae_fp_neq(columnsigmas->ptr.p_double[i],(double)(0)) )
        {
            neurons->ptr.p_double[i] = (x->ptr.p_double[i]-columnmeans->ptr.p_double[i])/columnsigmas->ptr.p_double[i];
        }
        else
        {
            neurons->ptr.p_double[i] = x->ptr.p_double[i]-columnmeans->ptr.p_double[i];
        }
    }

    /*
     * Process network
     */
    for(i=0; i<=ntotal-1; i++)
    {
        offs = istart+i*mlpbase_nfieldwidth;
        if( structinfo->ptr.p_int[offs+0]>0||structinfo->ptr.p_int[offs+0]==-5 )
        {

            /*
             * Activation function
             */
            mlpactivationfunction(neurons->ptr.p_double[structinfo->ptr.p_int[offs+2]], structinfo->ptr.p_int[offs+0], &f, &df, &d2f, _state);
            neurons->ptr.p_double[i] = f;
            dfdnet->ptr.p_double[i] = df;
            continue;
        }
        if( structinfo->ptr.p_int[offs+0]==0 )
        {

            /*
             * Adaptive summator
             */
            n1 = structinfo->ptr.p_int[offs+2];
            n2 = n1+structinfo->ptr.p_int[offs+1]-1;
            w1 = structinfo->ptr.p_int[offs+3];
            w2 = w1+structinfo->ptr.p_int[offs+1]-1;
            net = ae_v_dotproduct(&weights->ptr.p_double[w1], 1, &neurons->ptr.p_double[n1], 1, ae_v_len(w1,w2));
            neurons->ptr.p_double[i] = net;
            dfdnet->ptr.p_double[i] = 1.0;
            touchint(&n2, _state);
            continue;
        }
        if( structinfo->ptr.p_int[offs+0]<0 )
        {
            perr = ae_true;
            if( structinfo->ptr.p_int[offs+0]==-2 )
            {

                /*
                 * input neuron, left unchanged
                 */
                perr = ae_false;
            }
            if( structinfo->ptr.p_int[offs+0]==-3 )
            {

                /*
                 * "-1" neuron
                 */
                neurons->ptr.p_double[i] = (double)(-1);
                perr = ae_false;
            }
            if( structinfo->ptr.p_int[offs+0]==-4 )
            {

                /*
                 * "0" neuron
                 */
                neurons->ptr.p_double[i] = (double)(0);
                perr = ae_false;
            }
            ae_assert(!perr, "MLPInternalProcessVector: internal error - unknown neuron type!", _state);
            continue;
        }
    }

    /*
     * Extract result
     */
    ae_v_move(&y->ptr.p_double[0], 1, &neurons->ptr.p_double[ntotal-nout], 1, ae_v_len(0,nout-1));

    /*
     * Softmax post-processing or standardisation if needed
     */
    ae_assert(structinfo->ptr.p_int[6]==0||structinfo->ptr.p_int[6]==1, "MLPInternalProcessVector: unknown normalization type!", _state);
    if( structinfo->ptr.p_int[6]==1 )
    {

        /*
         * Softmax
         */
        mx = y->ptr.p_double[0];
        for(i=1; i<=nout-1; i++)
        {
            mx = ae_maxreal(mx, y->ptr.p_double[i], _state);
        }
        net = (double)(0);
        for(i=0; i<=nout-1; i++)
        {
            y->ptr.p_double[i] = ae_exp(y->ptr.p_double[i]-mx, _state);
            net = net+y->ptr.p_double[i];
        }
        for(i=0; i<=nout-1; i++)
        {
            y->ptr.p_double[i] = y->ptr.p_double[i]/net;
        }
    }
    else
    {

        /*
         * Standardisation
         */
        for(i=0; i<=nout-1; i++)
        {
            y->ptr.p_double[i] = y->ptr.p_double[i]*columnsigmas->ptr.p_double[nin+i]+columnmeans->ptr.p_double[nin+i];
        }
    }
}


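/*************************************************************************
NOTE (worked equation): the softmax branch above shifts outputs by their
maximum before exponentiation,

    y[i] = exp(y[i]-mx) / SUM(exp(y[j]-mx), j),   mx = max(y[j], j)

which leaves the result mathematically unchanged (numerator and
denominator are both scaled by exp(-mx)) but keeps every exp() argument
non-positive, preventing overflow for large activations.
*************************************************************************/

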
/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpalloc(ae_serializer* s,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t fkind;
    double threshold;
    double v0;
    double v1;
    ae_int_t nin;
    ae_int_t nout;


    nin = network->hllayersizes.ptr.p_int[0];
    nout = network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1];
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    allocintegerarray(s, &network->hllayersizes, -1, _state);
    for(i=1; i<=network->hllayersizes.cnt-1; i++)
    {
        for(j=0; j<=network->hllayersizes.ptr.p_int[i]-1; j++)
        {
            mlpgetneuroninfo(network, i, j, &fkind, &threshold, _state);
            ae_serializer_alloc_entry(s);
            ae_serializer_alloc_entry(s);
            for(k=0; k<=network->hllayersizes.ptr.p_int[i-1]-1; k++)
            {
                ae_serializer_alloc_entry(s);
            }
        }
    }
    for(j=0; j<=nin-1; j++)
    {
        mlpgetinputscaling(network, j, &v0, &v1, _state);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
    }
    for(j=0; j<=nout-1; j++)
    {
        mlpgetoutputscaling(network, j, &v0, &v1, _state);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
    }
}


/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
|
|
void mlpserialize(ae_serializer* s,
|
|
multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t fkind;
|
|
double threshold;
|
|
double v0;
|
|
double v1;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
|
|
|
|
nin = network->hllayersizes.ptr.p_int[0];
|
|
nout = network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1];
|
|
ae_serializer_serialize_int(s, getmlpserializationcode(_state), _state);
|
|
ae_serializer_serialize_int(s, mlpbase_mlpfirstversion, _state);
|
|
ae_serializer_serialize_bool(s, mlpissoftmax(network, _state), _state);
|
|
serializeintegerarray(s, &network->hllayersizes, -1, _state);
|
|
for(i=1; i<=network->hllayersizes.cnt-1; i++)
|
|
{
|
|
for(j=0; j<=network->hllayersizes.ptr.p_int[i]-1; j++)
|
|
{
|
|
mlpgetneuroninfo(network, i, j, &fkind, &threshold, _state);
|
|
ae_serializer_serialize_int(s, fkind, _state);
|
|
ae_serializer_serialize_double(s, threshold, _state);
|
|
for(k=0; k<=network->hllayersizes.ptr.p_int[i-1]-1; k++)
|
|
{
|
|
ae_serializer_serialize_double(s, mlpgetweight(network, i-1, k, i, j, _state), _state);
|
|
}
|
|
}
|
|
}
|
|
for(j=0; j<=nin-1; j++)
|
|
{
|
|
mlpgetinputscaling(network, j, &v0, &v1, _state);
|
|
ae_serializer_serialize_double(s, v0, _state);
|
|
ae_serializer_serialize_double(s, v1, _state);
|
|
}
|
|
for(j=0; j<=nout-1; j++)
|
|
{
|
|
mlpgetoutputscaling(network, j, &v0, &v1, _state);
|
|
ae_serializer_serialize_double(s, v0, _state);
|
|
ae_serializer_serialize_double(s, v1, _state);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
|
|
void mlpunserialize(ae_serializer* s,
|
|
multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i0;
|
|
ae_int_t i1;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t fkind;
|
|
double threshold;
|
|
double v0;
|
|
double v1;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_bool issoftmax;
|
|
ae_vector layersizes;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&layersizes, 0, sizeof(layersizes));
|
|
_multilayerperceptron_clear(network);
|
|
ae_vector_init(&layersizes, 0, DT_INT, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* check correctness of header
|
|
*/
|
|
ae_serializer_unserialize_int(s, &i0, _state);
|
|
ae_assert(i0==getmlpserializationcode(_state), "MLPUnserialize: stream header corrupted", _state);
|
|
ae_serializer_unserialize_int(s, &i1, _state);
|
|
ae_assert(i1==mlpbase_mlpfirstversion, "MLPUnserialize: stream header corrupted", _state);
|
|
|
|
/*
|
|
* Create network
|
|
*/
|
|
ae_serializer_unserialize_bool(s, &issoftmax, _state);
|
|
unserializeintegerarray(s, &layersizes, _state);
|
|
ae_assert((layersizes.cnt==2||layersizes.cnt==3)||layersizes.cnt==4, "MLPUnserialize: too many hidden layers!", _state);
|
|
nin = layersizes.ptr.p_int[0];
|
|
nout = layersizes.ptr.p_int[layersizes.cnt-1];
|
|
if( layersizes.cnt==2 )
|
|
{
|
|
if( issoftmax )
|
|
{
|
|
mlpcreatec0(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], network, _state);
|
|
}
|
|
else
|
|
{
|
|
mlpcreate0(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], network, _state);
|
|
}
|
|
}
|
|
if( layersizes.cnt==3 )
|
|
{
|
|
if( issoftmax )
|
|
{
|
|
mlpcreatec1(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], network, _state);
|
|
}
|
|
else
|
|
{
|
|
mlpcreate1(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], network, _state);
|
|
}
|
|
}
|
|
if( layersizes.cnt==4 )
|
|
{
|
|
if( issoftmax )
|
|
{
|
|
mlpcreatec2(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], layersizes.ptr.p_int[3], network, _state);
|
|
}
|
|
else
|
|
{
|
|
mlpcreate2(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], layersizes.ptr.p_int[3], network, _state);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Load neurons and weights
|
|
*/
|
|
for(i=1; i<=layersizes.cnt-1; i++)
|
|
{
|
|
for(j=0; j<=layersizes.ptr.p_int[i]-1; j++)
|
|
{
|
|
ae_serializer_unserialize_int(s, &fkind, _state);
|
|
ae_serializer_unserialize_double(s, &threshold, _state);
|
|
mlpsetneuroninfo(network, i, j, fkind, threshold, _state);
|
|
for(k=0; k<=layersizes.ptr.p_int[i-1]-1; k++)
|
|
{
|
|
ae_serializer_unserialize_double(s, &v0, _state);
|
|
mlpsetweight(network, i-1, k, i, j, v0, _state);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Load standardizer
 */
|
|
for(j=0; j<=nin-1; j++)
|
|
{
|
|
ae_serializer_unserialize_double(s, &v0, _state);
|
|
ae_serializer_unserialize_double(s, &v1, _state);
|
|
mlpsetinputscaling(network, j, v0, v1, _state);
|
|
}
|
|
for(j=0; j<=nout-1; j++)
|
|
{
|
|
ae_serializer_unserialize_double(s, &v0, _state);
|
|
ae_serializer_unserialize_double(s, &v1, _state);
|
|
mlpsetoutputscaling(network, j, v0, v1, _state);
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
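/*************************************************************************
EDITOR'S NOTE (illustrative sketch, not part of ALGLIB)

MLPAlloc/MLPSerialize/MLPUnserialize above implement the usual two-pass
ALGLIB pattern: an allocation pass counts entries, a serialization pass
writes them in the same order, and unserialization replays that order to
rebuild the network. The compiled-out sketch below shows the intended
call sequence; the helper name is hypothetical and the attachment of the
serializer to a concrete input/output stream is backend-specific and
deliberately omitted.
*************************************************************************/
#if 0
static void example_mlp_serialization_roundtrip(ae_state *_state)
{
    multilayerperceptron net;
    multilayerperceptron net2;
    ae_serializer s;

    _multilayerperceptron_init(&net, _state, ae_true);
    _multilayerperceptron_init(&net2, _state, ae_true);
    mlpcreate1(2, 5, 1, &net, _state);

    /* pass 1: count entries so the serializer can size its buffers */
    ae_serializer_init(&s);
    ae_serializer_alloc_start(&s);
    mlpalloc(&s, &net, _state);

    /* ...attach serializer to an output stream here (omitted)... */

    /* pass 2: write header, layer sizes, weights and scaling */
    mlpserialize(&s, &net, _state);

    /* ...rewind / attach serializer to an input stream here (omitted)... */

    /* replay the stream into a freshly created network */
    mlpunserialize(&s, &net2, _state);
    ae_serializer_clear(&s);
}
#endif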
/*************************************************************************
Calculation of all types of errors on a subset of the dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version'
! section of the ALGLIB Reference Manual in order to find out how to use
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network   - network initialized with one of the network creation funcs
    XY        - original dataset; one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SetSize   - real size of XY, SetSize>=0;
    Subset    - subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

OUTPUT PARAMETERS:
    Rep       - contains all types of errors.

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
|
|
void mlpallerrorssubset(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
modelerrors* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t idx0;
|
|
ae_int_t idx1;
|
|
ae_int_t idxtype;
|
|
|
|
_modelerrors_clear(rep);
|
|
|
|
ae_assert(xy->rows>=setsize, "MLPAllErrorsSubset: XY has less than SetSize rows", _state);
|
|
if( setsize>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAllErrorsSubset: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAllErrorsSubset: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
if( subsetsize>=0 )
|
|
{
|
|
idx0 = 0;
|
|
idx1 = subsetsize;
|
|
idxtype = 1;
|
|
}
|
|
else
|
|
{
|
|
idx0 = 0;
|
|
idx1 = setsize;
|
|
idxtype = 0;
|
|
}
|
|
mlpallerrorsx(network, xy, &network->dummysxy, setsize, 0, subset, idx0, idx1, idxtype, &network->buf, rep, _state);
|
|
}
|
|
|
|
|
|
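/*************************************************************************
EDITOR'S NOTE (illustrative sketch, not part of ALGLIB)

The Subset/SubsetSize convention documented above is easy to get wrong:
SubsetSize>0 reads row indices from Subset[], SubsetSize=0 returns
zeros, and SubsetSize<0 ignores Subset[] entirely. The compiled-out
helper below (hypothetical name) demonstrates both the explicit-subset
and the whole-dataset calls.
*************************************************************************/
#if 0
static void example_allerrors_on_subset(multilayerperceptron* net,
     /* Real */ ae_matrix* xy,
     ae_state *_state)
{
    ae_vector subset;
    modelerrors rep;

    ae_vector_init(&subset, 0, DT_INT, _state, ae_true);
    _modelerrors_init(&rep, _state, ae_true);

    /* evaluate errors on rows 0 and 2 only */
    ae_vector_set_length(&subset, 2, _state);
    subset.ptr.p_int[0] = 0;
    subset.ptr.p_int[1] = 2;
    mlpallerrorssubset(net, xy, xy->rows, &subset, 2, &rep, _state);

    /* SubsetSize=-1: process the entire dataset, Subset[] is ignored */
    mlpallerrorssubset(net, xy, xy->rows, &subset, -1, &rep, _state);
}
#endif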
/*************************************************************************
Calculation of all types of errors on a subset of a sparse dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version'
! section of the ALGLIB Reference Manual in order to find out how to use
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network   - network initialized with one of the network creation funcs
    XY        - original dataset given by a sparse matrix;
                one sample = one row;
                first NIn columns contain inputs,
                next NOut columns - desired outputs.
    SetSize   - real size of XY, SetSize>=0;
    Subset    - subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

OUTPUT PARAMETERS:
    Rep       - contains all types of errors.

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
|
|
void mlpallerrorssparsesubset(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
modelerrors* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t idx0;
|
|
ae_int_t idx1;
|
|
ae_int_t idxtype;
|
|
|
|
_modelerrors_clear(rep);
|
|
|
|
ae_assert(sparseiscrs(xy, _state), "MLPAllErrorsSparseSubset: XY is not in CRS format.", _state);
|
|
ae_assert(sparsegetnrows(xy, _state)>=setsize, "MLPAllErrorsSparseSubset: XY has less than SetSize rows", _state);
|
|
if( setsize>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAllErrorsSparseSubset: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAllErrorsSparseSubset: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
if( subsetsize>=0 )
|
|
{
|
|
idx0 = 0;
|
|
idx1 = subsetsize;
|
|
idxtype = 1;
|
|
}
|
|
else
|
|
{
|
|
idx0 = 0;
|
|
idx1 = setsize;
|
|
idxtype = 0;
|
|
}
|
|
mlpallerrorsx(network, &network->dummydxy, xy, setsize, 1, subset, idx0, idx1, idxtype, &network->buf, rep, _state);
|
|
}
|
|
|
|
|
|
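/*************************************************************************
EDITOR'S NOTE (illustrative sketch, not part of ALGLIB)

The sparse variant above requires XY in CRS format (its very first
assertion checks SparseIsCRS). A matrix created with SparseCreate starts
in hash-table format and must be converted first. The compiled-out
helper below (hypothetical name, toy sizes) shows the required
preparation for a regression network with 2 inputs and 1 output.
*************************************************************************/
#if 0
static void example_allerrors_on_sparse_subset(multilayerperceptron* net,
     ae_state *_state)
{
    sparsematrix xy;
    ae_vector dummy;
    modelerrors rep;

    _sparsematrix_init(&xy, _state, ae_true);
    ae_vector_init(&dummy, 0, DT_INT, _state, ae_true);
    _modelerrors_init(&rep, _state, ae_true);

    /* 4 samples, NIn+NOut=3 columns, created in hash-table format */
    sparsecreate(4, 3, 12, &xy, _state);
    sparseset(&xy, 0, 0, 1.0, _state);
    /* ...fill the remaining entries... */

    /* mandatory: convert to CRS before calling the function */
    sparseconverttocrs(&xy, _state);

    /* SubsetSize=-1 processes all SetSize=4 rows, Dummy is ignored */
    mlpallerrorssparsesubset(net, &xy, 4, &dummy, -1, &rep, _state);
}
#endif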
/*************************************************************************
Error of the neural network on a subset of the dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version'
! section of the ALGLIB Reference Manual in order to find out how to use
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network   - neural network;
    XY        - training set, see below for information on the
                training set format;
    SetSize   - real size of XY, SetSize>=0;
    Subset    - subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs, the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes, the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0
  to NClasses-1).

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
|
|
double mlperrorsubset(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t idx0;
|
|
ae_int_t idx1;
|
|
ae_int_t idxtype;
|
|
double result;
|
|
|
|
|
|
ae_assert(xy->rows>=setsize, "MLPErrorSubset: XY has less than SetSize rows", _state);
|
|
if( setsize>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPErrorSubset: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSubset: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
if( subsetsize>=0 )
|
|
{
|
|
idx0 = 0;
|
|
idx1 = subsetsize;
|
|
idxtype = 1;
|
|
}
|
|
else
|
|
{
|
|
idx0 = 0;
|
|
idx1 = setsize;
|
|
idxtype = 0;
|
|
}
|
|
mlpallerrorsx(network, xy, &network->dummysxy, setsize, 0, subset, idx0, idx1, idxtype, &network->buf, &network->err, _state);
|
|
result = ae_sqr(network->err.rmserror, _state)*(idx1-idx0)*mlpgetoutputscount(network, _state)/2;
|
|
return result;
|
|
}
|
|
|
|
|
|
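/*************************************************************************
EDITOR'S NOTE (worked identity, not part of ALGLIB)

MLPErrorSubset above recovers the sum-of-squares error from the RMS
error reported by MLPAllErrorsX. With N = Idx1-Idx0 processed rows and
NOut outputs,

    RMS     = sqrt( SUM(sqr(y[i]-desired_y[i])) / (N*NOut) )
    SSE / 2 = sqr(RMS) * N * NOut / 2

which is exactly the final expression
    result = ae_sqr(network->err.rmserror)*(idx1-idx0)*NOut/2
used by both the dense version above and the sparse version below.
*************************************************************************/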
/*************************************************************************
Error of the neural network on a subset of a sparse dataset.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important
! improvements of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend that you read the 'Working with commercial version'
! section of the ALGLIB Reference Manual in order to find out how to use
! performance-related features provided by the commercial edition of
! ALGLIB.

INPUT PARAMETERS:
    Network   - neural network;
    XY        - training set, see below for information on the
                training set format. This function checks correctness
                of the dataset (no NANs/INFs, class numbers are correct)
                and throws an exception when an incorrect dataset is
                passed. The sparse matrix must use CRS format for
                storage.
    SetSize   - real size of XY, SetSize>=0;
                it is used when SubsetSize<0;
    Subset    - subset of SubsetSize elements, array[SubsetSize];
    SubsetSize- number of elements in Subset[] array:
                * if SubsetSize>0, rows of XY with indices Subset[0]...
                  ...Subset[SubsetSize-1] are processed
                * if SubsetSize=0, zeros are returned
                * if SubsetSize<0, entire dataset is processed; Subset[]
                  array is ignored in this case.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs, the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes, the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0
  to NClasses-1).

  -- ALGLIB --
     Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
|
|
double mlperrorsparsesubset(multilayerperceptron* network,
|
|
sparsematrix* xy,
|
|
ae_int_t setsize,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t idx0;
|
|
ae_int_t idx1;
|
|
ae_int_t idxtype;
|
|
double result;
|
|
|
|
|
|
ae_assert(sparseiscrs(xy, _state), "MLPErrorSparseSubset: XY is not in CRS format.", _state);
|
|
ae_assert(sparsegetnrows(xy, _state)>=setsize, "MLPErrorSparseSubset: XY has less than SetSize rows", _state);
|
|
if( setsize>0 )
|
|
{
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPErrorSparseSubset: XY has less than NIn+1 columns", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSparseSubset: XY has less than NIn+NOut columns", _state);
|
|
}
|
|
}
|
|
if( subsetsize>=0 )
|
|
{
|
|
idx0 = 0;
|
|
idx1 = subsetsize;
|
|
idxtype = 1;
|
|
}
|
|
else
|
|
{
|
|
idx0 = 0;
|
|
idx1 = setsize;
|
|
idxtype = 0;
|
|
}
|
|
mlpallerrorsx(network, &network->dummydxy, xy, setsize, 1, subset, idx0, idx1, idxtype, &network->buf, &network->err, _state);
|
|
result = ae_sqr(network->err.rmserror, _state)*(idx1-idx0)*mlpgetoutputscount(network, _state)/2;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Calculation of all types of errors at once for a subset or full dataset,
|
|
which can be represented in different formats.
|
|
|
|
THIS INTERNAL FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!
|
|
|
|
-- ALGLIB --
|
|
Copyright 26.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpallerrorsx(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* densexy,
|
|
sparsematrix* sparsexy,
|
|
ae_int_t datasetsize,
|
|
ae_int_t datasettype,
|
|
/* Integer */ ae_vector* idx,
|
|
ae_int_t subset0,
|
|
ae_int_t subset1,
|
|
ae_int_t subsettype,
|
|
ae_shared_pool* buf,
|
|
modelerrors* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t rowsize;
|
|
ae_bool iscls;
|
|
ae_int_t srcidx;
|
|
ae_int_t cstart;
|
|
ae_int_t csize;
|
|
ae_int_t j;
|
|
mlpbuffers *pbuf;
|
|
ae_smart_ptr _pbuf;
|
|
ae_int_t len0;
|
|
ae_int_t len1;
|
|
modelerrors rep0;
|
|
modelerrors rep1;
|
|
double problemcost;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&_pbuf, 0, sizeof(_pbuf));
|
|
memset(&rep0, 0, sizeof(rep0));
|
|
memset(&rep1, 0, sizeof(rep1));
|
|
ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
|
|
_modelerrors_init(&rep0, _state, ae_true);
|
|
_modelerrors_init(&rep1, _state, ae_true);
|
|
|
|
ae_assert(datasetsize>=0, "MLPAllErrorsX: SetSize<0", _state);
|
|
ae_assert(datasettype==0||datasettype==1, "MLPAllErrorsX: DatasetType is incorrect", _state);
|
|
ae_assert(subsettype==0||subsettype==1, "MLPAllErrorsX: SubsetType is incorrect", _state);
|
|
|
|
/*
|
|
* Determine network properties
|
|
*/
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
iscls = mlpissoftmax(network, _state);
|
|
|
|
/*
 * Split problem.
 *
 * Splitting the problem allows us to reduce the effect of single-precision
 * arithmetic (the SSE-optimized version of MLPChunkedProcess uses single
 * precision internally, but converts results to double precision after
 * they are exported from the HPC buffer to the network). Small batches
 * are calculated in single precision, results are aggregated in double
 * precision, which allows us to avoid accumulation of rounding errors
 * when we process very large batches (tens of thousands of items).
 *
 * NOTE: it is important to use real arithmetic for ProblemCost
 *       because ProblemCost may be larger than MAXINT.
 */
|
|
problemcost = (double)(subset1-subset0);
|
|
problemcost = problemcost*wcount*2;
|
|
if( ae_fp_greater_eq(problemcost,smpactivationlevel(_state))&&subset1-subset0>=2*mlpbase_microbatchsize )
|
|
{
|
|
if( _trypexec_mlpallerrorsx(network,densexy,sparsexy,datasetsize,datasettype,idx,subset0,subset1,subsettype,buf,rep, _state) )
|
|
{
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
if( subset1-subset0>=2*mlpbase_microbatchsize&&ae_fp_greater(problemcost,spawnlevel(_state)) )
|
|
{
|
|
splitlength(subset1-subset0, mlpbase_microbatchsize, &len0, &len1, _state);
|
|
mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0+len0, subsettype, buf, &rep0, _state);
|
|
mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0+len0, subset1, subsettype, buf, &rep1, _state);
|
|
rep->relclserror = (len0*rep0.relclserror+len1*rep1.relclserror)/(len0+len1);
|
|
rep->avgce = (len0*rep0.avgce+len1*rep1.avgce)/(len0+len1);
|
|
rep->rmserror = ae_sqrt((len0*ae_sqr(rep0.rmserror, _state)+len1*ae_sqr(rep1.rmserror, _state))/(len0+len1), _state);
|
|
rep->avgerror = (len0*rep0.avgerror+len1*rep1.avgerror)/(len0+len1);
|
|
rep->avgrelerror = (len0*rep0.avgrelerror+len1*rep1.avgrelerror)/(len0+len1);
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Retrieve and prepare
|
|
*/
|
|
ae_shared_pool_retrieve(buf, &_pbuf, _state);
|
|
if( iscls )
|
|
{
|
|
rowsize = nin+1;
|
|
dserrallocate(nout, &pbuf->tmp0, _state);
|
|
}
|
|
else
|
|
{
|
|
rowsize = nin+nout;
|
|
dserrallocate(-nout, &pbuf->tmp0, _state);
|
|
}
|
|
|
|
/*
|
|
* Processing
|
|
*/
|
|
hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
|
|
cstart = subset0;
|
|
while(cstart<subset1)
|
|
{
|
|
|
|
/*
|
|
* Determine size of current chunk and copy it to PBuf.XY
|
|
*/
|
|
csize = ae_minint(subset1, cstart+pbuf->chunksize, _state)-cstart;
|
|
for(j=0; j<=csize-1; j++)
|
|
{
|
|
srcidx = -1;
|
|
if( subsettype==0 )
|
|
{
|
|
srcidx = cstart+j;
|
|
}
|
|
if( subsettype==1 )
|
|
{
|
|
srcidx = idx->ptr.p_int[cstart+j];
|
|
}
|
|
ae_assert(srcidx>=0, "MLPAllErrorsX: internal error", _state);
|
|
if( datasettype==0 )
|
|
{
|
|
ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,rowsize-1));
|
|
}
|
|
if( datasettype==1 )
|
|
{
|
|
sparsegetrow(sparsexy, srcidx, &pbuf->xyrow, _state);
|
|
ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &pbuf->xyrow.ptr.p_double[0], 1, ae_v_len(0,rowsize-1));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Unpack XY and process (temporary code, to be replaced by chunked processing)
|
|
*/
|
|
for(j=0; j<=csize-1; j++)
|
|
{
|
|
ae_v_move(&pbuf->xy2.ptr.pp_double[j][0], 1, &pbuf->xy.ptr.pp_double[j][0], 1, ae_v_len(0,rowsize-1));
|
|
}
|
|
mlpbase_mlpchunkedprocess(network, &pbuf->xy2, 0, csize, &pbuf->batch4buf, &pbuf->hpcbuf, _state);
|
|
for(j=0; j<=csize-1; j++)
|
|
{
|
|
ae_v_move(&pbuf->x.ptr.p_double[0], 1, &pbuf->xy2.ptr.pp_double[j][0], 1, ae_v_len(0,nin-1));
|
|
ae_v_move(&pbuf->y.ptr.p_double[0], 1, &pbuf->xy2.ptr.pp_double[j][nin], 1, ae_v_len(0,nout-1));
|
|
if( iscls )
|
|
{
|
|
pbuf->desiredy.ptr.p_double[0] = pbuf->xy.ptr.pp_double[j][nin];
|
|
}
|
|
else
|
|
{
|
|
ae_v_move(&pbuf->desiredy.ptr.p_double[0], 1, &pbuf->xy.ptr.pp_double[j][nin], 1, ae_v_len(0,nout-1));
|
|
}
|
|
dserraccumulate(&pbuf->tmp0, &pbuf->y, &pbuf->desiredy, _state);
|
|
}
|
|
|
|
/*
|
|
* Process chunk and advance line pointer
|
|
*/
|
|
cstart = cstart+pbuf->chunksize;
|
|
}
|
|
dserrfinish(&pbuf->tmp0, _state);
|
|
rep->relclserror = pbuf->tmp0.ptr.p_double[0];
|
|
rep->avgce = pbuf->tmp0.ptr.p_double[1]/ae_log((double)(2), _state);
|
|
rep->rmserror = pbuf->tmp0.ptr.p_double[2];
|
|
rep->avgerror = pbuf->tmp0.ptr.p_double[3];
|
|
rep->avgrelerror = pbuf->tmp0.ptr.p_double[4];
|
|
|
|
/*
|
|
* Recycle
|
|
*/
|
|
ae_shared_pool_recycle(buf, &_pbuf, _state);
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
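/*************************************************************************
EDITOR'S NOTE (illustrative sketch, not part of ALGLIB)

When MLPAllErrorsX above splits a batch in two, the child reports are
merged by sample-weighted averaging. For the RMS error this merge is
exact because mean squared errors - not RMS values - are additive:

    RMS = sqrt( (N0*sqr(RMS0) + N1*sqr(RMS1)) / (N0+N1) )

The compiled-out helper below (hypothetical name, sqrt() from <math.h>
assumed) restates the merge rule used in the recursive branch.
*************************************************************************/
#if 0
static double example_merge_rms(double rms0, ae_int_t n0,
     double rms1, ae_int_t n1)
{
    /* convert each RMS back to a summed squared error, add, renormalize */
    return sqrt((n0*rms0*rms0+n1*rms1*rms1)/(double)(n0+n1));
}
#endif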
/*************************************************************************
|
|
Serial stub for GPL edition.
|
|
*************************************************************************/
|
|
ae_bool _trypexec_mlpallerrorsx(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* densexy,
|
|
sparsematrix* sparsexy,
|
|
ae_int_t datasetsize,
|
|
ae_int_t datasettype,
|
|
/* Integer */ ae_vector* idx,
|
|
ae_int_t subset0,
|
|
ae_int_t subset1,
|
|
ae_int_t subsettype,
|
|
ae_shared_pool* buf,
|
|
modelerrors* rep,
|
|
ae_state *_state)
|
|
{
|
|
return ae_false;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine: adding new input layer to network
|
|
*************************************************************************/
|
|
static void mlpbase_addinputlayer(ae_int_t ncount,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
lsizes->ptr.p_int[0] = ncount;
|
|
ltypes->ptr.p_int[0] = -2;
|
|
lconnfirst->ptr.p_int[0] = 0;
|
|
lconnlast->ptr.p_int[0] = 0;
|
|
*lastproc = 0;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine: adding new summator layer to network
|
|
*************************************************************************/
|
|
static void mlpbase_addbiasedsummatorlayer(ae_int_t ncount,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
lsizes->ptr.p_int[*lastproc+1] = 1;
|
|
ltypes->ptr.p_int[*lastproc+1] = -3;
|
|
lconnfirst->ptr.p_int[*lastproc+1] = 0;
|
|
lconnlast->ptr.p_int[*lastproc+1] = 0;
|
|
lsizes->ptr.p_int[*lastproc+2] = ncount;
|
|
ltypes->ptr.p_int[*lastproc+2] = 0;
|
|
lconnfirst->ptr.p_int[*lastproc+2] = *lastproc;
|
|
lconnlast->ptr.p_int[*lastproc+2] = *lastproc+1;
|
|
*lastproc = *lastproc+2;
|
|
}
|
|
|
|
|
|
/*************************************************************************
Internal subroutine: adding new activation layer to network
*************************************************************************/
|
|
static void mlpbase_addactivationlayer(ae_int_t functype,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(functype>0||functype==-5, "AddActivationLayer: incorrect function type", _state);
|
|
lsizes->ptr.p_int[*lastproc+1] = lsizes->ptr.p_int[*lastproc];
|
|
ltypes->ptr.p_int[*lastproc+1] = functype;
|
|
lconnfirst->ptr.p_int[*lastproc+1] = *lastproc;
|
|
lconnlast->ptr.p_int[*lastproc+1] = *lastproc;
|
|
*lastproc = *lastproc+1;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine: adding new zero layer to network
|
|
*************************************************************************/
|
|
static void mlpbase_addzerolayer(/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t* lastproc,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
lsizes->ptr.p_int[*lastproc+1] = 1;
|
|
ltypes->ptr.p_int[*lastproc+1] = -4;
|
|
lconnfirst->ptr.p_int[*lastproc+1] = 0;
|
|
lconnlast->ptr.p_int[*lastproc+1] = 0;
|
|
*lastproc = *lastproc+1;
|
|
}
|
|
|
|
|
|
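/*************************************************************************
EDITOR'S NOTE (illustrative sketch, not part of ALGLIB)

The four Add*Layer helpers above fill the parallel LSizes/LTypes/
LConnFirst/LConnLast arrays that MLPBase_MLPCreate consumes. The
compiled-out helper below (hypothetical name) shows how they compose for
the simplest case, mirroring the pattern used by MLPCreate0: an input
layer, then NOut biased summators, then a weightless linear (-5)
activation layer - 1+3 layer records in total.
*************************************************************************/
#if 0
static void example_build_linear_network(ae_int_t nin,
     ae_int_t nout,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_vector lsizes;
    ae_vector ltypes;
    ae_vector lconnfirst;
    ae_vector lconnlast;
    ae_int_t layerscount;
    ae_int_t lastproc;

    layerscount = 1+3;
    ae_vector_init(&lsizes, layerscount, DT_INT, _state, ae_true);
    ae_vector_init(&ltypes, layerscount, DT_INT, _state, ae_true);
    ae_vector_init(&lconnfirst, layerscount, DT_INT, _state, ae_true);
    ae_vector_init(&lconnlast, layerscount, DT_INT, _state, ae_true);

    /* input layer, NOut adaptive summators with bias, linear activation */
    mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
    mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);

    /* convert the layer description into a ready-to-use network */
    mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
}
#endif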
/*************************************************************************
This routine adds an input layer to the high-level description of the
network.

It modifies Network.HLConnections and Network.HLNeurons and assumes that
these arrays have enough space to store the data. It accepts the
following parameters:
    Network      - network
    ConnIdx      - index of the first free entry in the HLConnections
    NeuroIdx     - index of the first free entry in the HLNeurons
    StructInfoIdx- index of the first entry in the low level description
                   of the current layer (in the StructInfo array)
    NIn          - number of inputs

It modifies Network and the indices.
*************************************************************************/
|
|
static void mlpbase_hladdinputlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t nin,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t offs;
|
|
|
|
|
|
offs = mlpbase_hlnfieldwidth*(*neuroidx);
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->hlneurons.ptr.p_int[offs+0] = 0;
|
|
network->hlneurons.ptr.p_int[offs+1] = i;
|
|
network->hlneurons.ptr.p_int[offs+2] = -1;
|
|
network->hlneurons.ptr.p_int[offs+3] = -1;
|
|
offs = offs+mlpbase_hlnfieldwidth;
|
|
}
|
|
*neuroidx = *neuroidx+nin;
|
|
*structinfoidx = *structinfoidx+nin;
|
|
}
|
|
|
|
|
|
/*************************************************************************
This routine adds an output layer to the high-level description of
the network.

It modifies Network.HLConnections and Network.HLNeurons and assumes that
these arrays have enough space to store the data. It accepts the
following parameters:
    Network      - network
    ConnIdx      - index of the first free entry in the HLConnections
    NeuroIdx     - index of the first free entry in the HLNeurons
    StructInfoIdx- index of the first entry in the low level description
                   of the current layer (in the StructInfo array)
    WeightsIdx   - index of the first entry in the Weights array which
                   corresponds to the current layer
    K            - current layer index
    NPrev        - number of neurons in the previous layer
    NOut         - number of outputs
    IsCls        - is it a classifier network?
    IsLinearOut  - is it a network with linear output?

It modifies Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx.
*************************************************************************/
|
|
static void mlpbase_hladdoutputlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t* weightsidx,
|
|
ae_int_t k,
|
|
ae_int_t nprev,
|
|
ae_int_t nout,
|
|
ae_bool iscls,
|
|
ae_bool islinearout,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t neurooffs;
|
|
ae_int_t connoffs;
|
|
|
|
|
|
ae_assert((iscls&&islinearout)||!iscls, "HLAddOutputLayer: internal error", _state);
|
|
neurooffs = mlpbase_hlnfieldwidth*(*neuroidx);
|
|
connoffs = mlpbase_hlconnfieldwidth*(*connidx);
|
|
if( !iscls )
|
|
{
|
|
|
|
/*
|
|
* Regression network
|
|
*/
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
network->hlneurons.ptr.p_int[neurooffs+0] = k;
|
|
network->hlneurons.ptr.p_int[neurooffs+1] = i;
|
|
network->hlneurons.ptr.p_int[neurooffs+2] = *structinfoidx+1+nout+i;
|
|
network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
|
|
neurooffs = neurooffs+mlpbase_hlnfieldwidth;
|
|
}
|
|
for(i=0; i<=nprev-1; i++)
|
|
{
|
|
for(j=0; j<=nout-1; j++)
|
|
{
|
|
network->hlconnections.ptr.p_int[connoffs+0] = k-1;
|
|
network->hlconnections.ptr.p_int[connoffs+1] = i;
|
|
network->hlconnections.ptr.p_int[connoffs+2] = k;
|
|
network->hlconnections.ptr.p_int[connoffs+3] = j;
|
|
network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
|
|
connoffs = connoffs+mlpbase_hlconnfieldwidth;
|
|
}
|
|
}
|
|
*connidx = *connidx+nprev*nout;
|
|
*neuroidx = *neuroidx+nout;
|
|
*structinfoidx = *structinfoidx+2*nout+1;
|
|
*weightsidx = *weightsidx+nout*(nprev+1);
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Classification network
|
|
*/
|
|
for(i=0; i<=nout-2; i++)
|
|
{
|
|
network->hlneurons.ptr.p_int[neurooffs+0] = k;
|
|
network->hlneurons.ptr.p_int[neurooffs+1] = i;
|
|
network->hlneurons.ptr.p_int[neurooffs+2] = -1;
|
|
network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
|
|
neurooffs = neurooffs+mlpbase_hlnfieldwidth;
|
|
}
|
|
network->hlneurons.ptr.p_int[neurooffs+0] = k;
|
|
network->hlneurons.ptr.p_int[neurooffs+1] = i;
|
|
network->hlneurons.ptr.p_int[neurooffs+2] = -1;
|
|
network->hlneurons.ptr.p_int[neurooffs+3] = -1;
|
|
for(i=0; i<=nprev-1; i++)
|
|
{
|
|
for(j=0; j<=nout-2; j++)
|
|
{
|
|
network->hlconnections.ptr.p_int[connoffs+0] = k-1;
|
|
network->hlconnections.ptr.p_int[connoffs+1] = i;
|
|
network->hlconnections.ptr.p_int[connoffs+2] = k;
|
|
network->hlconnections.ptr.p_int[connoffs+3] = j;
|
|
network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
|
|
connoffs = connoffs+mlpbase_hlconnfieldwidth;
|
|
}
|
|
}
|
|
*connidx = *connidx+nprev*(nout-1);
|
|
*neuroidx = *neuroidx+nout;
|
|
*structinfoidx = *structinfoidx+nout+2;
|
|
*weightsidx = *weightsidx+(nout-1)*(nprev+1);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
This routine adds a hidden layer to the high-level description of
the network.

It modifies Network.HLConnections and Network.HLNeurons and assumes that
these arrays have enough space to store the data. It accepts the
following parameters:
    Network      - network
    ConnIdx      - index of the first free entry in the HLConnections
    NeuroIdx     - index of the first free entry in the HLNeurons
    StructInfoIdx- index of the first entry in the low level description
                   of the current layer (in the StructInfo array)
    WeightsIdx   - index of the first entry in the Weights array which
                   corresponds to the current layer
    K            - current layer index
    NPrev        - number of neurons in the previous layer
    NCur         - number of neurons in the current layer

It modifies Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx.
*************************************************************************/
|
|
static void mlpbase_hladdhiddenlayer(multilayerperceptron* network,
|
|
ae_int_t* connidx,
|
|
ae_int_t* neuroidx,
|
|
ae_int_t* structinfoidx,
|
|
ae_int_t* weightsidx,
|
|
ae_int_t k,
|
|
ae_int_t nprev,
|
|
ae_int_t ncur,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t neurooffs;
|
|
ae_int_t connoffs;
|
|
|
|
|
|
neurooffs = mlpbase_hlnfieldwidth*(*neuroidx);
|
|
connoffs = mlpbase_hlconnfieldwidth*(*connidx);
|
|
for(i=0; i<=ncur-1; i++)
|
|
{
|
|
network->hlneurons.ptr.p_int[neurooffs+0] = k;
|
|
network->hlneurons.ptr.p_int[neurooffs+1] = i;
|
|
network->hlneurons.ptr.p_int[neurooffs+2] = *structinfoidx+1+ncur+i;
|
|
network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
|
|
neurooffs = neurooffs+mlpbase_hlnfieldwidth;
|
|
}
|
|
for(i=0; i<=nprev-1; i++)
|
|
{
|
|
for(j=0; j<=ncur-1; j++)
|
|
{
|
|
network->hlconnections.ptr.p_int[connoffs+0] = k-1;
|
|
network->hlconnections.ptr.p_int[connoffs+1] = i;
|
|
network->hlconnections.ptr.p_int[connoffs+2] = k;
|
|
network->hlconnections.ptr.p_int[connoffs+3] = j;
|
|
network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
|
|
connoffs = connoffs+mlpbase_hlconnfieldwidth;
|
|
}
|
|
}
|
|
*connidx = *connidx+nprev*ncur;
|
|
*neuroidx = *neuroidx+ncur;
|
|
*structinfoidx = *structinfoidx+2*ncur+1;
|
|
*weightsidx = *weightsidx+ncur*(nprev+1);
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function fills high-level information about a network created using
the internal MLPCreate() function.

This function does NOT examine StructInfo for low-level information, it
just expects that the network has the following structure:

    input neuron            \
    ...                      | input layer
    input neuron            /

    "-1" neuron             \
    biased summator          |
    ...                      |
    biased summator          | hidden layer(s), if any exist
    activation function      |
    ...                      |
    activation function     /

    "-1" neuron             \
    biased summator          | output layer:
    ...                      |
    biased summator          | * we have NOut summators/activators for regression networks
    activation function      | * we have only NOut-1 summators and no activators for classifiers
    ...                      | * we have "0" neuron only when we have classifier
    activation function      |
    "0" neuron              /

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
|
|
static void mlpbase_fillhighlevelinformation(multilayerperceptron* network,
|
|
ae_int_t nin,
|
|
ae_int_t nhid1,
|
|
ae_int_t nhid2,
|
|
ae_int_t nout,
|
|
ae_bool iscls,
|
|
ae_bool islinearout,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t idxweights;
|
|
ae_int_t idxstruct;
|
|
ae_int_t idxneuro;
|
|
ae_int_t idxconn;
|
|
|
|
|
|
ae_assert((iscls&&islinearout)||!iscls, "FillHighLevelInformation: internal error", _state);
|
|
|
|
/*
|
|
* Preparations common to all types of networks
|
|
*/
|
|
idxweights = 0;
|
|
idxneuro = 0;
|
|
idxstruct = 0;
|
|
idxconn = 0;
|
|
network->hlnetworktype = 0;
|
|
|
|
/*
|
|
* network without hidden layers
|
|
*/
|
|
if( nhid1==0 )
|
|
{
|
|
ae_vector_set_length(&network->hllayersizes, 2, _state);
|
|
network->hllayersizes.ptr.p_int[0] = nin;
|
|
network->hllayersizes.ptr.p_int[1] = nout;
|
|
if( !iscls )
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*nin*nout, _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nout), _state);
|
|
network->hlnormtype = 0;
|
|
}
|
|
else
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*nin*(nout-1), _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nout), _state);
|
|
network->hlnormtype = 1;
|
|
}
|
|
mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
|
|
mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nout, iscls, islinearout, _state);
|
|
return;
|
|
}
|
|
|
|
/*
 * network with one hidden layer
 */
|
|
if( nhid2==0 )
|
|
{
|
|
ae_vector_set_length(&network->hllayersizes, 3, _state);
|
|
network->hllayersizes.ptr.p_int[0] = nin;
|
|
network->hllayersizes.ptr.p_int[1] = nhid1;
|
|
network->hllayersizes.ptr.p_int[2] = nout;
|
|
if( !iscls )
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nout), _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nout), _state);
|
|
network->hlnormtype = 0;
|
|
}
|
|
else
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*(nout-1)), _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nout), _state);
|
|
network->hlnormtype = 1;
|
|
}
|
|
mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
|
|
mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nhid1, _state);
|
|
mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 2, nhid1, nout, iscls, islinearout, _state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Two hidden layers
|
|
*/
|
|
ae_vector_set_length(&network->hllayersizes, 4, _state);
|
|
network->hllayersizes.ptr.p_int[0] = nin;
|
|
network->hllayersizes.ptr.p_int[1] = nhid1;
|
|
network->hllayersizes.ptr.p_int[2] = nhid2;
|
|
network->hllayersizes.ptr.p_int[3] = nout;
|
|
if( !iscls )
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*nout), _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nhid2+nout), _state);
|
|
network->hlnormtype = 0;
|
|
}
|
|
else
|
|
{
|
|
ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*(nout-1)), _state);
|
|
ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nhid2+nout), _state);
|
|
network->hlnormtype = 1;
|
|
}
|
|
mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
|
|
mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nhid1, _state);
|
|
mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 2, nhid1, nhid2, _state);
|
|
mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 3, nhid2, nout, iscls, islinearout, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Internal subroutine.
|
|
|
|
-- ALGLIB --
|
|
Copyright 04.11.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void mlpbase_mlpcreate(ae_int_t nin,
|
|
ae_int_t nout,
|
|
/* Integer */ ae_vector* lsizes,
|
|
/* Integer */ ae_vector* ltypes,
|
|
/* Integer */ ae_vector* lconnfirst,
|
|
/* Integer */ ae_vector* lconnlast,
|
|
ae_int_t layerscount,
|
|
ae_bool isclsnet,
|
|
multilayerperceptron* network,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t ssize;
|
|
ae_int_t ntotal;
|
|
ae_int_t wcount;
|
|
ae_int_t offs;
|
|
ae_int_t nprocessed;
|
|
ae_int_t wallocated;
|
|
ae_vector localtemp;
|
|
ae_vector lnfirst;
|
|
ae_vector lnsyn;
|
|
mlpbuffers buf;
|
|
smlpgrad sgrad;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&localtemp, 0, sizeof(localtemp));
|
|
memset(&lnfirst, 0, sizeof(lnfirst));
|
|
memset(&lnsyn, 0, sizeof(lnsyn));
|
|
memset(&buf, 0, sizeof(buf));
|
|
memset(&sgrad, 0, sizeof(sgrad));
|
|
_multilayerperceptron_clear(network);
|
|
ae_vector_init(&localtemp, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&lnfirst, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&lnsyn, 0, DT_INT, _state, ae_true);
|
|
_mlpbuffers_init(&buf, _state, ae_true);
|
|
_smlpgrad_init(&sgrad, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* Check
|
|
*/
|
|
ae_assert(layerscount>0, "MLPCreate: wrong parameters!", _state);
|
|
ae_assert(ltypes->ptr.p_int[0]==-2, "MLPCreate: wrong LTypes[0] (must be -2)!", _state);
|
|
for(i=0; i<=layerscount-1; i++)
|
|
{
|
|
ae_assert(lsizes->ptr.p_int[i]>0, "MLPCreate: wrong LSizes!", _state);
|
|
ae_assert(lconnfirst->ptr.p_int[i]>=0&&(lconnfirst->ptr.p_int[i]<i||i==0), "MLPCreate: wrong LConnFirst!", _state);
|
|
ae_assert(lconnlast->ptr.p_int[i]>=lconnfirst->ptr.p_int[i]&&(lconnlast->ptr.p_int[i]<i||i==0), "MLPCreate: wrong LConnLast!", _state);
|
|
}
|
|
|
|
/*
|
|
* Build network geometry
|
|
*/
|
|
ae_vector_set_length(&lnfirst, layerscount-1+1, _state);
|
|
ae_vector_set_length(&lnsyn, layerscount-1+1, _state);
|
|
ntotal = 0;
|
|
wcount = 0;
|
|
for(i=0; i<=layerscount-1; i++)
|
|
{
|
|
|
|
/*
 * Analyze connections.
 * This code must trigger an assertion in case of unknown LTypes[I]
 */
|
|
lnsyn.ptr.p_int[i] = -1;
|
|
if( ltypes->ptr.p_int[i]>=0||ltypes->ptr.p_int[i]==-5 )
|
|
{
|
|
lnsyn.ptr.p_int[i] = 0;
|
|
for(j=lconnfirst->ptr.p_int[i]; j<=lconnlast->ptr.p_int[i]; j++)
|
|
{
|
|
lnsyn.ptr.p_int[i] = lnsyn.ptr.p_int[i]+lsizes->ptr.p_int[j];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( (ltypes->ptr.p_int[i]==-2||ltypes->ptr.p_int[i]==-3)||ltypes->ptr.p_int[i]==-4 )
|
|
{
|
|
lnsyn.ptr.p_int[i] = 0;
|
|
}
|
|
}
|
|
ae_assert(lnsyn.ptr.p_int[i]>=0, "MLPCreate: internal error #0!", _state);
|
|
|
|
/*
|
|
* Other info
|
|
*/
|
|
lnfirst.ptr.p_int[i] = ntotal;
|
|
ntotal = ntotal+lsizes->ptr.p_int[i];
|
|
if( ltypes->ptr.p_int[i]==0 )
|
|
{
|
|
wcount = wcount+lnsyn.ptr.p_int[i]*lsizes->ptr.p_int[i];
|
|
}
|
|
}
|
|
ssize = 7+ntotal*mlpbase_nfieldwidth;
|
|
|
|
/*
|
|
* Allocate
|
|
*/
|
|
ae_vector_set_length(&network->structinfo, ssize-1+1, _state);
|
|
ae_vector_set_length(&network->weights, wcount-1+1, _state);
|
|
if( isclsnet )
|
|
{
|
|
ae_vector_set_length(&network->columnmeans, nin-1+1, _state);
|
|
ae_vector_set_length(&network->columnsigmas, nin-1+1, _state);
|
|
}
|
|
else
|
|
{
|
|
ae_vector_set_length(&network->columnmeans, nin+nout-1+1, _state);
|
|
ae_vector_set_length(&network->columnsigmas, nin+nout-1+1, _state);
|
|
}
|
|
ae_vector_set_length(&network->neurons, ntotal-1+1, _state);
|
|
ae_vector_set_length(&network->nwbuf, ae_maxint(wcount, 2*nout, _state)-1+1, _state);
|
|
ae_vector_set_length(&network->integerbuf, 3+1, _state);
|
|
ae_vector_set_length(&network->dfdnet, ntotal-1+1, _state);
|
|
ae_vector_set_length(&network->x, nin-1+1, _state);
|
|
ae_vector_set_length(&network->y, nout-1+1, _state);
|
|
ae_vector_set_length(&network->derror, ntotal-1+1, _state);
|
|
|
|
/*
|
|
* Fill structure:
|
|
* * first, fill by dummy values to avoid spurious reports by Valgrind
|
|
* * then fill global info header
|
|
*/
|
|
for(i=0; i<=ssize-1; i++)
|
|
{
|
|
network->structinfo.ptr.p_int[i] = -999999;
|
|
}
|
|
network->structinfo.ptr.p_int[0] = ssize;
|
|
network->structinfo.ptr.p_int[1] = nin;
|
|
network->structinfo.ptr.p_int[2] = nout;
|
|
network->structinfo.ptr.p_int[3] = ntotal;
|
|
network->structinfo.ptr.p_int[4] = wcount;
|
|
network->structinfo.ptr.p_int[5] = 7;
|
|
if( isclsnet )
|
|
{
|
|
network->structinfo.ptr.p_int[6] = 1;
|
|
}
|
|
else
|
|
{
|
|
network->structinfo.ptr.p_int[6] = 0;
|
|
}
|
|
|
|
/*
|
|
* Fill structure: neuron connections
|
|
*/
|
|
nprocessed = 0;
|
|
wallocated = 0;
|
|
for(i=0; i<=layerscount-1; i++)
|
|
{
|
|
for(j=0; j<=lsizes->ptr.p_int[i]-1; j++)
|
|
{
|
|
offs = network->structinfo.ptr.p_int[5]+nprocessed*mlpbase_nfieldwidth;
|
|
network->structinfo.ptr.p_int[offs+0] = ltypes->ptr.p_int[i];
|
|
if( ltypes->ptr.p_int[i]==0 )
|
|
{
|
|
|
|
/*
|
|
* Adaptive summator:
|
|
* * connections with weights to previous neurons
|
|
*/
|
|
network->structinfo.ptr.p_int[offs+1] = lnsyn.ptr.p_int[i];
|
|
network->structinfo.ptr.p_int[offs+2] = lnfirst.ptr.p_int[lconnfirst->ptr.p_int[i]];
|
|
network->structinfo.ptr.p_int[offs+3] = wallocated;
|
|
wallocated = wallocated+lnsyn.ptr.p_int[i];
|
|
nprocessed = nprocessed+1;
|
|
}
|
|
if( ltypes->ptr.p_int[i]>0||ltypes->ptr.p_int[i]==-5 )
|
|
{
|
|
|
|
/*
|
|
* Activation layer:
|
|
* * each neuron connected to one (only one) of previous neurons.
|
|
* * no weights
|
|
*/
|
|
network->structinfo.ptr.p_int[offs+1] = 1;
|
|
network->structinfo.ptr.p_int[offs+2] = lnfirst.ptr.p_int[lconnfirst->ptr.p_int[i]]+j;
|
|
network->structinfo.ptr.p_int[offs+3] = -1;
|
|
nprocessed = nprocessed+1;
|
|
}
|
|
if( (ltypes->ptr.p_int[i]==-2||ltypes->ptr.p_int[i]==-3)||ltypes->ptr.p_int[i]==-4 )
|
|
{
|
|
nprocessed = nprocessed+1;
|
|
}
|
|
}
|
|
}
|
|
ae_assert(wallocated==wcount, "MLPCreate: internal error #1!", _state);
|
|
ae_assert(nprocessed==ntotal, "MLPCreate: internal error #2!", _state);
|
|
|
|
/*
|
|
* Fill weights by small random values
|
|
* Initialize means and sigmas
|
|
*/
|
|
for(i=0; i<=nin-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[i] = (double)(0);
|
|
network->columnsigmas.ptr.p_double[i] = (double)(1);
|
|
}
|
|
if( !isclsnet )
|
|
{
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
network->columnmeans.ptr.p_double[nin+i] = (double)(0);
|
|
network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
|
|
}
|
|
}
|
|
mlprandomize(network, _state);
|
|
|
|
/*
|
|
* Seed buffers
|
|
*/
|
|
ae_shared_pool_set_seed(&network->buf, &buf, sizeof(buf), _mlpbuffers_init, _mlpbuffers_init_copy, _mlpbuffers_destroy, _state);
|
|
ae_vector_set_length(&sgrad.g, wcount, _state);
|
|
sgrad.f = 0.0;
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
sgrad.g.ptr.p_double[i] = 0.0;
|
|
}
|
|
ae_shared_pool_set_seed(&network->gradbuf, &sgrad, sizeof(sgrad), _smlpgrad_init, _smlpgrad_init_copy, _smlpgrad_destroy, _state);
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
Internal subroutine for Hessian calculation.

WARNING! Unspeakable math far beyond human capabilities :)
*************************************************************************/
|
|
static void mlpbase_mlphessianbatchinternal(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t ssize,
|
|
ae_bool naturalerr,
|
|
double* e,
|
|
/* Real */ ae_vector* grad,
|
|
/* Real */ ae_matrix* h,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntotal;
|
|
ae_int_t istart;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t kl;
|
|
ae_int_t offs;
|
|
ae_int_t n1;
|
|
ae_int_t n2;
|
|
ae_int_t w1;
|
|
ae_int_t w2;
|
|
double s;
|
|
double t;
|
|
double v;
|
|
double et;
|
|
ae_bool bflag;
|
|
double f;
|
|
double df;
|
|
double d2f;
|
|
double deidyj;
|
|
double mx;
|
|
double q;
|
|
double z;
|
|
double s2;
|
|
double expi;
|
|
double expj;
|
|
ae_vector x;
|
|
ae_vector desiredy;
|
|
ae_vector gt;
|
|
ae_vector zeros;
|
|
ae_matrix rx;
|
|
ae_matrix ry;
|
|
ae_matrix rdx;
|
|
ae_matrix rdy;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&x, 0, sizeof(x));
|
|
memset(&desiredy, 0, sizeof(desiredy));
|
|
memset(>, 0, sizeof(gt));
|
|
memset(&zeros, 0, sizeof(zeros));
|
|
memset(&rx, 0, sizeof(rx));
|
|
memset(&ry, 0, sizeof(ry));
|
|
memset(&rdx, 0, sizeof(rdx));
|
|
memset(&rdy, 0, sizeof(rdy));
|
|
*e = 0;
|
|
ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&desiredy, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(>, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&zeros, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&rx, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&ry, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&rdx, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&rdy, 0, 0, DT_REAL, _state, ae_true);
|
|
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ntotal = network->structinfo.ptr.p_int[3];
|
|
istart = network->structinfo.ptr.p_int[5];
|
|
|
|
/*
|
|
* Prepare
|
|
*/
|
|
ae_vector_set_length(&x, nin-1+1, _state);
|
|
ae_vector_set_length(&desiredy, nout-1+1, _state);
|
|
ae_vector_set_length(&zeros, wcount-1+1, _state);
|
|
ae_vector_set_length(>, wcount-1+1, _state);
|
|
ae_matrix_set_length(&rx, ntotal+nout-1+1, wcount-1+1, _state);
|
|
ae_matrix_set_length(&ry, ntotal+nout-1+1, wcount-1+1, _state);
|
|
ae_matrix_set_length(&rdx, ntotal+nout-1+1, wcount-1+1, _state);
|
|
ae_matrix_set_length(&rdy, ntotal+nout-1+1, wcount-1+1, _state);
|
|
*e = (double)(0);
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
zeros.ptr.p_double[i] = (double)(0);
|
|
}
|
|
ae_v_move(&grad->ptr.p_double[0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
ae_v_move(&h->ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
}
|
|
|
|
/*
|
|
* Process
|
|
*/
|
|
for(k=0; k<=ssize-1; k++)
|
|
{
|
|
|
|
/*
|
|
* Process vector with MLPGradN.
|
|
* Now Neurons, DFDNET and DError contains results of the last run.
|
|
*/
|
|
ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nin-1));
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
|
|
/*
|
|
* class labels outputs
|
|
*/
|
|
kl = ae_round(xy->ptr.pp_double[k][nin], _state);
|
|
for(i=0; i<=nout-1; i++)
|
|
{
|
|
if( i==kl )
|
|
{
|
|
desiredy.ptr.p_double[i] = (double)(1);
|
|
}
|
|
else
|
|
{
|
|
desiredy.ptr.p_double[i] = (double)(0);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* real outputs
|
|
*/
|
|
ae_v_move(&desiredy.ptr.p_double[0], 1, &xy->ptr.pp_double[k][nin], 1, ae_v_len(0,nout-1));
|
|
}
|
|
if( naturalerr )
|
|
{
|
|
mlpgradn(network, &x, &desiredy, &et, >, _state);
|
|
}
|
|
else
|
|
{
|
|
mlpgrad(network, &x, &desiredy, &et, >, _state);
|
|
}
|
|
|
|
/*
|
|
* grad, error
|
|
*/
|
|
*e = *e+et;
|
|
ae_v_add(&grad->ptr.p_double[0], 1, >.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
|
|
/*
|
|
* Hessian.
|
|
* Forward pass of the R-algorithm
|
|
*/
|
|
for(i=0; i<=ntotal-1; i++)
|
|
{
|
|
offs = istart+i*mlpbase_nfieldwidth;
|
|
ae_v_move(&rx.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
ae_v_move(&ry.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
|
|
{
|
|
|
|
/*
|
|
* Activation function
|
|
*/
|
|
n1 = network->structinfo.ptr.p_int[offs+2];
|
|
ae_v_move(&rx.ptr.pp_double[i][0], 1, &ry.ptr.pp_double[n1][0], 1, ae_v_len(0,wcount-1));
|
|
v = network->dfdnet.ptr.p_double[i];
|
|
ae_v_moved(&ry.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
|
|
                continue;
            }
            if( network->structinfo.ptr.p_int[offs+0]==0 )
            {
                
                /*
                 * Adaptive summator
                 */
                n1 = network->structinfo.ptr.p_int[offs+2];
                n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
                w1 = network->structinfo.ptr.p_int[offs+3];
                w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
                for(j=n1; j<=n2; j++)
                {
                    v = network->weights.ptr.p_double[w1+j-n1];
                    ae_v_addd(&rx.ptr.pp_double[i][0], 1, &ry.ptr.pp_double[j][0], 1, ae_v_len(0,wcount-1), v);
                    rx.ptr.pp_double[i][w1+j-n1] = rx.ptr.pp_double[i][w1+j-n1]+network->neurons.ptr.p_double[j];
                }
                ae_v_move(&ry.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
                continue;
            }
            if( network->structinfo.ptr.p_int[offs+0]<0 )
            {
                bflag = ae_true;
                if( network->structinfo.ptr.p_int[offs+0]==-2 )
                {
                    
                    /*
                     * input neuron, left unchanged
                     */
                    bflag = ae_false;
                }
                if( network->structinfo.ptr.p_int[offs+0]==-3 )
                {
                    
                    /*
                     * "-1" neuron, left unchanged
                     */
                    bflag = ae_false;
                }
                if( network->structinfo.ptr.p_int[offs+0]==-4 )
                {
                    
                    /*
                     * "0" neuron, left unchanged
                     */
                    bflag = ae_false;
                }
                ae_assert(!bflag, "MLPHessianNBatch: internal error - unknown neuron type!", _state);
                continue;
            }
        }
        
        /*
         * Hessian. Backward pass of the R-algorithm.
         *
         * Stage 1. Initialize RDY
         */
        for(i=0; i<=ntotal+nout-1; i++)
        {
            ae_v_move(&rdy.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        }
        if( network->structinfo.ptr.p_int[6]==0 )
        {
            
            /*
             * Standardisation.
             *
             * In context of the Hessian calculation standardisation
             * is considered as additional layer with weightless
             * activation function:
             *
             * F(NET) := Sigma*NET
             *
             * So we add one more layer to forward pass, and
             * make forward/backward pass through this layer.
             */
            for(i=0; i<=nout-1; i++)
            {
                n1 = ntotal-nout+i;
                n2 = ntotal+i;
                
                /*
                 * Forward pass from N1 to N2
                 */
                ae_v_move(&rx.ptr.pp_double[n2][0], 1, &ry.ptr.pp_double[n1][0], 1, ae_v_len(0,wcount-1));
                v = network->columnsigmas.ptr.p_double[nin+i];
                ae_v_moved(&ry.ptr.pp_double[n2][0], 1, &rx.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1), v);
                
                /*
                 * Initialization of RDY
                 */
                ae_v_move(&rdy.ptr.pp_double[n2][0], 1, &ry.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1));
                
                /*
                 * Backward pass from N2 to N1:
                 * 1. Calculate R(dE/dX).
                 * 2. No R(dE/dWij) is needed since weight of activation neuron
                 *    is fixed to 1. So we can update R(dE/dY) for
                 *    the connected neuron (note that Vij=0, Wij=1)
                 */
                df = network->columnsigmas.ptr.p_double[nin+i];
                ae_v_moved(&rdx.ptr.pp_double[n2][0], 1, &rdy.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1), df);
                ae_v_add(&rdy.ptr.pp_double[n1][0], 1, &rdx.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1));
            }
        }
        else
        {
            
            /*
             * Softmax.
             *
             * Initialize RDY using generalized expression for ei'(yi)
             * (see expression (9) from p. 5 of "Fast Exact Multiplication by the Hessian").
             *
             * When we are working with softmax network, generalized
             * expression for ei'(yi) is used because softmax
             * normalization leads to ei, which depends on all y's
             */
            if( naturalerr )
            {
                
                /*
                 * softmax + cross-entropy.
                 * We have:
                 *
                 * S = sum(exp(yk)),
                 * ei = sum(trn)*exp(yi)/S-trn_i
                 *
                 * j=i:   d(ei)/d(yj) =  T*exp(yi)*(S-exp(yi))/S^2
                 * j<>i:  d(ei)/d(yj) = -T*exp(yi)*exp(yj)/S^2
                 */
                t = (double)(0);
                for(i=0; i<=nout-1; i++)
                {
                    t = t+desiredy.ptr.p_double[i];
                }
                mx = network->neurons.ptr.p_double[ntotal-nout];
                for(i=0; i<=nout-1; i++)
                {
                    mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
                }
                s = (double)(0);
                for(i=0; i<=nout-1; i++)
                {
                    network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
                    s = s+network->nwbuf.ptr.p_double[i];
                }
                for(i=0; i<=nout-1; i++)
                {
                    for(j=0; j<=nout-1; j++)
                    {
                        if( j==i )
                        {
                            deidyj = t*network->nwbuf.ptr.p_double[i]*(s-network->nwbuf.ptr.p_double[i])/ae_sqr(s, _state);
                            ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+i][0], 1, ae_v_len(0,wcount-1), deidyj);
                        }
                        else
                        {
                            deidyj = -t*network->nwbuf.ptr.p_double[i]*network->nwbuf.ptr.p_double[j]/ae_sqr(s, _state);
                            ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+j][0], 1, ae_v_len(0,wcount-1), deidyj);
                        }
                    }
                }
            }
            else
            {
                
                /*
                 * For a softmax + squared error we have an expression
                 * far beyond human imagination, so we don't even try
                 * to comment on it. Just enjoy the code...
                 *
                 * P.S. That's why "natural error" is called "natural" -
                 * compact beautiful expressions, fast code....
                 */
                mx = network->neurons.ptr.p_double[ntotal-nout];
                for(i=0; i<=nout-1; i++)
                {
                    mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
                }
                s = (double)(0);
                s2 = (double)(0);
                for(i=0; i<=nout-1; i++)
                {
                    network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
                    s = s+network->nwbuf.ptr.p_double[i];
                    s2 = s2+ae_sqr(network->nwbuf.ptr.p_double[i], _state);
                }
                q = (double)(0);
                for(i=0; i<=nout-1; i++)
                {
                    q = q+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])*network->nwbuf.ptr.p_double[i];
                }
                for(i=0; i<=nout-1; i++)
                {
                    z = -q+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])*s;
                    expi = network->nwbuf.ptr.p_double[i];
                    for(j=0; j<=nout-1; j++)
                    {
                        expj = network->nwbuf.ptr.p_double[j];
                        if( j==i )
                        {
                            deidyj = expi/ae_sqr(s, _state)*((z+expi)*(s-2*expi)/s+expi*s2/ae_sqr(s, _state));
                        }
                        else
                        {
                            deidyj = expi*expj/ae_sqr(s, _state)*(s2/ae_sqr(s, _state)-2*z/s-(expi+expj)/s+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])-(network->y.ptr.p_double[j]-desiredy.ptr.p_double[j]));
                        }
                        ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+j][0], 1, ae_v_len(0,wcount-1), deidyj);
                    }
                }
            }
        }
        
        /*
         * Hessian. Backward pass of the R-algorithm
         *
         * Stage 2. Process.
         */
        for(i=ntotal-1; i>=0; i--)
        {
            
            /*
             * Possible variants:
             * 1. Activation function
             * 2. Adaptive summator
             * 3. Special neuron
             */
            offs = istart+i*mlpbase_nfieldwidth;
            if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
            {
                n1 = network->structinfo.ptr.p_int[offs+2];
                
                /*
                 * First, calculate R(dE/dX).
                 */
                mlpactivationfunction(network->neurons.ptr.p_double[n1], network->structinfo.ptr.p_int[offs+0], &f, &df, &d2f, _state);
                v = d2f*network->derror.ptr.p_double[i];
                ae_v_moved(&rdx.ptr.pp_double[i][0], 1, &rdy.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), df);
                ae_v_addd(&rdx.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
                
                /*
                 * No R(dE/dWij) is needed since weight of activation neuron
                 * is fixed to 1.
                 *
                 * So we can update R(dE/dY) for the connected neuron.
                 * (note that Vij=0, Wij=1)
                 */
                ae_v_add(&rdy.ptr.pp_double[n1][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
                continue;
            }
            if( network->structinfo.ptr.p_int[offs+0]==0 )
            {
                
                /*
                 * Adaptive summator
                 */
                n1 = network->structinfo.ptr.p_int[offs+2];
                n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
                w1 = network->structinfo.ptr.p_int[offs+3];
                w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
                
                /*
                 * First, calculate R(dE/dX).
                 */
                ae_v_move(&rdx.ptr.pp_double[i][0], 1, &rdy.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
                
                /*
                 * Then, calculate R(dE/dWij)
                 */
                for(j=w1; j<=w2; j++)
                {
                    v = network->neurons.ptr.p_double[n1+j-w1];
                    ae_v_addd(&h->ptr.pp_double[j][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
                    v = network->derror.ptr.p_double[i];
                    ae_v_addd(&h->ptr.pp_double[j][0], 1, &ry.ptr.pp_double[n1+j-w1][0], 1, ae_v_len(0,wcount-1), v);
                }
                
                /*
                 * And finally, update R(dE/dY) for connected neurons.
                 */
                for(j=w1; j<=w2; j++)
                {
                    v = network->weights.ptr.p_double[j];
                    ae_v_addd(&rdy.ptr.pp_double[n1+j-w1][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
                    rdy.ptr.pp_double[n1+j-w1][j] = rdy.ptr.pp_double[n1+j-w1][j]+network->derror.ptr.p_double[i];
                }
                continue;
            }
            if( network->structinfo.ptr.p_int[offs+0]<0 )
            {
                bflag = ae_false;
                if( (network->structinfo.ptr.p_int[offs+0]==-2||network->structinfo.ptr.p_int[offs+0]==-3)||network->structinfo.ptr.p_int[offs+0]==-4 )
                {
                    
                    /*
                     * Special neuron type, no back-propagation required
                     */
                    bflag = ae_true;
                }
                ae_assert(bflag, "MLPHessianNBatch: unknown neuron type!", _state);
                continue;
            }
        }
    }
    ae_frame_leave(_state);
}


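/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. The routine above computes exact
Hessian entries with Pearlmutter's R-operator ("Fast Exact Multiplication
by the Hessian"). A standard way to sanity-check such code is a central-
difference Hessian-vector product,

    H*v ~ ( g(w + eps*v) - g(w - eps*v) ) / (2*eps)

The callback type and the function below are hypothetical and exist only
for this sketch; they do not belong to the ALGLIB API.
*************************************************************************/
typedef void (*grad_func_t)(const double *w, int n, double *g, void *ctx);

static void hessian_vector_check(grad_func_t gradf,
     double *w,         /* current point; perturbed in place, then restored */
     const double *v,   /* direction to multiply by                         */
     int n,             /* dimensionality                                   */
     double eps,        /* finite-difference step, e.g. 1.0E-6              */
     double *hv,        /* output: approximate H*v, length n                */
     void *ctx)
{
    int i;
    double *gplus = (double*)malloc((size_t)n*sizeof(double));
    double *gminus = (double*)malloc((size_t)n*sizeof(double));
    if( gplus==NULL||gminus==NULL )
    {
        free(gplus);
        free(gminus);
        return;
    }
    for(i=0; i<n; i++) w[i] += eps*v[i];
    gradf(w, n, gplus, ctx);                /* g(w + eps*v) */
    for(i=0; i<n; i++) w[i] -= 2*eps*v[i];
    gradf(w, n, gminus, ctx);               /* g(w - eps*v) */
    for(i=0; i<n; i++) w[i] += eps*v[i];    /* restore w    */
    for(i=0; i<n; i++) hv[i] = (gplus[i]-gminus[i])/(2*eps);
    free(gplus);
    free(gminus);
}

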
/*************************************************************************
Internal subroutine

Network must be processed by MLPProcess on X
*************************************************************************/
static void mlpbase_mlpinternalcalculategradient(multilayerperceptron* network,
     /* Real */ ae_vector* neurons,
     /* Real */ ae_vector* weights,
     /* Real */ ae_vector* derror,
     /* Real */ ae_vector* grad,
     ae_bool naturalerrorfunc,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t w1;
    ae_int_t w2;
    ae_int_t ntotal;
    ae_int_t istart;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t offs;
    double dedf;
    double dfdnet;
    double v;
    double fown;
    double deown;
    double net;
    double mx;
    ae_bool bflag;


    /*
     * Read network geometry
     */
    nin = network->structinfo.ptr.p_int[1];
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    istart = network->structinfo.ptr.p_int[5];
    
    /*
     * Pre-processing of dError/dOut:
     * from dError/dOut(normalized) to dError/dOut(non-normalized)
     */
    ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPInternalCalculateGradient: unknown normalization type!", _state);
    if( network->structinfo.ptr.p_int[6]==1 )
    {
        
        /*
         * Softmax
         */
        if( !naturalerrorfunc )
        {
            mx = network->neurons.ptr.p_double[ntotal-nout];
            for(i=0; i<=nout-1; i++)
            {
                mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
            }
            net = (double)(0);
            for(i=0; i<=nout-1; i++)
            {
                network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
                net = net+network->nwbuf.ptr.p_double[i];
            }
            v = ae_v_dotproduct(&network->derror.ptr.p_double[ntotal-nout], 1, &network->nwbuf.ptr.p_double[0], 1, ae_v_len(ntotal-nout,ntotal-1));
            for(i=0; i<=nout-1; i++)
            {
                fown = network->nwbuf.ptr.p_double[i];
                deown = network->derror.ptr.p_double[ntotal-nout+i];
                network->nwbuf.ptr.p_double[nout+i] = (-v+deown*fown+deown*(net-fown))*fown/ae_sqr(net, _state);
            }
            for(i=0; i<=nout-1; i++)
            {
                network->derror.ptr.p_double[ntotal-nout+i] = network->nwbuf.ptr.p_double[nout+i];
            }
        }
    }
    else
    {
        
        /*
         * Un-standardisation
         */
        for(i=0; i<=nout-1; i++)
        {
            network->derror.ptr.p_double[ntotal-nout+i] = network->derror.ptr.p_double[ntotal-nout+i]*network->columnsigmas.ptr.p_double[nin+i];
        }
    }
    
    /*
     * Backpropagation
     */
    for(i=ntotal-1; i>=0; i--)
    {
        
        /*
         * Extract info
         */
        offs = istart+i*mlpbase_nfieldwidth;
        if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
        {
            
            /*
             * Activation function
             */
            dedf = network->derror.ptr.p_double[i];
            dfdnet = network->dfdnet.ptr.p_double[i];
            derror->ptr.p_double[network->structinfo.ptr.p_int[offs+2]] = derror->ptr.p_double[network->structinfo.ptr.p_int[offs+2]]+dedf*dfdnet;
            continue;
        }
        if( network->structinfo.ptr.p_int[offs+0]==0 )
        {
            
            /*
             * Adaptive summator
             */
            n1 = network->structinfo.ptr.p_int[offs+2];
            n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
            w1 = network->structinfo.ptr.p_int[offs+3];
            w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
            dedf = network->derror.ptr.p_double[i];
            dfdnet = 1.0;
            v = dedf*dfdnet;
            ae_v_moved(&grad->ptr.p_double[w1], 1, &neurons->ptr.p_double[n1], 1, ae_v_len(w1,w2), v);
            ae_v_addd(&derror->ptr.p_double[n1], 1, &weights->ptr.p_double[w1], 1, ae_v_len(n1,n2), v);
            continue;
        }
        if( network->structinfo.ptr.p_int[offs+0]<0 )
        {
            bflag = ae_false;
            if( (network->structinfo.ptr.p_int[offs+0]==-2||network->structinfo.ptr.p_int[offs+0]==-3)||network->structinfo.ptr.p_int[offs+0]==-4 )
            {
                
                /*
                 * Special neuron type, no back-propagation required
                 */
                bflag = ae_true;
            }
            ae_assert(bflag, "MLPInternalCalculateGradient: unknown neuron type!", _state);
            continue;
        }
    }
}


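/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. For one "adaptive summator"
neuron NET = sum_j W_j*Y_j the routine above applies the chain rule

    dE/dW_j  = dE/dNET * Y_j      (written to Grad, cf. ae_v_moved)
    dE/dY_j += dE/dNET * W_j      (accumulated into DError)

A minimal stand-alone version of that single update step, with
hypothetical names, would be:
*************************************************************************/
static void summator_backprop_step(
     const double *y,   /* outputs of the J source neurons           */
     const double *w,   /* weights of this summator                  */
     double dednet,     /* dE/dNET for this neuron                   */
     int j_count,
     double *grad,      /* per-sample dE/dW, length j_count          */
     double *dedy)      /* accumulated dE/dY of sources, same length */
{
    int j;
    for(j=0; j<j_count; j++)
    {
        grad[j] = dednet*y[j];  /* set, as ae_v_moved does; the chunked  */
        dedy[j] += dednet*w[j]; /* version accumulates over the batch    */
    }
}

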
static void mlpbase_mlpchunkedgradient(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t cstart,
     ae_int_t csize,
     /* Real */ ae_vector* batch4buf,
     /* Real */ ae_vector* hpcbuf,
     double* e,
     ae_bool naturalerrorfunc,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t kl;
    ae_int_t ntotal;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t offs;
    double f;
    double df;
    double d2f;
    double v;
    double vv;
    double s;
    double fown;
    double deown;
    ae_bool bflag;
    ae_int_t istart;
    ae_int_t entrysize;
    ae_int_t dfoffs;
    ae_int_t derroroffs;
    ae_int_t entryoffs;
    ae_int_t neuronidx;
    ae_int_t srcentryoffs;
    ae_int_t srcneuronidx;
    ae_int_t srcweightidx;
    ae_int_t neurontype;
    ae_int_t nweights;
    ae_int_t offs0;
    ae_int_t offs1;
    ae_int_t offs2;
    double v0;
    double v1;
    double v2;
    double v3;
    double s0;
    double s1;
    double s2;
    double s3;
    ae_int_t chunksize;


    chunksize = 4;
    ae_assert(csize<=chunksize, "MLPChunkedGradient: internal error (CSize>ChunkSize)", _state);
    
    /*
     * Try to use HPC core, if possible
     */
    if( hpcchunkedgradient(&network->weights, &network->structinfo, &network->columnmeans, &network->columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc, _state) )
    {
        return;
    }
    
    /*
     * Read network geometry, prepare data
     */
    nin = network->structinfo.ptr.p_int[1];
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    istart = network->structinfo.ptr.p_int[5];
    entrysize = 12;
    dfoffs = 4;
    derroroffs = 8;
    
    /*
     * Fill Batch4Buf by zeros.
     *
     * THIS STAGE IS VERY IMPORTANT!
     *
     * We fill all components of entry - neuron values, dF/dNET, dError/dF.
     * It allows us to easily handle situations when CSize<ChunkSize by
     * simply working with ALL components of Batch4Buf, without ever
     * looking at CSize. The idea is that dError/dF for absent components
     * will be initialized by zeros - and won't be rewritten by non-zero
     * values during backpropagation.
     */
    for(i=0; i<=entrysize*ntotal-1; i++)
    {
        batch4buf->ptr.p_double[i] = (double)(0);
    }
    
    /*
     * Forward pass:
     * 1. Load data into Batch4Buf. If CSize<ChunkSize, data are padded by zeros.
     * 2. Perform forward pass through network
     */
    for(i=0; i<=nin-1; i++)
    {
        entryoffs = entrysize*i;
        for(j=0; j<=csize-1; j++)
        {
            if( ae_fp_neq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
            {
                batch4buf->ptr.p_double[entryoffs+j] = (xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i])/network->columnsigmas.ptr.p_double[i];
            }
            else
            {
                batch4buf->ptr.p_double[entryoffs+j] = xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i];
            }
        }
    }
    for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
    {
        entryoffs = entrysize*neuronidx;
        offs = istart+neuronidx*mlpbase_nfieldwidth;
        neurontype = network->structinfo.ptr.p_int[offs+0];
        if( neurontype>0||neurontype==-5 )
        {
            
            /*
             * "activation function" neuron, which takes value of neuron SrcNeuronIdx
             * and applies activation function to it.
             *
             * This neuron has no weights and no tunable parameters.
             */
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+0], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+0] = f;
            batch4buf->ptr.p_double[entryoffs+0+dfoffs] = df;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+1], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+1] = f;
            batch4buf->ptr.p_double[entryoffs+1+dfoffs] = df;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+2], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+2] = f;
            batch4buf->ptr.p_double[entryoffs+2+dfoffs] = df;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+3], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+3] = f;
            batch4buf->ptr.p_double[entryoffs+3+dfoffs] = df;
            continue;
        }
        if( neurontype==0 )
        {
            
            /*
             * "adaptive summator" neuron, whose output is a weighted sum of inputs.
             * It has weights, but has no activation function.
             */
            nweights = network->structinfo.ptr.p_int[offs+1];
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            srcweightidx = network->structinfo.ptr.p_int[offs+3];
            v0 = (double)(0);
            v1 = (double)(0);
            v2 = (double)(0);
            v3 = (double)(0);
            for(j=0; j<=nweights-1; j++)
            {
                v = network->weights.ptr.p_double[srcweightidx];
                srcweightidx = srcweightidx+1;
                v0 = v0+v*batch4buf->ptr.p_double[srcentryoffs+0];
                v1 = v1+v*batch4buf->ptr.p_double[srcentryoffs+1];
                v2 = v2+v*batch4buf->ptr.p_double[srcentryoffs+2];
                v3 = v3+v*batch4buf->ptr.p_double[srcentryoffs+3];
                srcentryoffs = srcentryoffs+entrysize;
            }
            batch4buf->ptr.p_double[entryoffs+0] = v0;
            batch4buf->ptr.p_double[entryoffs+1] = v1;
            batch4buf->ptr.p_double[entryoffs+2] = v2;
            batch4buf->ptr.p_double[entryoffs+3] = v3;
            batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(1);
            batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(1);
            batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(1);
            batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(1);
            continue;
        }
        if( neurontype<0 )
        {
            bflag = ae_false;
            if( neurontype==-2 )
            {
                
                /*
                 * Input neuron, left unchanged
                 */
                bflag = ae_true;
            }
            if( neurontype==-3 )
            {
                
                /*
                 * "-1" neuron
                 */
                batch4buf->ptr.p_double[entryoffs+0] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+1] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+2] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+3] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(0);
                bflag = ae_true;
            }
            if( neurontype==-4 )
            {
                
                /*
                 * "0" neuron
                 */
                batch4buf->ptr.p_double[entryoffs+0] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+1] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+2] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+3] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(0);
                bflag = ae_true;
            }
            ae_assert(bflag, "MLPChunkedGradient: internal error - unknown neuron type!", _state);
            continue;
        }
    }
    
    /*
     * Intermediate phase between forward and backward passes.
     *
     * For regression networks:
     * * forward pass is completely done (no additional post-processing is
     *   needed).
     * * before starting backward pass, we have to calculate dError/dOut
     *   for output neurons. We also update error at this phase.
     *
     * For classification networks:
     * * in addition to forward pass we apply SOFTMAX normalization to
     *   output neurons.
     * * after applying normalization, we have to calculate dError/dOut,
     *   which is calculated in two steps:
     *   * first, we calculate derivative of error with respect to SOFTMAX
     *     normalized outputs (normalized dError)
     *   * then, we calculate derivative of error with respect to values
     *     of outputs BEFORE normalization was applied to them
     */
    ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPChunkedGradient: unknown normalization type!", _state);
    if( network->structinfo.ptr.p_int[6]==1 )
    {
        
        /*
         * SOFTMAX-normalized network.
         *
         * First, calculate (V0,V1,V2,V3) - component-wise maximum
         * of output neurons. This vector of maximum values will be
         * used for normalization of outputs prior to calculating
         * exponentials.
         *
         * NOTE: the only purpose of this stage is to prevent overflow
         *       during calculation of exponentials. With this stage
         *       we make sure that all exponentials are calculated
         *       with non-positive argument. If you load (0,0,0,0) to
         *       (V0,V1,V2,V3), your program will continue working -
         *       although with less robustness.
         */
        entryoffs = entrysize*(ntotal-nout);
        v0 = batch4buf->ptr.p_double[entryoffs+0];
        v1 = batch4buf->ptr.p_double[entryoffs+1];
        v2 = batch4buf->ptr.p_double[entryoffs+2];
        v3 = batch4buf->ptr.p_double[entryoffs+3];
        entryoffs = entryoffs+entrysize;
        for(i=1; i<=nout-1; i++)
        {
            v = batch4buf->ptr.p_double[entryoffs+0];
            if( v>v0 )
            {
                v0 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+1];
            if( v>v1 )
            {
                v1 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+2];
            if( v>v2 )
            {
                v2 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+3];
            if( v>v3 )
            {
                v3 = v;
            }
            entryoffs = entryoffs+entrysize;
        }
        
        /*
         * Then, calculate exponentials and place them to part of the
         * array which is located past the last entry. We also
         * calculate sum of exponentials which will be stored past the
         * exponentials.
         */
        entryoffs = entrysize*(ntotal-nout);
        offs0 = entrysize*ntotal;
        s0 = (double)(0);
        s1 = (double)(0);
        s2 = (double)(0);
        s3 = (double)(0);
        for(i=0; i<=nout-1; i++)
        {
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+0]-v0, _state);
            s0 = s0+v;
            batch4buf->ptr.p_double[offs0+0] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+1]-v1, _state);
            s1 = s1+v;
            batch4buf->ptr.p_double[offs0+1] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+2]-v2, _state);
            s2 = s2+v;
            batch4buf->ptr.p_double[offs0+2] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+3]-v3, _state);
            s3 = s3+v;
            batch4buf->ptr.p_double[offs0+3] = v;
            entryoffs = entryoffs+entrysize;
            offs0 = offs0+chunksize;
        }
        offs0 = entrysize*ntotal+2*nout*chunksize;
        batch4buf->ptr.p_double[offs0+0] = s0;
        batch4buf->ptr.p_double[offs0+1] = s1;
        batch4buf->ptr.p_double[offs0+2] = s2;
        batch4buf->ptr.p_double[offs0+3] = s3;
        
        /*
         * Now we have:
         * * Batch4Buf[0...EntrySize*NTotal-1] stores:
         *   * NTotal*ChunkSize neuron output values (SOFTMAX normalization
         *     was not applied to these values),
         *   * NTotal*ChunkSize values of dF/dNET (derivative of neuron
         *     output with respect to its input)
         *   * NTotal*ChunkSize zeros in the elements which correspond to
         *     dError/dOut (derivative of error with respect to neuron output).
         * * Batch4Buf[EntrySize*NTotal...EntrySize*NTotal+ChunkSize*NOut-1] -
         *   stores exponentials of last NOut neurons.
         * * Batch4Buf[EntrySize*NTotal+ChunkSize*NOut...EntrySize*NTotal+ChunkSize*2*NOut-1]
         *   - can be used for temporary calculations
         * * Batch4Buf[EntrySize*NTotal+ChunkSize*2*NOut...EntrySize*NTotal+ChunkSize*2*NOut+ChunkSize-1]
         *   - stores sum-of-exponentials
         *
         * Block below calculates derivatives of error function with respect
         * to non-SOFTMAX-normalized output values of last NOut neurons.
         *
         * It is quite complicated; we do not describe the algebra behind it,
         * but if you want you may check it yourself :)
         */
        if( naturalerrorfunc )
        {
            
            /*
             * Calculate derivative of error with respect to values of
             * output neurons PRIOR TO SOFTMAX NORMALIZATION. Because we
             * use natural error function (cross-entropy), we can do so
             * very easily.
             */
            offs0 = entrysize*ntotal+2*nout*chunksize;
            for(k=0; k<=csize-1; k++)
            {
                s = batch4buf->ptr.p_double[offs0+k];
                kl = ae_round(xy->ptr.pp_double[cstart+k][nin], _state);
                offs1 = (ntotal-nout)*entrysize+derroroffs+k;
                offs2 = entrysize*ntotal+k;
                for(i=0; i<=nout-1; i++)
                {
                    if( i==kl )
                    {
                        v = (double)(1);
                    }
                    else
                    {
                        v = (double)(0);
                    }
                    vv = batch4buf->ptr.p_double[offs2];
                    batch4buf->ptr.p_double[offs1] = vv/s-v;
                    *e = *e+mlpbase_safecrossentropy(v, vv/s, _state);
                    offs1 = offs1+entrysize;
                    offs2 = offs2+chunksize;
                }
            }
        }
        else
        {
            
            /*
             * SOFTMAX normalization makes things very difficult.
             * Sorry, we do not dare to describe this esoteric math
             * in detail.
             */
            offs0 = entrysize*ntotal+chunksize*2*nout;
            for(k=0; k<=csize-1; k++)
            {
                s = batch4buf->ptr.p_double[offs0+k];
                kl = ae_round(xy->ptr.pp_double[cstart+k][nin], _state);
                vv = (double)(0);
                offs1 = entrysize*ntotal+k;
                offs2 = entrysize*ntotal+nout*chunksize+k;
                for(i=0; i<=nout-1; i++)
                {
                    fown = batch4buf->ptr.p_double[offs1];
                    if( i==kl )
                    {
                        deown = fown/s-1;
                    }
                    else
                    {
                        deown = fown/s;
                    }
                    batch4buf->ptr.p_double[offs2] = deown;
                    vv = vv+deown*fown;
                    *e = *e+deown*deown/2;
                    offs1 = offs1+chunksize;
                    offs2 = offs2+chunksize;
                }
                offs1 = entrysize*ntotal+k;
                offs2 = entrysize*ntotal+nout*chunksize+k;
                for(i=0; i<=nout-1; i++)
                {
                    fown = batch4buf->ptr.p_double[offs1];
                    deown = batch4buf->ptr.p_double[offs2];
                    batch4buf->ptr.p_double[(ntotal-nout+i)*entrysize+derroroffs+k] = (-vv+deown*fown+deown*(s-fown))*fown/ae_sqr(s, _state);
                    offs1 = offs1+chunksize;
                    offs2 = offs2+chunksize;
                }
            }
        }
    }
    else
    {
        
        /*
         * Regression network with sum-of-squares function.
         *
         * For each of the last NOut neurons:
         * * calculate difference between actual and desired output
         * * calculate dError/dOut for this neuron (proportional to difference)
         * * store it in the last 4 components of entry (these values are used
         *   to start backpropagation)
         * * update error
         */
        for(i=0; i<=nout-1; i++)
        {
            v0 = network->columnsigmas.ptr.p_double[nin+i];
            v1 = network->columnmeans.ptr.p_double[nin+i];
            entryoffs = entrysize*(ntotal-nout+i);
            offs0 = entryoffs;
            offs1 = entryoffs+derroroffs;
            for(j=0; j<=csize-1; j++)
            {
                v = batch4buf->ptr.p_double[offs0+j]*v0+v1-xy->ptr.pp_double[cstart+j][nin+i];
                batch4buf->ptr.p_double[offs1+j] = v*v0;
                *e = *e+v*v/2;
            }
        }
    }
    
    /*
     * Backpropagation
     */
    for(neuronidx=ntotal-1; neuronidx>=0; neuronidx--)
    {
        entryoffs = entrysize*neuronidx;
        offs = istart+neuronidx*mlpbase_nfieldwidth;
        neurontype = network->structinfo.ptr.p_int[offs+0];
        if( neurontype>0||neurontype==-5 )
        {
            
            /*
             * Activation function
             */
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            offs0 = srcentryoffs+derroroffs;
            offs1 = entryoffs+derroroffs;
            offs2 = entryoffs+dfoffs;
            batch4buf->ptr.p_double[offs0+0] = batch4buf->ptr.p_double[offs0+0]+batch4buf->ptr.p_double[offs1+0]*batch4buf->ptr.p_double[offs2+0];
            batch4buf->ptr.p_double[offs0+1] = batch4buf->ptr.p_double[offs0+1]+batch4buf->ptr.p_double[offs1+1]*batch4buf->ptr.p_double[offs2+1];
            batch4buf->ptr.p_double[offs0+2] = batch4buf->ptr.p_double[offs0+2]+batch4buf->ptr.p_double[offs1+2]*batch4buf->ptr.p_double[offs2+2];
            batch4buf->ptr.p_double[offs0+3] = batch4buf->ptr.p_double[offs0+3]+batch4buf->ptr.p_double[offs1+3]*batch4buf->ptr.p_double[offs2+3];
            continue;
        }
        if( neurontype==0 )
        {
            
            /*
             * Adaptive summator
             */
            nweights = network->structinfo.ptr.p_int[offs+1];
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            srcweightidx = network->structinfo.ptr.p_int[offs+3];
            v0 = batch4buf->ptr.p_double[entryoffs+derroroffs+0];
            v1 = batch4buf->ptr.p_double[entryoffs+derroroffs+1];
            v2 = batch4buf->ptr.p_double[entryoffs+derroroffs+2];
            v3 = batch4buf->ptr.p_double[entryoffs+derroroffs+3];
            for(j=0; j<=nweights-1; j++)
            {
                offs0 = srcentryoffs;
                offs1 = srcentryoffs+derroroffs;
                v = network->weights.ptr.p_double[srcweightidx];
                hpcbuf->ptr.p_double[srcweightidx] = hpcbuf->ptr.p_double[srcweightidx]+batch4buf->ptr.p_double[offs0+0]*v0+batch4buf->ptr.p_double[offs0+1]*v1+batch4buf->ptr.p_double[offs0+2]*v2+batch4buf->ptr.p_double[offs0+3]*v3;
                batch4buf->ptr.p_double[offs1+0] = batch4buf->ptr.p_double[offs1+0]+v*v0;
                batch4buf->ptr.p_double[offs1+1] = batch4buf->ptr.p_double[offs1+1]+v*v1;
                batch4buf->ptr.p_double[offs1+2] = batch4buf->ptr.p_double[offs1+2]+v*v2;
                batch4buf->ptr.p_double[offs1+3] = batch4buf->ptr.p_double[offs1+3]+v*v3;
                srcentryoffs = srcentryoffs+entrysize;
                srcweightidx = srcweightidx+1;
            }
            continue;
        }
        if( neurontype<0 )
        {
            bflag = ae_false;
            if( (neurontype==-2||neurontype==-3)||neurontype==-4 )
            {
                
                /*
                 * Special neuron type, no back-propagation required
                 */
                bflag = ae_true;
            }
            ae_assert(bflag, "MLPChunkedGradient: unknown neuron type!", _state);
            continue;
        }
    }
}


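/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. In MLPChunkedGradient each neuron
owns one 12-element entry of Batch4Buf (EntrySize=12, ChunkSize=4):
elements [0..3] hold the neuron's outputs for the 4 samples of the chunk,
[4..7] hold dF/dNET (DFOffs=4), and [8..11] hold dError/dF (DErrorOffs=8).
The equivalent explicit layout, written as a struct for clarity only:
*************************************************************************/
typedef struct
{
    double value[4];    /* neuron outputs for samples 0..3 of the chunk */
    double dfdnet[4];   /* derivative of activation w.r.t. its input    */
    double derror[4];   /* derivative of error w.r.t. neuron output     */
} chunk_entry;          /* 12 doubles total, matching EntrySize=12      */

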
static void mlpbase_mlpchunkedprocess(multilayerperceptron* network,
     /* Real */ ae_matrix* xy,
     ae_int_t cstart,
     ae_int_t csize,
     /* Real */ ae_vector* batch4buf,
     /* Real */ ae_vector* hpcbuf,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t ntotal;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t offs;
    double f;
    double df;
    double d2f;
    double v;
    ae_bool bflag;
    ae_int_t istart;
    ae_int_t entrysize;
    ae_int_t entryoffs;
    ae_int_t neuronidx;
    ae_int_t srcentryoffs;
    ae_int_t srcneuronidx;
    ae_int_t srcweightidx;
    ae_int_t neurontype;
    ae_int_t nweights;
    ae_int_t offs0;
    double v0;
    double v1;
    double v2;
    double v3;
    double s0;
    double s1;
    double s2;
    double s3;
    ae_int_t chunksize;


    chunksize = 4;
    ae_assert(csize<=chunksize, "MLPChunkedProcess: internal error (CSize>ChunkSize)", _state);
    
    /*
     * Try to use HPC core, if possible
     */
    if( hpcchunkedprocess(&network->weights, &network->structinfo, &network->columnmeans, &network->columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, _state) )
    {
        return;
    }
    
    /*
     * Read network geometry, prepare data
     */
    nin = network->structinfo.ptr.p_int[1];
    nout = network->structinfo.ptr.p_int[2];
    ntotal = network->structinfo.ptr.p_int[3];
    istart = network->structinfo.ptr.p_int[5];
    entrysize = 4;
    
    /*
     * Fill Batch4Buf by zeros.
     *
     * THIS STAGE IS VERY IMPORTANT!
     *
     * We fill all components of entry (neuron values; unlike the gradient
     * code, entries here have no dF/dNET or dError/dF parts). It allows us
     * to easily handle situations when CSize<ChunkSize by simply working
     * with ALL components of Batch4Buf, without ever looking at CSize.
     */
    for(i=0; i<=entrysize*ntotal-1; i++)
    {
        batch4buf->ptr.p_double[i] = (double)(0);
    }
    
    /*
     * Forward pass:
     * 1. Load data into Batch4Buf. If CSize<ChunkSize, data are padded by zeros.
     * 2. Perform forward pass through network
     */
    for(i=0; i<=nin-1; i++)
    {
        entryoffs = entrysize*i;
        for(j=0; j<=csize-1; j++)
        {
            if( ae_fp_neq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
            {
                batch4buf->ptr.p_double[entryoffs+j] = (xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i])/network->columnsigmas.ptr.p_double[i];
            }
            else
            {
                batch4buf->ptr.p_double[entryoffs+j] = xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i];
            }
        }
    }
    for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
    {
        entryoffs = entrysize*neuronidx;
        offs = istart+neuronidx*mlpbase_nfieldwidth;
        neurontype = network->structinfo.ptr.p_int[offs+0];
        if( neurontype>0||neurontype==-5 )
        {
            
            /*
             * "activation function" neuron, which takes value of neuron SrcNeuronIdx
             * and applies activation function to it.
             *
             * This neuron has no weights and no tunable parameters.
             */
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+0], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+0] = f;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+1], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+1] = f;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+2], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+2] = f;
            mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+3], neurontype, &f, &df, &d2f, _state);
            batch4buf->ptr.p_double[entryoffs+3] = f;
            continue;
        }
        if( neurontype==0 )
        {
            
            /*
             * "adaptive summator" neuron, whose output is a weighted sum of inputs.
             * It has weights, but has no activation function.
             */
            nweights = network->structinfo.ptr.p_int[offs+1];
            srcneuronidx = network->structinfo.ptr.p_int[offs+2];
            srcentryoffs = entrysize*srcneuronidx;
            srcweightidx = network->structinfo.ptr.p_int[offs+3];
            v0 = (double)(0);
            v1 = (double)(0);
            v2 = (double)(0);
            v3 = (double)(0);
            for(j=0; j<=nweights-1; j++)
            {
                v = network->weights.ptr.p_double[srcweightidx];
                srcweightidx = srcweightidx+1;
                v0 = v0+v*batch4buf->ptr.p_double[srcentryoffs+0];
                v1 = v1+v*batch4buf->ptr.p_double[srcentryoffs+1];
                v2 = v2+v*batch4buf->ptr.p_double[srcentryoffs+2];
                v3 = v3+v*batch4buf->ptr.p_double[srcentryoffs+3];
                srcentryoffs = srcentryoffs+entrysize;
            }
            batch4buf->ptr.p_double[entryoffs+0] = v0;
            batch4buf->ptr.p_double[entryoffs+1] = v1;
            batch4buf->ptr.p_double[entryoffs+2] = v2;
            batch4buf->ptr.p_double[entryoffs+3] = v3;
            continue;
        }
        if( neurontype<0 )
        {
            bflag = ae_false;
            if( neurontype==-2 )
            {
                
                /*
                 * Input neuron, left unchanged
                 */
                bflag = ae_true;
            }
            if( neurontype==-3 )
            {
                
                /*
                 * "-1" neuron
                 */
                batch4buf->ptr.p_double[entryoffs+0] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+1] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+2] = (double)(-1);
                batch4buf->ptr.p_double[entryoffs+3] = (double)(-1);
                bflag = ae_true;
            }
            if( neurontype==-4 )
            {
                
                /*
                 * "0" neuron
                 */
                batch4buf->ptr.p_double[entryoffs+0] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+1] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+2] = (double)(0);
                batch4buf->ptr.p_double[entryoffs+3] = (double)(0);
                bflag = ae_true;
            }
            ae_assert(bflag, "MLPChunkedProcess: internal error - unknown neuron type!", _state);
            continue;
        }
    }
    
    /*
     * SOFTMAX normalization or scaling.
     */
    ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPChunkedProcess: unknown normalization type!", _state);
    if( network->structinfo.ptr.p_int[6]==1 )
    {
        
        /*
         * SOFTMAX-normalized network.
         *
         * First, calculate (V0,V1,V2,V3) - component-wise maximum
         * of output neurons. This vector of maximum values will be
         * used for normalization of outputs prior to calculating
         * exponentials.
         *
         * NOTE: the only purpose of this stage is to prevent overflow
         *       during calculation of exponentials. With this stage
         *       we make sure that all exponentials are calculated
         *       with non-positive argument. If you load (0,0,0,0) to
         *       (V0,V1,V2,V3), your program will continue working -
         *       although with less robustness.
         */
        entryoffs = entrysize*(ntotal-nout);
        v0 = batch4buf->ptr.p_double[entryoffs+0];
        v1 = batch4buf->ptr.p_double[entryoffs+1];
        v2 = batch4buf->ptr.p_double[entryoffs+2];
        v3 = batch4buf->ptr.p_double[entryoffs+3];
        entryoffs = entryoffs+entrysize;
        for(i=1; i<=nout-1; i++)
        {
            v = batch4buf->ptr.p_double[entryoffs+0];
            if( v>v0 )
            {
                v0 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+1];
            if( v>v1 )
            {
                v1 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+2];
            if( v>v2 )
            {
                v2 = v;
            }
            v = batch4buf->ptr.p_double[entryoffs+3];
            if( v>v3 )
            {
                v3 = v;
            }
            entryoffs = entryoffs+entrysize;
        }
        
        /*
         * Then, calculate exponentials and place them to part of the
         * array which is located past the last entry. We also
         * calculate sum of exponentials.
         */
        entryoffs = entrysize*(ntotal-nout);
        offs0 = entrysize*ntotal;
        s0 = (double)(0);
        s1 = (double)(0);
        s2 = (double)(0);
        s3 = (double)(0);
        for(i=0; i<=nout-1; i++)
        {
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+0]-v0, _state);
            s0 = s0+v;
            batch4buf->ptr.p_double[offs0+0] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+1]-v1, _state);
            s1 = s1+v;
            batch4buf->ptr.p_double[offs0+1] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+2]-v2, _state);
            s2 = s2+v;
            batch4buf->ptr.p_double[offs0+2] = v;
            v = ae_exp(batch4buf->ptr.p_double[entryoffs+3]-v3, _state);
            s3 = s3+v;
            batch4buf->ptr.p_double[offs0+3] = v;
            entryoffs = entryoffs+entrysize;
            offs0 = offs0+chunksize;
        }
        
        /*
         * Write SOFTMAX-normalized values to the output array.
         */
        offs0 = entrysize*ntotal;
        for(i=0; i<=nout-1; i++)
        {
            if( csize>0 )
            {
                xy->ptr.pp_double[cstart+0][nin+i] = batch4buf->ptr.p_double[offs0+0]/s0;
            }
            if( csize>1 )
            {
                xy->ptr.pp_double[cstart+1][nin+i] = batch4buf->ptr.p_double[offs0+1]/s1;
            }
            if( csize>2 )
            {
                xy->ptr.pp_double[cstart+2][nin+i] = batch4buf->ptr.p_double[offs0+2]/s2;
            }
            if( csize>3 )
            {
                xy->ptr.pp_double[cstart+3][nin+i] = batch4buf->ptr.p_double[offs0+3]/s3;
            }
            offs0 = offs0+chunksize;
        }
    }
    else
    {
        
        /*
         * Regression network.
         *
         * No error is computed here: for each of the last NOut neurons we
         * simply un-standardise the internal value (Out = V*Sigma + Mean)
         * and write it to the output columns of XY.
         */
        for(i=0; i<=nout-1; i++)
        {
            v0 = network->columnsigmas.ptr.p_double[nin+i];
            v1 = network->columnmeans.ptr.p_double[nin+i];
            entryoffs = entrysize*(ntotal-nout+i);
            for(j=0; j<=csize-1; j++)
            {
                xy->ptr.pp_double[cstart+j][nin+i] = batch4buf->ptr.p_double[entryoffs+j]*v0+v1;
            }
        }
    }
}


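/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. The max-subtraction performed in
both chunked routines above is the standard "stable softmax" trick - exp()
is evaluated only with non-positive arguments, so it cannot overflow:

    softmax(x)_i = exp(x_i - max(x)) / sum_k exp(x_k - max(x))

A stand-alone scalar version of the same computation:
*************************************************************************/
static void stable_softmax(const double *x, int n, double *y)
{
    int i;
    double mx, s;
    mx = x[0];
    for(i=1; i<n; i++)
        if( x[i]>mx )
            mx = x[i];              /* component-wise maximum */
    s = 0.0;
    for(i=0; i<n; i++)
    {
        y[i] = exp(x[i]-mx);        /* argument <= 0, no overflow */
        s += y[i];
    }
    for(i=0; i<n; i++)
        y[i] /= s;                  /* normalize to unit sum */
}

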
/*************************************************************************
Returns T*Ln(T/Z), guarded against overflow/underflow.
Internal subroutine.
*************************************************************************/
static double mlpbase_safecrossentropy(double t,
     double z,
     ae_state *_state)
{
    double r;
    double result;


    if( ae_fp_eq(t,(double)(0)) )
    {
        result = (double)(0);
    }
    else
    {
        if( ae_fp_greater(ae_fabs(z, _state),(double)(1)) )
        {
            
            /*
             * Shouldn't be the case with softmax,
             * but we just want to be sure.
             */
            if( ae_fp_eq(t/z,(double)(0)) )
            {
                r = ae_minrealnumber;
            }
            else
            {
                r = t/z;
            }
        }
        else
        {
            
            /*
             * Normal case
             */
            if( ae_fp_eq(z,(double)(0))||ae_fp_greater_eq(ae_fabs(t, _state),ae_maxrealnumber*ae_fabs(z, _state)) )
            {
                r = ae_maxrealnumber;
            }
            else
            {
                r = t/z;
            }
        }
        result = t*ae_log(r, _state);
    }
    return result;
}


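/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. SafeCrossEntropy above evaluates
one term T*Ln(T/Z) of the cross-entropy sum

    E = sum_i t_i * ln(t_i / z_i)

with the convention 0*ln(0/z)=0 and with the ratio clamped away from zero
and infinity. For comparison, the plain version (guarded only for the T=0
case) would read:
*************************************************************************/
static double cross_entropy_plain(const double *t, const double *z, int n)
{
    int i;
    double e = 0.0;
    for(i=0; i<n; i++)
        if( t[i]!=0.0 )
            e += t[i]*log(t[i]/z[i]);   /* no overflow/underflow guards */
    return e;
}

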
/*************************************************************************
This function performs backward pass of neural network randomization:
* it assumes that Network.Weights stores standard deviation of weights
  (weights are not generated yet, only their deviations are present)
* it sets deviations of weights which feed NeuronIdx-th neuron to specified value
* it recursively passes to deeper neurons and modifies their weights
* it stops after encountering nonlinear neurons, linear activation function,
  input neurons, "0" and "-1" neurons

  -- ALGLIB --
     Copyright 27.06.2013 by Bochkanov Sergey
*************************************************************************/
static void mlpbase_randomizebackwardpass(multilayerperceptron* network,
     ae_int_t neuronidx,
     double v,
     ae_state *_state)
{
    ae_int_t istart;
    ae_int_t neurontype;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t w1;
    ae_int_t w2;
    ae_int_t offs;
    ae_int_t i;


    istart = network->structinfo.ptr.p_int[5];
    neurontype = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+0];
    if( neurontype==-2 )
    {
        
        /*
         * Input neuron - stop
         */
        return;
    }
    if( neurontype==-3 )
    {
        
        /*
         * "-1" neuron: stop
         */
        return;
    }
    if( neurontype==-4 )
    {
        
        /*
         * "0" neuron: stop
         */
        return;
    }
    if( neurontype==0 )
    {
        
        /*
         * Adaptive summator neuron:
         * * modify deviations of its weights
         * * recursively call this function for its inputs
         */
        offs = istart+neuronidx*mlpbase_nfieldwidth;
        n1 = network->structinfo.ptr.p_int[offs+2];
        n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
        w1 = network->structinfo.ptr.p_int[offs+3];
        w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
        for(i=w1; i<=w2; i++)
        {
            network->weights.ptr.p_double[i] = v;
        }
        for(i=n1; i<=n2; i++)
        {
            mlpbase_randomizebackwardpass(network, i, v, _state);
        }
        return;
    }
    if( neurontype==-5 )
    {
        
        /*
         * Linear activation function: stop
         */
        return;
    }
    if( neurontype>0 )
    {
        
        /*
         * Nonlinear activation function: stop
         */
        return;
    }
    ae_assert(ae_false, "RandomizeBackwardPass: unexpected neuron type", _state);
}


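/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. After the backward pass above has
filled Network.Weights with standard deviations, actual weights still have
to be drawn; a typical final step is w_i = sigma_i * n_i with n_i ~ N(0,1).
The exact generator used by MLPRandomize elsewhere in this unit may differ;
the crude LCG + Box-Muller below is used here only to keep the sketch
self-contained, and the function name is hypothetical.
*************************************************************************/
static void draw_weights_from_deviations(double *w, int n, unsigned *seed)
{
    int i;
    for(i=0; i<n; i++)
    {
        /* two crude uniforms in (0,1); substitute any proper RNG */
        double u1 = ((*seed = *seed*1103515245u+12345u)%30000u+1u)/30001.0;
        double u2 = ((*seed = *seed*1103515245u+12345u)%30000u+1u)/30001.0;
        /* Box-Muller transform: standard normal deviate */
        double n01 = sqrt(-2.0*log(u1))*cos(6.283185307179586*u2);
        w[i] = w[i]*n01;    /* w[i] held the deviation, now holds the weight */
    }
}

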
void _modelerrors_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    modelerrors *p = (modelerrors*)_p;
    ae_touch_ptr((void*)p);
}


void _modelerrors_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    modelerrors *dst = (modelerrors*)_dst;
    modelerrors *src = (modelerrors*)_src;
    dst->relclserror = src->relclserror;
    dst->avgce = src->avgce;
    dst->rmserror = src->rmserror;
    dst->avgerror = src->avgerror;
    dst->avgrelerror = src->avgrelerror;
}


void _modelerrors_clear(void* _p)
{
    modelerrors *p = (modelerrors*)_p;
    ae_touch_ptr((void*)p);
}


void _modelerrors_destroy(void* _p)
{
    modelerrors *p = (modelerrors*)_p;
    ae_touch_ptr((void*)p);
}


void _smlpgrad_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    smlpgrad *p = (smlpgrad*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic);
}


void _smlpgrad_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    smlpgrad *dst = (smlpgrad*)_dst;
    smlpgrad *src = (smlpgrad*)_src;
    dst->f = src->f;
    ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic);
}


void _smlpgrad_clear(void* _p)
{
    smlpgrad *p = (smlpgrad*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->g);
}


void _smlpgrad_destroy(void* _p)
{
    smlpgrad *p = (smlpgrad*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->g);
}


void _multilayerperceptron_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    multilayerperceptron *p = (multilayerperceptron*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->hllayersizes, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->hlconnections, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->hlneurons, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->structinfo, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->weights, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->columnmeans, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->columnsigmas, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->neurons, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dfdnet, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->derror, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->nwbuf, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->integerbuf, 0, DT_INT, _state, make_automatic);
    _modelerrors_init(&p->err, _state, make_automatic);
    ae_vector_init(&p->rndbuf, 0, DT_REAL, _state, make_automatic);
    ae_shared_pool_init(&p->buf, _state, make_automatic);
    ae_shared_pool_init(&p->gradbuf, _state, make_automatic);
    ae_matrix_init(&p->dummydxy, 0, 0, DT_REAL, _state, make_automatic);
    _sparsematrix_init(&p->dummysxy, _state, make_automatic);
    ae_vector_init(&p->dummyidx, 0, DT_INT, _state, make_automatic);
    ae_shared_pool_init(&p->dummypool, _state, make_automatic);
}


void _multilayerperceptron_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    multilayerperceptron *dst = (multilayerperceptron*)_dst;
    multilayerperceptron *src = (multilayerperceptron*)_src;
    dst->hlnetworktype = src->hlnetworktype;
    dst->hlnormtype = src->hlnormtype;
    ae_vector_init_copy(&dst->hllayersizes, &src->hllayersizes, _state, make_automatic);
    ae_vector_init_copy(&dst->hlconnections, &src->hlconnections, _state, make_automatic);
    ae_vector_init_copy(&dst->hlneurons, &src->hlneurons, _state, make_automatic);
    ae_vector_init_copy(&dst->structinfo, &src->structinfo, _state, make_automatic);
    ae_vector_init_copy(&dst->weights, &src->weights, _state, make_automatic);
    ae_vector_init_copy(&dst->columnmeans, &src->columnmeans, _state, make_automatic);
    ae_vector_init_copy(&dst->columnsigmas, &src->columnsigmas, _state, make_automatic);
    ae_vector_init_copy(&dst->neurons, &src->neurons, _state, make_automatic);
    ae_vector_init_copy(&dst->dfdnet, &src->dfdnet, _state, make_automatic);
    ae_vector_init_copy(&dst->derror, &src->derror, _state, make_automatic);
    ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
    ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
    ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
    ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
    ae_vector_init_copy(&dst->nwbuf, &src->nwbuf, _state, make_automatic);
    ae_vector_init_copy(&dst->integerbuf, &src->integerbuf, _state, make_automatic);
    _modelerrors_init_copy(&dst->err, &src->err, _state, make_automatic);
    ae_vector_init_copy(&dst->rndbuf, &src->rndbuf, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->buf, &src->buf, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->gradbuf, &src->gradbuf, _state, make_automatic);
    ae_matrix_init_copy(&dst->dummydxy, &src->dummydxy, _state, make_automatic);
    _sparsematrix_init_copy(&dst->dummysxy, &src->dummysxy, _state, make_automatic);
    ae_vector_init_copy(&dst->dummyidx, &src->dummyidx, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->dummypool, &src->dummypool, _state, make_automatic);
}


void _multilayerperceptron_clear(void* _p)
{
    multilayerperceptron *p = (multilayerperceptron*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->hllayersizes);
    ae_vector_clear(&p->hlconnections);
    ae_vector_clear(&p->hlneurons);
    ae_vector_clear(&p->structinfo);
    ae_vector_clear(&p->weights);
    ae_vector_clear(&p->columnmeans);
    ae_vector_clear(&p->columnsigmas);
    ae_vector_clear(&p->neurons);
    ae_vector_clear(&p->dfdnet);
    ae_vector_clear(&p->derror);
    ae_vector_clear(&p->x);
    ae_vector_clear(&p->y);
    ae_matrix_clear(&p->xy);
    ae_vector_clear(&p->xyrow);
    ae_vector_clear(&p->nwbuf);
    ae_vector_clear(&p->integerbuf);
    _modelerrors_clear(&p->err);
    ae_vector_clear(&p->rndbuf);
    ae_shared_pool_clear(&p->buf);
    ae_shared_pool_clear(&p->gradbuf);
    ae_matrix_clear(&p->dummydxy);
    _sparsematrix_clear(&p->dummysxy);
    ae_vector_clear(&p->dummyidx);
    ae_shared_pool_clear(&p->dummypool);
}


void _multilayerperceptron_destroy(void* _p)
{
    multilayerperceptron *p = (multilayerperceptron*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->hllayersizes);
    ae_vector_destroy(&p->hlconnections);
    ae_vector_destroy(&p->hlneurons);
    ae_vector_destroy(&p->structinfo);
    ae_vector_destroy(&p->weights);
    ae_vector_destroy(&p->columnmeans);
    ae_vector_destroy(&p->columnsigmas);
    ae_vector_destroy(&p->neurons);
    ae_vector_destroy(&p->dfdnet);
    ae_vector_destroy(&p->derror);
    ae_vector_destroy(&p->x);
    ae_vector_destroy(&p->y);
    ae_matrix_destroy(&p->xy);
    ae_vector_destroy(&p->xyrow);
    ae_vector_destroy(&p->nwbuf);
    ae_vector_destroy(&p->integerbuf);
    _modelerrors_destroy(&p->err);
    ae_vector_destroy(&p->rndbuf);
    ae_shared_pool_destroy(&p->buf);
    ae_shared_pool_destroy(&p->gradbuf);
    ae_matrix_destroy(&p->dummydxy);
    _sparsematrix_destroy(&p->dummysxy);
    ae_vector_destroy(&p->dummyidx);
    ae_shared_pool_destroy(&p->dummypool);
}


#endif
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
Multiclass Fisher LDA

Subroutine finds coefficients of a linear combination which optimally
separates the training set into classes.

COMMERCIAL EDITION OF ALGLIB:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function, which can be used from C++ and C#:
  ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
  ! * multithreading support
  !
  ! Intel MKL gives approximately constant (with respect to number of
  ! worker threads) acceleration factor which depends on CPU being used,
  ! problem size and "baseline" ALGLIB edition which is used for
  ! comparison. Best results are achieved for high-dimensional problems
  ! (NVars is at least 256).
  !
  ! Multithreading is used to accelerate initial phase of LDA, which
  ! includes calculation of products of large matrices. Again, for best
  ! efficiency problem must be high-dimensional.
  !
  ! Generally, commercial ALGLIB is several times faster than open-source
  ! generic C edition, and many times faster than open-source C# edition.
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars].
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2


OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if internal EVD subroutine hasn't converged
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed (NPoints<0,
                          NVars<1, NClasses<2)
                    *  1, if task has been solved
                    *  2, if there was a multicollinearity in training set,
                          but task has been solved.
    W           -   linear combination coefficients, array[0..NVars-1]

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherlda(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t* info,
     /* Real */ ae_vector* w,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix w2;

    ae_frame_make(_state, &_frame_block);
    memset(&w2, 0, sizeof(w2));
    *info = 0;
    ae_vector_clear(w);
    ae_matrix_init(&w2, 0, 0, DT_REAL, _state, ae_true);

    fisherldan(xy, npoints, nvars, nclasses, info, &w2, _state);
    if( *info>0 )
    {
        ae_vector_set_length(w, nvars, _state);
        ae_v_move(&w->ptr.p_double[0], 1, &w2.ptr.pp_double[0][0], w2.stride, ae_v_len(0,nvars-1));
    }
    ae_frame_leave(_state);
}


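/*************************************************************************
Illustrative sketch, NOT part of ALGLIB. FisherLDA above simply keeps the
first (best separating) column of the basis returned by FisherLDAN. Once
W is obtained, a sample X is reduced to a scalar by projecting it onto
the discriminant direction, y = w'*x; the hypothetical helper below shows
that projection step.
*************************************************************************/
static double lda_project(const double *w, const double *x, int nvars)
{
    int j;
    double y = 0.0;
    for(j=0; j<nvars; j++)
        y += w[j]*x[j];     /* scalar projection onto the LDA direction */
    return y;
}

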
/*************************************************************************
N-dimensional multiclass Fisher LDA

Subroutine finds coefficients of linear combinations which optimally
separate the training set into classes. It returns an N-dimensional basis
whose vectors are sorted by quality of training set separation (in
descending order).

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars].
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2


OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if internal EVD subroutine hasn't converged
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed (NPoints<0,
                          NVars<1, NClasses<2)
                    *  1, if task has been solved
                    *  2, if there was a multicollinearity in training set,
                          but task has been solved.
    W           -   basis, array[0..NVars-1,0..NVars-1]
                    columns of matrix store basis vectors, sorted by
                    quality of training set separation (in descending order)

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherldan(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t* info,
     /* Real */ ae_matrix* w,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t m;
    double v;
    ae_vector c;
    ae_vector mu;
    ae_matrix muc;
    ae_vector nc;
    ae_matrix sw;
    ae_matrix st;
    ae_matrix z;
    ae_matrix z2;
    ae_matrix tm;
    ae_matrix sbroot;
    ae_matrix a;
    ae_matrix xyc;
    ae_matrix xyproj;
    ae_matrix wproj;
    ae_vector tf;
    ae_vector d;
    ae_vector d2;
    ae_vector work;

    ae_frame_make(_state, &_frame_block);
    memset(&c, 0, sizeof(c));
    memset(&mu, 0, sizeof(mu));
    memset(&muc, 0, sizeof(muc));
    memset(&nc, 0, sizeof(nc));
    memset(&sw, 0, sizeof(sw));
    memset(&st, 0, sizeof(st));
    memset(&z, 0, sizeof(z));
    memset(&z2, 0, sizeof(z2));
    memset(&tm, 0, sizeof(tm));
    memset(&sbroot, 0, sizeof(sbroot));
    memset(&a, 0, sizeof(a));
    memset(&xyc, 0, sizeof(xyc));
    memset(&xyproj, 0, sizeof(xyproj));
    memset(&wproj, 0, sizeof(wproj));
    memset(&tf, 0, sizeof(tf));
    memset(&d, 0, sizeof(d));
    memset(&d2, 0, sizeof(d2));
    memset(&work, 0, sizeof(work));
    *info = 0;
    ae_matrix_clear(w);
    ae_vector_init(&c, 0, DT_INT, _state, ae_true);
    ae_vector_init(&mu, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&muc, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&nc, 0, DT_INT, _state, ae_true);
    ae_matrix_init(&sw, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&st, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tm, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&sbroot, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&xyc, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&xyproj, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&wproj, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tf, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);


    /*
     * Test data
     */
    if( (npoints<0||nvars<1)||nclasses<2 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;
    
    /*
     * Special case: NPoints<=1
     * Degenerate task.
     */
    if( npoints<=1 )
    {
        *info = 2;
        ae_matrix_set_length(w, nvars, nvars, _state);
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                if( i==j )
                {
                    w->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    w->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Prepare temporaries
     */
    ae_vector_set_length(&tf, nvars, _state);
    ae_vector_set_length(&work, ae_maxint(nvars, npoints, _state)+1, _state);
    ae_matrix_set_length(&xyc, npoints, nvars, _state);
    
    /*
     * Convert class labels from reals to integers (just for convenience)
     */
    ae_vector_set_length(&c, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {
        c.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
    }
    
    /*
     * Calculate class sizes, class means
     */
    ae_vector_set_length(&mu, nvars, _state);
    ae_matrix_set_length(&muc, nclasses, nvars, _state);
    ae_vector_set_length(&nc, nclasses, _state);
    for(j=0; j<=nvars-1; j++)
    {
        mu.ptr.p_double[j] = (double)(0);
    }
    for(i=0; i<=nclasses-1; i++)
    {
        nc.ptr.p_int[i] = 0;
        for(j=0; j<=nvars-1; j++)
        {
            muc.ptr.pp_double[i][j] = (double)(0);
        }
    }
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_add(&mu.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        ae_v_add(&muc.ptr.pp_double[c.ptr.p_int[i]][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        nc.ptr.p_int[c.ptr.p_int[i]] = nc.ptr.p_int[c.ptr.p_int[i]]+1;
    }
    for(i=0; i<=nclasses-1; i++)
    {
        v = (double)1/(double)nc.ptr.p_int[i];
        ae_v_muld(&muc.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), v);
    }
    v = (double)1/(double)npoints;
    ae_v_muld(&mu.ptr.p_double[0], 1, ae_v_len(0,nvars-1), v);
    
    /*
     * Create ST matrix
     */
    ae_matrix_set_length(&st, nvars, nvars, _state);
    for(i=0; i<=nvars-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            st.ptr.pp_double[i][j] = (double)(0);
        }
    }
    for(k=0; k<=npoints-1; k++)
    {
        ae_v_move(&xyc.ptr.pp_double[k][0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nvars-1));
        ae_v_sub(&xyc.ptr.pp_double[k][0], 1, &mu.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
    }
    rmatrixgemm(nvars, nvars, npoints, 1.0, &xyc, 0, 0, 1, &xyc, 0, 0, 0, 0.0, &st, 0, 0, _state);
    
    /*
     * Create SW matrix
     */
    ae_matrix_set_length(&sw, nvars, nvars, _state);
    for(i=0; i<=nvars-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            sw.ptr.pp_double[i][j] = (double)(0);
        }
    }
    for(k=0; k<=npoints-1; k++)
    {
        ae_v_move(&xyc.ptr.pp_double[k][0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nvars-1));
        ae_v_sub(&xyc.ptr.pp_double[k][0], 1, &muc.ptr.pp_double[c.ptr.p_int[k]][0], 1, ae_v_len(0,nvars-1));
    }
    rmatrixgemm(nvars, nvars, npoints, 1.0, &xyc, 0, 0, 1, &xyc, 0, 0, 0, 0.0, &sw, 0, 0, _state);
    
    /*
     * Maximize ratio J=(w'*ST*w)/(w'*SW*w).
     *
     * First, make transition from w to v such that w'*ST*w becomes v'*v:
     *     v  = root(ST)*w = R*w
     *     R  = root(D)*Z'
     *     w  = (root(ST)^-1)*v = RI*v
     *     RI = Z*inv(root(D))
     *     J  = (v'*v)/(v'*(RI'*SW*RI)*v)
     *     ST = Z*D*Z'
     *
     * so we have
     *
     *     J = (v'*v) / (v'*(inv(root(D))*Z'*SW*Z*inv(root(D)))*v)  =
     *       = (v'*v) / (v'*A*v)
     */
    if( !smatrixevd(&st, nvars, 1, ae_true, &d, &z, _state) )
    {
        *info = -4;
        ae_frame_leave(_state);
        return;
    }
    ae_matrix_set_length(w, nvars, nvars, _state);
    if( ae_fp_less_eq(d.ptr.p_double[nvars-1],(double)(0))||ae_fp_less_eq(d.ptr.p_double[0],1000*ae_machineepsilon*d.ptr.p_double[nvars-1]) )
    {
        
        /*
         * Special case: D[NVars-1]<=0
         * Degenerate task (all variables take the same value).
         */
        if( ae_fp_less_eq(d.ptr.p_double[nvars-1],(double)(0)) )
        {
            *info = 2;
            for(i=0; i<=nvars-1; i++)
            {
                for(j=0; j<=nvars-1; j++)
                {
                    if( i==j )
                    {
                        w->ptr.pp_double[i][j] = (double)(1);
                    }
                    else
                    {
                        w->ptr.pp_double[i][j] = (double)(0);
                    }
                }
            }
            ae_frame_leave(_state);
            return;
        }
        
        /*
         * Special case: degenerate ST matrix, multicollinearity found.
         * Since we know ST eigenvalues/vectors we can translate the task
         * to non-degenerate form.
         *
         * Let WG be an orthogonal basis of the non-zero variance subspace
         * of ST, and let WZ be an orthogonal basis of the zero variance
         * subspace.
         *
         * Projection on WG allows us to use LDA on reduced M-dimensional
|
|
* subspace, N-M vectors of WZ allows us to update reduced LDA
|
|
* factors to full N-dimensional subspace.
|
|
*/
|
|
m = 0;
|
|
for(k=0; k<=nvars-1; k++)
|
|
{
|
|
if( ae_fp_less_eq(d.ptr.p_double[k],1000*ae_machineepsilon*d.ptr.p_double[nvars-1]) )
|
|
{
|
|
m = k+1;
|
|
}
|
|
}
|
|
ae_assert(m!=0, "FisherLDAN: internal error #1", _state);
|
|
ae_matrix_set_length(&xyproj, npoints, nvars-m+1, _state);
|
|
rmatrixgemm(npoints, nvars-m, nvars, 1.0, xy, 0, 0, 0, &z, 0, m, 0, 0.0, &xyproj, 0, 0, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
xyproj.ptr.pp_double[i][nvars-m] = xy->ptr.pp_double[i][nvars];
|
|
}
|
|
fisherldan(&xyproj, npoints, nvars-m, nclasses, info, &wproj, _state);
|
|
if( *info<0 )
|
|
{
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
rmatrixgemm(nvars, nvars-m, nvars-m, 1.0, &z, 0, m, 0, &wproj, 0, 0, 0, 0.0, w, 0, 0, _state);
|
|
for(k=nvars-m; k<=nvars-1; k++)
|
|
{
|
|
ae_v_move(&w->ptr.pp_double[0][k], w->stride, &z.ptr.pp_double[0][k-(nvars-m)], z.stride, ae_v_len(0,nvars-1));
|
|
}
|
|
*info = 2;
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* General case: no multicollinearity
|
|
*/
|
|
ae_matrix_set_length(&tm, nvars, nvars, _state);
|
|
ae_matrix_set_length(&a, nvars, nvars, _state);
|
|
rmatrixgemm(nvars, nvars, nvars, 1.0, &sw, 0, 0, 0, &z, 0, 0, 0, 0.0, &tm, 0, 0, _state);
|
|
rmatrixgemm(nvars, nvars, nvars, 1.0, &z, 0, 0, 1, &tm, 0, 0, 0, 0.0, &a, 0, 0, _state);
|
|
for(i=0; i<=nvars-1; i++)
|
|
{
|
|
for(j=0; j<=nvars-1; j++)
|
|
{
|
|
a.ptr.pp_double[i][j] = a.ptr.pp_double[i][j]/ae_sqrt(d.ptr.p_double[i]*d.ptr.p_double[j], _state);
|
|
}
|
|
}
|
|
if( !smatrixevd(&a, nvars, 1, ae_true, &d2, &z2, _state) )
|
|
{
|
|
*info = -4;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
for(i=0; i<=nvars-1; i++)
|
|
{
|
|
for(k=0; k<=nvars-1; k++)
|
|
{
|
|
z2.ptr.pp_double[i][k] = z2.ptr.pp_double[i][k]/ae_sqrt(d.ptr.p_double[i], _state);
|
|
}
|
|
}
|
|
rmatrixgemm(nvars, nvars, nvars, 1.0, &z, 0, 0, 0, &z2, 0, 0, 0, 0.0, w, 0, 0, _state);
|
|
}
|
|
|
|
/*
|
|
* Post-processing:
|
|
* * normalization
|
|
* * converting to non-negative form, if possible
|
|
*/
|
|
for(k=0; k<=nvars-1; k++)
|
|
{
|
|
v = ae_v_dotproduct(&w->ptr.pp_double[0][k], w->stride, &w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1));
|
|
v = 1/ae_sqrt(v, _state);
|
|
ae_v_muld(&w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1), v);
|
|
v = (double)(0);
|
|
for(i=0; i<=nvars-1; i++)
|
|
{
|
|
v = v+w->ptr.pp_double[i][k];
|
|
}
|
|
if( ae_fp_less(v,(double)(0)) )
|
|
{
|
|
ae_v_muld(&w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1), -1);
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
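
/*************************************************************************
Illustrative usage sketch (NOT part of the library). It shows how the
N-dimensional Fisher LDA above is typically driven through the C++
wrappers declared in dataanalysis.h; the wrapper signature is assumed to
follow the standard ALGLIB 3.x convention. The block is disabled with
"#if 0" so it does not affect the build.
*************************************************************************/
#if 0
static void example_fisherldan_usage()
{
    /*
     * Four 2D points, two classes; the last column holds the class
     * label in 0..NClasses-1, as required by the implementation above.
     */
    alglib::real_2d_array xy = "[[1,1,0],[1,2,0],[4,1,1],[4,3,1]]";
    alglib::ae_int_t info;
    alglib::real_2d_array w;

    /* NPoints=4, NVars=2, NClasses=2; on success Info=1 and W is NVars*NVars */
    alglib::fisherldan(xy, 4, 2, 2, info, w);

    /* columns of W are unit-norm discriminating directions, best one first */
}
#endif /* illustrative example */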

#endif
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This function creates an SSA model object. Right after creation the model
is in "dummy" mode - you can add data, but analysis/prediction will
return just zeros (it assumes that the basis is empty).

HOW TO USE SSA MODEL:

1. create model with ssacreate()
2. add data with one/many ssaaddsequence() calls
3. choose SSA algorithm with one of the ssasetalgo...() functions:
   * ssasetalgotopkdirect() for direct one-run analysis
   * ssasetalgotopkrealtime() for an algorithm optimized for many
     subsequent runs with warm-start capabilities
   * ssasetalgoprecomputed() for a user-supplied basis
4. set window width with ssasetwindow()
5. perform one of the analysis-related activities:
   a) call ssagetbasis() to get the basis
   b) call ssaanalyzelast(), ssaanalyzesequence() or ssaanalyzelastwindow()
      to perform analysis (trend/noise separation)
   c) call one of the forecasting functions (ssaforecastlast() or
      ssaforecastsequence()) to perform prediction; alternatively, you can
      extract linear recurrence coefficients with ssagetlrr().
   SSA analysis will be performed during the first call to an analysis-
   related function. The SSA model is smart enough to track all changes
   in the dataset and model settings, to cache the previously computed
   basis and to re-evaluate the basis only when necessary.

Additionally, if your setting involves a constant stream of incoming
data, you can quickly update an already calculated model with one of the
incremental append-and-update functions: ssaappendpointandupdate() or
ssaappendsequenceandupdate().

NOTE: steps (2), (3), (4) can be performed in arbitrary order.

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S               -   structure which stores model state

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacreate(ssamodel* s, ae_state *_state)
{

    _ssamodel_clear(s);

    /*
     * Model data, algorithms and settings
     */
    s->nsequences = 0;
    ae_vector_set_length(&s->sequenceidx, 1, _state);
    s->sequenceidx.ptr.p_int[0] = 0;
    s->algotype = 0;
    s->windowwidth = 1;
    s->rtpowerup = 1;
    s->arebasisandsolvervalid = ae_false;
    s->rngseed = 1;
    s->defaultsubspaceits = 10;
    s->memorylimit = 50000000;

    /*
     * Debug counters
     */
    s->dbgcntevd = 0;
}
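
/*************************************************************************
Illustrative sketch (NOT part of the library): the create/configure/
analyze pipeline described in the comment above, written against the C++
wrappers from dataanalysis.h (signatures assumed to follow the standard
ALGLIB 3.x convention). Disabled with "#if 0" so it does not affect the
build.
*************************************************************************/
#if 0
static void example_ssa_pipeline()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1]";
    alglib::real_1d_array trend, noise;

    alglib::ssacreate(s);                   /* step 1: model in "dummy" mode */
    alglib::ssaaddsequence(s, x);           /* step 2: attach data           */
    alglib::ssasetalgotopkdirect(s, 2);     /* step 3: choose algorithm      */
    alglib::ssasetwindow(s, 3);             /* step 4: window width          */

    /* step 5: the first analysis call triggers the actual basis computation */
    alglib::ssaanalyzesequence(s, x, trend, noise);
}
#endif /* illustrative example */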


/*************************************************************************
This function sets window width for the SSA model. You should call it
before the analysis phase. The default window width is 1 (not intended
for real use).

Special notes:
* this function call can be performed at any moment before the first call
  to analysis-related functions
* changing window width invalidates the internally stored basis; if you
  change window width AFTER you call an analysis-related function, the
  next analysis phase will require re-calculation of the basis according
  to the current algorithm.
* calling this function with exactly the same window width as the current
  one has no effect
* if you specify a window width larger than any data sequence stored in
  the model, analysis will return zero basis.

INPUT PARAMETERS:
    S               -   SSA model created with ssacreate()
    WindowWidth     -   >=1, new window width

OUTPUT PARAMETERS:
    S               -   SSA model, updated

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetwindow(ssamodel* s, ae_int_t windowwidth, ae_state *_state)
{

    ae_assert(windowwidth>=1, "SSASetWindow: WindowWidth<1", _state);
    if( windowwidth==s->windowwidth )
    {
        return;
    }
    s->windowwidth = windowwidth;
    s->arebasisandsolvervalid = ae_false;
}


/*************************************************************************
This function sets the seed which is used to initialize the internal RNG
when we make pseudorandom decisions on model updates.

By default, a deterministic seed is used - which results in the same
sequence of pseudorandom decisions every time you run the SSA model. If
you specify a non-deterministic seed value, then the SSA model may return
slightly different results after each run.

This function can be useful when you have several SSA models updated with
ssaappendpointandupdate() called with 0<UpdateIts<1 (fractional value)
and due to performance limitations want them to perform updates at
different moments.

INPUT PARAMETERS:
    S       -   SSA model
    Seed    -   seed:
                * positive values = use deterministic seed for each run of
                  algorithms which depend on random initialization
                * zero or negative values = use non-deterministic seed

  -- ALGLIB --
     Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetseed(ssamodel* s, ae_int_t seed, ae_state *_state)
{

    s->rngseed = seed;
}


/*************************************************************************
This function sets the length of the power-up cycle for the real-time
algorithm.

By default, this algorithm performs a costly O(N*WindowWidth^2) init
phase followed by a full run of truncated EVD. However, if you are ready
to live with a slightly lower-quality basis during the first few
iterations, you can split this O(N*WindowWidth^2) initialization between
several subsequent append-and-update rounds. It results in better latency
of the algorithm.

This function invalidates the basis/solver; the next analysis call will
result in full recalculation of everything.

INPUT PARAMETERS:
    S       -   SSA model
    PWLen   -   length of the power-up stage:
                * 0 means that no power-up is requested
                * 1 is the same as 0
                * >1 means that delayed power-up is performed

  -- ALGLIB --
     Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetpoweruplength(ssamodel* s, ae_int_t pwlen, ae_state *_state)
{

    ae_assert(pwlen>=0, "SSASetPowerUpLength: PWLen<0", _state);
    s->rtpowerup = ae_maxint(pwlen, 1, _state);
    s->arebasisandsolvervalid = ae_false;
}


/*************************************************************************
This function sets the memory limit of SSA analysis.

Straightforward SSA with sequence length T and window width W needs
O(T*W) memory. It is possible to reduce memory consumption by splitting
the task into smaller chunks.

This function allows you to specify an approximate memory limit (measured
in double precision numbers used for buffers). Actual memory consumption
will be comparable to the number specified by you.

The default memory limit is 50,000,000 doubles (400 MB) in the current
version.

INPUT PARAMETERS:
    S       -   SSA model
    MemLimit-   memory limit, >=0. Zero value means no limit.

  -- ALGLIB --
     Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetmemorylimit(ssamodel* s, ae_int_t memlimit, ae_state *_state)
{

    if( memlimit<0 )
    {
        memlimit = 0;
    }
    s->memorylimit = memlimit;
}


/*************************************************************************
This function adds a data sequence to the SSA model. Only single-
dimensional sequences are supported.

What is a sequence? The following definitions/requirements apply:
* a sequence is an array of values measured in subsequent, equally
  separated time moments (ticks).
* you may have many sequences in your dataset; say, one sequence may
  correspond to one trading session.
* sequence length should be larger than the current window length
  (shorter sequences will be ignored during analysis).
* analysis is performed within a sequence; different sequences are NOT
  stacked together to produce one large contiguous stream of data.
* analysis is performed for all sequences at once, i.e. the same set of
  basis vectors is computed for all sequences

INCREMENTAL ANALYSIS

This function is NOT intended for incremental updates of a previously
found SSA basis. Calling it invalidates all previous analysis results
(the basis is reset and will be recalculated from scratch during the next
analysis).

If you want to perform incremental/real-time SSA, consider using the
following functions:
* ssaappendpointandupdate() for appending one point
* ssaappendsequenceandupdate() for appending a new sequence

INPUT PARAMETERS:
    S               -   SSA model created with ssacreate()
    X               -   array[N], data, can be larger (additional values
                        are ignored)
    N               -   data length, can be automatically determined from
                        the array length. N>=0.

OUTPUT PARAMETERS:
    S               -   SSA model, updated

NOTE: you can clear the dataset with ssacleardata()

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaaddsequence(ssamodel* s,
     /* Real */ ae_vector* x,
     ae_int_t n,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t offs;

    ae_assert(n>=0, "SSAAddSequence: N<0", _state);
    ae_assert(x->cnt>=n, "SSAAddSequence: X is too short", _state);
    ae_assert(isfinitevector(x, n, _state), "SSAAddSequence: X contains infinities or NaNs", _state);

    /*
     * Invalidate model
     */
    s->arebasisandsolvervalid = ae_false;

    /*
     * Add sequence
     */
    ivectorgrowto(&s->sequenceidx, s->nsequences+2, _state);
    s->sequenceidx.ptr.p_int[s->nsequences+1] = s->sequenceidx.ptr.p_int[s->nsequences]+n;
    rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences+1], _state);
    offs = s->sequenceidx.ptr.p_int[s->nsequences];
    for(i=0; i<=n-1; i++)
    {
        s->sequencedata.ptr.p_double[offs+i] = x->ptr.p_double[i];
    }
    inc(&s->nsequences, _state);
}
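
/*************************************************************************
Illustrative sketch (NOT part of the library): two independent sequences
- say, two trading sessions - share one basis but are never glued into a
single stream, exactly as the doc comment above states. C++ wrapper
signatures are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_two_sequences()
{
    alglib::ssamodel s;
    alglib::real_1d_array day1 = "[1.0,1.2,0.9,1.1,1.0,1.2,0.9,1.1]";
    alglib::real_1d_array day2 = "[1.1,1.3,1.0,1.2,1.1,1.3]";
    alglib::real_1d_array trend, noise;

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, day1);    /* sequence #1 */
    alglib::ssaaddsequence(s, day2);    /* sequence #2 */
    alglib::ssasetwindow(s, 3);
    alglib::ssasetalgotopkdirect(s, 1);

    /* one basis is built from both sequences; here we analyze day1 only */
    alglib::ssaanalyzesequence(s, day1, trend, noise);
}
#endif /* illustrative example */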


/*************************************************************************
This function appends a single point to the last data sequence stored in
the SSA model and tries to update the model in an incremental manner (if
possible with the current algorithm).

If you want to add more than one point at once:
* if you want to add M points to the same sequence, perform M-1 calls
  with the UpdateIts parameter set to 0.0, and the last call with
  non-zero UpdateIts.
* if you want to add a new sequence, use ssaappendsequenceandupdate()

Running time of this function does NOT depend on dataset size, only on
window width and the number of singular vectors. Depending on the
algorithm being used, the incremental update has complexity:
* for top-K real time   - O(UpdateIts*K*Width^2), with fractional UpdateIts
* for top-K direct      - O(Width^3) for any non-zero UpdateIts
* for precomputed basis - O(1), no update is performed

INPUT PARAMETERS:
    S               -   SSA model created with ssacreate()
    X               -   new point
    UpdateIts       -   >=0, floating point (!) value, desired update
                        frequency:
                        * zero value means that the point is stored, but
                          no update is performed
                        * integer part of the value means that the
                          specified number of iterations is always
                          performed
                        * fractional part of the value means that one
                          iteration is performed with this probability.

                        Recommended value: 0<UpdateIts<=1. Values larger
                        than 1 are VERY seldom needed. If your dataset
                        changes slowly, you can set it to 0.1 and skip
                        90% of updates.

                        In any case, no information is lost even with a
                        zero value of UpdateIts! It will be incorporated
                        into the model, sooner or later.

OUTPUT PARAMETERS:
    S               -   SSA model, updated

NOTE: this function uses the internal RNG to handle fractional values of
      UpdateIts. By default it is initialized with a fixed seed during
      the initial calculation of the basis. Thus subsequent calls to this
      function will result in the same sequence of pseudorandom decisions.

      However, if you have several SSA models which are calculated
      simultaneously, and if you want to reduce computational bottlenecks
      by performing random updates at random moments, then a fixed seed
      is not an option - all updates will fire at the same moments.

      You may change it with the ssasetseed() function.

NOTE: this function throws an exception if called for an empty dataset
      (there is no "last" sequence to modify).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendpointandupdate(ssamodel* s,
     double x,
     double updateits,
     ae_state *_state)
{

    ae_assert(ae_isfinite(x, _state), "SSAAppendPointAndUpdate: X is not finite", _state);
    ae_assert(ae_isfinite(updateits, _state), "SSAAppendPointAndUpdate: UpdateIts is not finite", _state);
    ae_assert(ae_fp_greater_eq(updateits,(double)(0)), "SSAAppendPointAndUpdate: UpdateIts<0", _state);
    ae_assert(s->nsequences>0, "SSAAppendPointAndUpdate: dataset is empty, no sequence to modify", _state);

    /*
     * Append point to dataset
     */
    rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]+1, _state);
    s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]] = x;
    s->sequenceidx.ptr.p_int[s->nsequences] = s->sequenceidx.ptr.p_int[s->nsequences]+1;

    /*
     * Do we have something to analyze? If not, invalidate basis
     * (just to be sure) and exit.
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        s->arebasisandsolvervalid = ae_false;
        return;
    }

    /*
     * Well, we have data to analyze and algorithm set, but basis is
     * invalid. Let's calculate it from scratch and exit.
     */
    if( !s->arebasisandsolvervalid )
    {
        ssa_updatebasis(s, 0, 0.0, _state);
        return;
    }

    /*
     * Update already computed basis
     */
    ssa_updatebasis(s, 1, updateits, _state);
}
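
/*************************************************************************
Illustrative sketch (NOT part of the library): streaming one tick at a
time with fractional UpdateIts, as recommended above - every point is
stored, but on average only ~10% of calls run an actual basis update.
C++ wrapper signatures are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_streaming(const double *ticks, int nticks)
{
    alglib::ssamodel s;
    alglib::real_1d_array history = "[1.0,1.1,0.9,1.0,1.1,0.9,1.0,1.1]";

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, history);
    alglib::ssasetwindow(s, 4);
    alglib::ssasetalgotopkrealtime(s, 2);

    for(int i=0; i<nticks; i++)
    {
        /* UpdateIts=0.1: point always stored, update fired with p=0.1 */
        alglib::ssaappendpointandupdate(s, ticks[i], 0.1);
    }
}
#endif /* illustrative example */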


/*************************************************************************
This function appends a new sequence to the dataset stored in the SSA
model and tries to update the model in an incremental manner (if possible
with the current algorithm).

Notes:
* if you want to add M sequences at once, perform M-1 calls with the
  UpdateIts parameter set to 0.0, and the last call with non-zero
  UpdateIts.
* if you want to add just one point, use ssaappendpointandupdate()

Running time of this function does NOT depend on dataset size, only on
sequence length, window width and the number of singular vectors.
Depending on the algorithm being used, the incremental update has
complexity:
* for top-K real time   - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
* for top-K direct      - O(Width^3+(NTicks-Width)*Width^2)
* for precomputed basis - O(1), no update is performed

INPUT PARAMETERS:
    S               -   SSA model created with ssacreate()
    X               -   new sequence, array[NTicks] or larger
    NTicks          -   >=1, number of ticks in the sequence
    UpdateIts       -   >=0, floating point (!) value, desired update
                        frequency:
                        * zero value means that the point is stored, but
                          no update is performed
                        * integer part of the value means that the
                          specified number of iterations is always
                          performed
                        * fractional part of the value means that one
                          iteration is performed with this probability.

                        Recommended value: 0<UpdateIts<=1. Values larger
                        than 1 are VERY seldom needed. If your dataset
                        changes slowly, you can set it to 0.1 and skip
                        90% of updates.

                        In any case, no information is lost even with a
                        zero value of UpdateIts! It will be incorporated
                        into the model, sooner or later.

OUTPUT PARAMETERS:
    S               -   SSA model, updated

NOTE: this function uses the internal RNG to handle fractional values of
      UpdateIts. By default it is initialized with a fixed seed during
      the initial calculation of the basis. Thus subsequent calls to this
      function will result in the same sequence of pseudorandom decisions.

      However, if you have several SSA models which are calculated
      simultaneously, and if you want to reduce computational bottlenecks
      by performing random updates at random moments, then a fixed seed
      is not an option - all updates will fire at the same moments.

      You may change it with the ssasetseed() function.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendsequenceandupdate(ssamodel* s,
     /* Real */ ae_vector* x,
     ae_int_t nticks,
     double updateits,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t offs;

    ae_assert(nticks>=0, "SSAAppendSequenceAndUpdate: NTicks<0", _state);
    ae_assert(x->cnt>=nticks, "SSAAppendSequenceAndUpdate: X is too short", _state);
    ae_assert(isfinitevector(x, nticks, _state), "SSAAppendSequenceAndUpdate: X contains infinities or NaNs", _state);

    /*
     * Add sequence
     */
    ivectorgrowto(&s->sequenceidx, s->nsequences+2, _state);
    s->sequenceidx.ptr.p_int[s->nsequences+1] = s->sequenceidx.ptr.p_int[s->nsequences]+nticks;
    rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences+1], _state);
    offs = s->sequenceidx.ptr.p_int[s->nsequences];
    for(i=0; i<=nticks-1; i++)
    {
        s->sequencedata.ptr.p_double[offs+i] = x->ptr.p_double[i];
    }
    inc(&s->nsequences, _state);

    /*
     * Do we have something to analyze? If not, invalidate basis
     * (just to be sure) and exit.
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        s->arebasisandsolvervalid = ae_false;
        return;
    }

    /*
     * Well, we have data to analyze and algorithm set, but basis is
     * invalid. Let's calculate it from scratch and exit.
     */
    if( !s->arebasisandsolvervalid )
    {
        ssa_updatebasis(s, 0, 0.0, _state);
        return;
    }

    /*
     * Update already computed basis
     */
    if( nticks>=s->windowwidth )
    {
        ssa_updatebasis(s, nticks-s->windowwidth+1, updateits, _state);
    }
}


/*************************************************************************
This function sets SSA algorithm to "precomputed vectors" algorithm.

This algorithm uses a precomputed set of orthonormal (orthogonal AND
normalized) basis vectors supplied by the user. Thus, the basis
calculation phase is not performed - we already have our basis - and
only the analysis/forecasting phase requires actual calculations.

This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(1) time.

NOTE: this algorithm accepts both basis and window width, because these
      two parameters are naturally aligned. Calling this function sets
      the window width; if you call ssasetwindow() with another window
      width, then during the analysis stage the algorithm will detect a
      conflict and reset to zero basis.

INPUT PARAMETERS:
    S               -   SSA model
    A               -   array[WindowWidth,NBasis], orthonormalized basis;
                        this function does NOT control orthogonality and
                        does NOT perform any kind of renormalization. It
                        is your responsibility to provide it with a
                        correct basis.
    WindowWidth     -   window width, >=1
    NBasis          -   number of basis vectors, 1<=NBasis<=WindowWidth

OUTPUT PARAMETERS:
    S               -   updated model

NOTE: calling this function invalidates basis in all cases.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgoprecomputed(ssamodel* s,
     /* Real */ ae_matrix* a,
     ae_int_t windowwidth,
     ae_int_t nbasis,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_assert(windowwidth>=1, "SSASetAlgoPrecomputed: WindowWidth<1", _state);
    ae_assert(nbasis>=1, "SSASetAlgoPrecomputed: NBasis<1", _state);
    ae_assert(nbasis<=windowwidth, "SSASetAlgoPrecomputed: NBasis>WindowWidth", _state);
    ae_assert(a->rows>=windowwidth, "SSASetAlgoPrecomputed: Rows(A)<WindowWidth", _state);
    ae_assert(a->cols>=nbasis, "SSASetAlgoPrecomputed: Cols(A)<NBasis", _state);
    ae_assert(apservisfinitematrix(a, windowwidth, nbasis, _state), "SSASetAlgoPrecomputed: A contains infinities or NaNs", _state);
    s->algotype = 1;
    s->precomputedwidth = windowwidth;
    s->precomputednbasis = nbasis;
    s->windowwidth = windowwidth;
    rmatrixsetlengthatleast(&s->precomputedbasis, windowwidth, nbasis, _state);
    for(i=0; i<=windowwidth-1; i++)
    {
        for(j=0; j<=nbasis-1; j++)
        {
            s->precomputedbasis.ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
        }
    }
    s->arebasisandsolvervalid = ae_false;
}
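
/*************************************************************************
Illustrative sketch (NOT part of the library): supplying a precomputed
orthonormal basis. The single column below has unit norm, as required -
the function itself performs no renormalization. C++ wrapper signatures
are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_precomputed_basis()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[1.0,1.0,1.0,1.0,1.0,1.0]";
    alglib::real_1d_array trend, noise;

    /* WindowWidth=2, NBasis=1: column (1/sqrt(2), 1/sqrt(2)) is orthonormal */
    alglib::real_2d_array basis = "[[0.70710678],[0.70710678]]";

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetalgoprecomputed(s, basis, 2, 1);

    /* no basis-calculation phase: analysis starts immediately */
    alglib::ssaanalyzesequence(s, x, trend, noise);
}
#endif /* illustrative example */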


/*************************************************************************
This function sets SSA algorithm to "direct top-K" algorithm.

The "direct top-K" algorithm performs full SVD of the N*WINDOW trajectory
matrix (hence its name - a direct solver is used), then extracts the top
K components. Overall running time is O(N*WINDOW^2), where N is the
number of ticks in the dataset and WINDOW is the window width.

This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(WINDOW^3) time, which is ~N/WINDOW
times faster than re-computing everything from scratch.

INPUT PARAMETERS:
    S               -   SSA model
    TopK            -   number of components to analyze; TopK>=1.

OUTPUT PARAMETERS:
    S               -   updated model


NOTE: TopK>WindowWidth is silently decreased to WindowWidth during the
      analysis phase

NOTE: calling this function invalidates basis, except for the situation
      when this algorithm was already set with the same parameters.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkdirect(ssamodel* s, ae_int_t topk, ae_state *_state)
{

    ae_assert(topk>=1, "SSASetAlgoTopKDirect: TopK<1", _state);

    /*
     * Ignore calls which change nothing
     */
    if( s->algotype==2&&s->topk==topk )
    {
        return;
    }

    /*
     * Update settings, invalidate model
     */
    s->algotype = 2;
    s->topk = topk;
    s->arebasisandsolvervalid = ae_false;
}


/*************************************************************************
This function sets SSA algorithm to "top-K real time algorithm". This
algorithm extracts K components with the largest singular values.

It is a real-time version of the top-K algorithm which is optimized for
incremental processing and fast start-up. Internally it uses a subspace
eigensolver for truncated SVD. It results in the ability to perform quick
updates of the basis when only a few points/sequences are added to the
dataset.

Performance profile of the algorithm is given below:
* O(K*WindowWidth^2) running time for incremental update of the dataset
  with one of the "append-and-update" functions (ssaappendpointandupdate()
  or ssaappendsequenceandupdate()).
* O(N*WindowWidth^2) running time for initial basis evaluation (N=size of
  dataset)
* ability to split costly initialization across several incremental
  updates of the basis (so-called "Power-Up" functionality, activated by
  the ssasetpoweruplength() function)

INPUT PARAMETERS:
    S               -   SSA model
    TopK            -   number of components to analyze; TopK>=1.

OUTPUT PARAMETERS:
    S               -   updated model

NOTE: this algorithm is optimized for large-scale tasks with large
      datasets. On toy problems with just 5-10 points it can return a
      basis which is slightly different from that returned by the direct
      algorithm (ssasetalgotopkdirect() function). However, the
      difference becomes negligible as the dataset grows.

NOTE: TopK>WindowWidth is silently decreased to WindowWidth during the
      analysis phase

NOTE: calling this function invalidates basis, except for the situation
      when this algorithm was already set with the same parameters.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkrealtime(ssamodel* s, ae_int_t topk, ae_state *_state)
{

    ae_assert(topk>=1, "SSASetAlgoTopKRealTime: TopK<1", _state);

    /*
     * Ignore calls which change nothing
     */
    if( s->algotype==3&&s->topk==topk )
    {
        return;
    }

    /*
     * Update settings, invalidate model
     */
    s->algotype = 3;
    s->topk = topk;
    s->arebasisandsolvervalid = ae_false;
}
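
/*************************************************************************
Illustrative sketch (NOT part of the library): real-time top-K analysis
combined with the power-up option, which spreads the costly
initialization across the first few append-and-update rounds. C++
wrapper signatures are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_realtime_powerup()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1]";
    alglib::real_1d_array trend, noise;

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetwindow(s, 3);
    alglib::ssasetalgotopkrealtime(s, 2);
    alglib::ssasetpoweruplength(s, 5);  /* spread init over ~5 update rounds */

    /* each append is cheap; basis quality converges as power-up completes */
    alglib::ssaappendpointandupdate(s, 0.9, 1.0);
    alglib::ssaappendpointandupdate(s, 1.1, 1.0);
    alglib::ssaanalyzelast(s, 5, trend, noise);
}
#endif /* illustrative example */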


/*************************************************************************
This function clears all data stored in the model and invalidates all
basis components found so far.

INPUT PARAMETERS:
    S               -   SSA model created with ssacreate()

OUTPUT PARAMETERS:
    S               -   SSA model, updated

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacleardata(ssamodel* s, ae_state *_state)
{

    s->nsequences = 0;
    s->arebasisandsolvervalid = ae_false;
}


/*************************************************************************
This function executes SSA on the internally stored dataset and returns
the basis found by the current method.

INPUT PARAMETERS:
    S               -   SSA model

OUTPUT PARAMETERS:
    A               -   array[WindowWidth,NBasis], basis; vectors are
                        stored in matrix columns, in order of decreasing
                        variance
    SV              -   array[NBasis]:
                        * zeros - for model initialized with SSASetAlgoPrecomputed()
                        * singular values - for other algorithms
    WindowWidth     -   current window width
    NBasis          -   basis size


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Calling this function in degenerate cases (no data or all data are
shorter than the window size; no algorithm is specified) returns a basis
with just one zero vector.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetbasis(ssamodel* s,
     /* Real */ ae_matrix* a,
     /* Real */ ae_vector* sv,
     ae_int_t* windowwidth,
     ae_int_t* nbasis,
     ae_state *_state)
{
    ae_int_t i;

    ae_matrix_clear(a);
    ae_vector_clear(sv);
    *windowwidth = 0;
    *nbasis = 0;

    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        *windowwidth = s->windowwidth;
        *nbasis = 1;
        ae_matrix_set_length(a, *windowwidth, 1, _state);
        for(i=0; i<=*windowwidth-1; i++)
        {
            a->ptr.pp_double[i][0] = 0.0;
        }
        ae_vector_set_length(sv, 1, _state);
        sv->ptr.p_double[0] = 0.0;
        return;
    }

    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means
     * that we perform initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);

    /*
     * Output
     */
    ae_assert(s->nbasis>0, "SSAGetBasis: integrity check failed", _state);
    ae_assert(s->windowwidth>0, "SSAGetBasis: integrity check failed", _state);
    *nbasis = s->nbasis;
    *windowwidth = s->windowwidth;
    ae_matrix_set_length(a, *windowwidth, *nbasis, _state);
    rmatrixcopy(*windowwidth, *nbasis, &s->basis, 0, 0, a, 0, 0, _state);
    ae_vector_set_length(sv, *nbasis, _state);
    for(i=0; i<=*nbasis-1; i++)
    {
        sv->ptr.p_double[i] = s->sv.ptr.p_double[i];
    }
}
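
/*************************************************************************
Illustrative sketch (NOT part of the library): retrieving the basis and
singular values; the call itself triggers (and caches) the SSA run as
described in the doc comment above. C++ wrapper signatures are assumed;
disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_getbasis()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1]";
    alglib::real_2d_array a;
    alglib::real_1d_array sv;
    alglib::ae_int_t windowwidth, nbasis;

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetwindow(s, 3);
    alglib::ssasetalgotopkdirect(s, 2);

    /* A is WindowWidth*NBasis, columns sorted by decreasing variance */
    alglib::ssagetbasis(s, a, sv, windowwidth, nbasis);
}
#endif /* illustrative example */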


/*************************************************************************
This function returns the linear recurrence relation (LRR) coefficients
found by the current SSA algorithm.

INPUT PARAMETERS:
    S               -   SSA model

OUTPUT PARAMETERS:
    A               -   array[WindowWidth-1]. Coefficients of the
                        linear recurrence of the form:
                        X[W-1] = X[W-2]*A[W-2] + X[W-3]*A[W-3] + ... + X[0]*A[0].
                        Empty array for WindowWidth=1.
    WindowWidth     -   current window width


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.


HANDLING OF DEGENERATE CASES

Calling this function in degenerate cases (no data or all data are
shorter than the window size; no algorithm is specified) returns zeros.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetlrr(ssamodel* s,
     /* Real */ ae_vector* a,
     ae_int_t* windowwidth,
     ae_state *_state)
{
    ae_int_t i;

    ae_vector_clear(a);
    *windowwidth = 0;

    ae_assert(s->windowwidth>0, "SSAGetLRR: integrity check failed", _state);

    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        *windowwidth = s->windowwidth;
        ae_vector_set_length(a, *windowwidth-1, _state);
        for(i=0; i<=*windowwidth-2; i++)
        {
            a->ptr.p_double[i] = 0.0;
        }
        return;
    }

    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means
     * that we perform initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);

    /*
     * Output
     */
    *windowwidth = s->windowwidth;
    ae_vector_set_length(a, *windowwidth-1, _state);
    for(i=0; i<=*windowwidth-2; i++)
    {
        a->ptr.p_double[i] = s->forecasta.ptr.p_double[i];
    }
}
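
/*************************************************************************
Illustrative sketch (NOT part of the library): extracting the LRR and
applying the documented recurrence
    X[W-1] = X[W-2]*A[W-2] + ... + X[0]*A[0]
by hand to predict one tick ahead from the last W-1 reconstructed trend
values (roughly what ssaforecastlast() does with NTicks=1). C++ wrapper
signatures are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_lrr_onestep()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1]";
    alglib::real_1d_array a, trend, noise;
    alglib::ae_int_t w;

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetwindow(s, 3);
    alglib::ssasetalgotopkdirect(s, 2);
    alglib::ssagetlrr(s, a, w);             /* A holds W-1 coefficients    */
    alglib::ssaanalyzelast(s, w-1, trend, noise);

    /* trend[0..w-2] are the last W-1 ticks in chronological order */
    double prediction = 0.0;
    for(alglib::ae_int_t i=0; i<w-1; i++)
    {
        prediction += a[i]*trend[i];        /* oldest contribution first   */
    }
}
#endif /* illustrative example */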


/*************************************************************************
This function executes SSA on the internally stored dataset and returns
analysis for the last window of the last sequence. Such analysis is a
lightweight alternative to full-scale reconstruction (see below).

The typical use case for this function is a real-time setting, when you
are interested in quick-and-dirty (very quick and very dirty) processing
of just a few last ticks of the trend.

IMPORTANT: full-scale SSA involves analysis of the ENTIRE dataset,
           with reconstruction being done for all positions of the
           sliding window, with subsequent hankelization (diagonal
           averaging) of the resulting matrix.

           Such analysis requires O((DataLen-Window)*Window*NBasis) FLOPs
           and can be quite costly. However, it has nice noise-canceling
           effects due to averaging.

           This function performs REDUCED analysis of the last window. It
           is much faster - just O(Window*NBasis), but its results are
           DIFFERENT from those of ssaanalyzelast(). In particular, the
           first few points of the trend are much more prone to noise.

INPUT PARAMETERS:
    S               -   SSA model

OUTPUT PARAMETERS:
    Trend           -   array[WindowSize], reconstructed trend line
    Noise           -   array[WindowSize], the rest of the signal;
                        it holds that ActualData = Trend+Noise.
    NTicks          -   current WindowSize


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the window length (analysis can be done,
  but we can not perform reconstruction on the last sequence)

Calling this function in degenerate cases returns the following result:
* in any case, WindowWidth ticks are returned
* trend is assumed to be zero
* noise is initialized by the last sequence; if the last sequence is
  shorter than the window size, it is moved to the end of the array, and
  the beginning of the noise array is filled by zeros

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelastwindow(ssamodel* s,
     /* Real */ ae_vector* trend,
     /* Real */ ae_vector* noise,
     ae_int_t* nticks,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t offs;
    ae_int_t cnt;

    ae_vector_clear(trend);
    ae_vector_clear(noise);
    *nticks = 0;

    /*
     * Init
     */
    *nticks = s->windowwidth;
    ae_vector_set_length(trend, s->windowwidth, _state);
    ae_vector_set_length(noise, s->windowwidth, _state);

    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state)||!ssa_issequencebigenough(s, -1, _state) )
    {
        for(i=0; i<=*nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
            noise->ptr.p_double[i] = (double)(0);
        }
        if( s->nsequences>=1 )
        {
            cnt = ae_minint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], *nticks, _state);
            offs = s->sequenceidx.ptr.p_int[s->nsequences]-cnt;
            for(i=0; i<=cnt-1; i++)
            {
                noise->ptr.p_double[*nticks-cnt+i] = s->sequencedata.ptr.p_double[offs+i];
            }
        }
        return;
    }

    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means
     * that we perform initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);

    /*
     * Perform analysis of the last window
     */
    ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth>=0, "SSAAnalyzeLastWindow: integrity check failed", _state);
    rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
    rmatrixgemv(s->nbasis, s->windowwidth, 1.0, &s->basist, 0, 0, 0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth, 0.0, &s->tmp0, 0, _state);
    rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, trend, 0, _state);
    offs = s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth;
    cnt = s->windowwidth;
    for(i=0; i<=cnt-1; i++)
    {
        noise->ptr.p_double[i] = s->sequencedata.ptr.p_double[offs+i]-trend->ptr.p_double[i];
    }
}


/*************************************************************************
This function:
* builds SSA basis using the internally stored (entire) dataset
* returns reconstruction for the last NTicks of the last sequence

If you want to analyze some other sequence, use ssaanalyzesequence().

The reconstruction phase involves generation of NTicks-WindowWidth
sliding windows, their decomposition using the empirical orthogonal
functions found by SSA, followed by averaging of each data point across
several overlapping windows. Thus, every point in the output trend is
reconstructed using up to WindowWidth overlapping windows (exactly
WindowWidth windows in the inner points, just one window at the extremal
points).

IMPORTANT: due to averaging this function returns different results for
           different values of NTicks. This is expected behavior, not a
           bug.

           For example:
           * Trend[NTicks-1] is always the same because it is not
             averaged in any case (the same applies to Trend[0]).
           * Trend[NTicks-2] has different values for NTicks=WindowWidth
             and NTicks=WindowWidth+1 because the former case means that
             no averaging is performed, and the latter case means that
             averaging using two sliding windows is performed. Larger
             values of NTicks produce the same results as
             NTicks=WindowWidth+1.
           * ...and so on...

PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
             running time. If you work in a time-constrained setting and
             have to analyze just a few last ticks, choosing NTicks equal
             to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
             will result in a good compromise between noise cancellation
             and analysis speed.

INPUT PARAMETERS:
    S               -   SSA model
    NTicks          -   number of ticks to analyze, NTicks>=1.
                        * the special case of NTicks<=WindowWidth is
                          handled by analyzing the last window and
                          returning the NTicks last ticks.
                        * the special case NTicks>LastSequenceLen is
                          handled by prepending the result with
                          NTicks-LastSequenceLen zeros.

OUTPUT PARAMETERS:
    Trend           -   array[NTicks], reconstructed trend line
    Noise           -   array[NTicks], the rest of the signal;
                        it holds that ActualData = Trend+Noise.


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the window length (analysis can be done,
  but we can not perform reconstruction on the last sequence)

Calling this function in degenerate cases returns the following result:
* in any case, NTicks ticks are returned
* trend is assumed to be zero
* noise is initialized by the last sequence; if the last sequence is
  shorter than the window size, it is moved to the end of the array, and
  the beginning of the noise array is filled by zeros

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelast(ssamodel* s,
     ae_int_t nticks,
     /* Real */ ae_vector* trend,
     /* Real */ ae_vector* noise,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t offs;
    ae_int_t cnt;
    ae_int_t cntzeros;

    ae_vector_clear(trend);
    ae_vector_clear(noise);

    ae_assert(nticks>=1, "SSAAnalyzeLast: NTicks<1", _state);

    /*
     * Init
     */
    ae_vector_set_length(trend, nticks, _state);
    ae_vector_set_length(noise, nticks, _state);

    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state)||!ssa_issequencebigenough(s, -1, _state) )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
            noise->ptr.p_double[i] = (double)(0);
        }
        if( s->nsequences>=1 )
        {
            cnt = ae_minint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], nticks, _state);
            offs = s->sequenceidx.ptr.p_int[s->nsequences]-cnt;
            for(i=0; i<=cnt-1; i++)
            {
                noise->ptr.p_double[nticks-cnt+i] = s->sequencedata.ptr.p_double[offs+i];
            }
        }
        return;
    }

    /*
     * Fast exit: NTicks<=WindowWidth, just the last window is analyzed
     */
    if( nticks<=s->windowwidth )
    {
        ssaanalyzelastwindow(s, &s->alongtrend, &s->alongnoise, &cnt, _state);
        offs = s->windowwidth-nticks;
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = s->alongtrend.ptr.p_double[offs+i];
            noise->ptr.p_double[i] = s->alongnoise.ptr.p_double[offs+i];
        }
        return;
    }

    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means
     * that we perform initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);

    /*
     * Perform analysis:
     * * prepend max(NTicks-LastSequenceLength,0) zeros to the beginning
     *   of the array
     * * analyze the rest with AnalyzeSequence() which assumes that we
     *   already have the basis
     */
    ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>=s->windowwidth, "SSAAnalyzeLast: integrity check failed / 23vd4", _state);
    cntzeros = ae_maxint(nticks-(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]), 0, _state);
    for(i=0; i<=cntzeros-1; i++)
    {
        trend->ptr.p_double[i] = 0.0;
        noise->ptr.p_double[i] = 0.0;
    }
    cnt = ae_minint(nticks, s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], _state);
    ssa_analyzesequence(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-cnt, s->sequenceidx.ptr.p_int[s->nsequences], trend, noise, cntzeros, _state);
}
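
/*************************************************************************
Illustrative sketch (NOT part of the library): choosing NTicks as
WindowWidth+SmoothingLen, per the performance note above, to trade noise
cancellation against speed. C++ wrapper signatures are assumed; disabled
with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_analyzelast()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1,0.9,1.1]";
    alglib::real_1d_array trend, noise;
    const alglib::ae_int_t window = 4;
    const alglib::ae_int_t smoothinglen = 2;    /* 1..WindowWidth */

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetwindow(s, window);
    alglib::ssasetalgotopkdirect(s, 2);

    /* last window+smoothinglen ticks: some averaging, still cheap */
    alglib::ssaanalyzelast(s, window+smoothinglen, trend, noise);
}
#endif /* illustrative example */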


/*************************************************************************
This function:
* builds SSA basis using the internally stored (entire) dataset
* returns reconstruction for the sequence being passed to this function

If you want to analyze the last sequence stored in the model, use
ssaanalyzelast().

The reconstruction phase involves generation of NTicks-WindowWidth
sliding windows, their decomposition using the empirical orthogonal
functions found by SSA, followed by averaging of each data point across
several overlapping windows. Thus, every point in the output trend is
reconstructed using up to WindowWidth overlapping windows (exactly
WindowWidth windows in the inner points, just one window at the extremal
points).

PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
             running time. If you work in a time-constrained setting and
             have to analyze just a few last ticks, choosing NTicks equal
             to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
             will result in a good compromise between noise cancellation
             and analysis speed.

INPUT PARAMETERS:
    S               -   SSA model
    Data            -   array[NTicks], can be larger (only NTicks leading
                        elements will be used)
    NTicks          -   number of ticks to analyze, NTicks>=1.
                        * the special case of NTicks<WindowWidth is
                          handled by returning zeros as trend, and the
                          signal as noise

OUTPUT PARAMETERS:
    Trend           -   array[NTicks], reconstructed trend line
    Noise           -   array[NTicks], the rest of the signal;
                        it holds that ActualData = Trend+Noise.


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
  a) ssasetwindow() with same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly same K
  Calling these functions will result in reuse of previously found basis.

In any case, only the basis is reused. Reconstruction is performed from
scratch every time you call this function.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* the sequence being passed is shorter than the window length

Calling this function in degenerate cases returns the following result:
* in any case, NTicks ticks are returned
* trend is assumed to be zero
* noise is initialized by the sequence.

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzesequence(ssamodel* s,
     /* Real */ ae_vector* data,
     ae_int_t nticks,
     /* Real */ ae_vector* trend,
     /* Real */ ae_vector* noise,
     ae_state *_state)
{
    ae_int_t i;

    ae_vector_clear(trend);
    ae_vector_clear(noise);

    ae_assert(nticks>=1, "SSAAnalyzeSequence: NTicks<1", _state);
    ae_assert(data->cnt>=nticks, "SSAAnalyzeSequence: Data is too short", _state);
    ae_assert(isfinitevector(data, nticks, _state), "SSAAnalyzeSequence: Data contains infinities or NaNs", _state);

    /*
     * Init
     */
    ae_vector_set_length(trend, nticks, _state);
    ae_vector_set_length(noise, nticks, _state);

    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state)||nticks<s->windowwidth )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
            noise->ptr.p_double[i] = data->ptr.p_double[i];
        }
        return;
    }

    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means
     * that we perform initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);

    /*
     * Perform analysis
     */
    ssa_analyzesequence(s, data, 0, nticks, trend, noise, 0, _state);
}
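
/*************************************************************************
Illustrative sketch (NOT part of the library): full-sequence analysis
and a check of the documented identity ActualData = Trend+Noise. C++
wrapper signatures are assumed; disabled with "#if 0".
*************************************************************************/
#if 0
static void example_ssa_trend_plus_noise()
{
    alglib::ssamodel s;
    alglib::real_1d_array x = "[1.0,1.2,0.9,1.1,1.0,1.2,0.9,1.1]";
    alglib::real_1d_array trend, noise;

    alglib::ssacreate(s);
    alglib::ssaaddsequence(s, x);
    alglib::ssasetwindow(s, 3);
    alglib::ssasetalgotopkdirect(s, 1);
    alglib::ssaanalyzesequence(s, x, trend, noise);

    /* every tick decomposes exactly: x[i] == trend[i]+noise[i] */
    for(alglib::ae_int_t i=0; i<x.length(); i++)
    {
        double residual = x[i]-(trend[i]+noise[i]);     /* ~0 up to rounding */
        (void)residual;
    }
}
#endif /* illustrative example */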
|
|
|
|
|
|
/*************************************************************************
This function builds the SSA basis and performs forecasting for a
specified number of ticks, returning the value of the trend.

Forecast is performed as follows:
* SSA trend extraction is applied to the last WindowWidth elements of the
  internally stored dataset; this step is basically a noise reduction.
* a linear recurrence relation is applied to the extracted trend

This function has the following running time:
* O(NBasis*WindowWidth) for the trend extraction phase (always performed)
* O(WindowWidth*NTicks) for the forecast phase

NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
      apply the recurrence relation to raw unprocessed data, use another
      function - ssaforecastsequence() - which lets you turn the noise
      reduction phase on and off.

NOTE: this algorithm performs prediction using only one - last - sliding
      window. Predictions produced by such an approach are smooth
      continuations of the reconstructed trend line, but they can be
      easily corrupted by noise. If you need noise-resistant prediction,
      use the ssaforecastavglast() function, which averages predictions
      built using several sliding windows.

INPUT PARAMETERS:
    S       -   SSA model
    NTicks  -   number of ticks to forecast, NTicks>=1

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], predicted trend line


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
Calling these functions will result in reuse of the previously found basis.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* the last sequence is shorter than the WindowWidth (analysis can be done,
  but we cannot perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only a
  basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* NTicks copies of the last value are returned for a non-empty task with a
  large enough dataset, but with an overcomplete basis (window width=1 or
  basis size equal to window width)
* a zero trend with length=NTicks is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastlast(ssamodel* s,
     ae_int_t nticks,
     /* Real */ ae_vector* trend,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t winw;
    
    ae_vector_clear(trend);
    
    ae_assert(nticks>=1, "SSAForecastLast: NTicks<1", _state);
    
    /*
     * Init
     */
    winw = s->windowwidth;
    ae_vector_set_length(trend, nticks, _state);
    
    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed", _state);
    if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    if( winw==1 )
    {
        ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed / 2355", _state);
        ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastLast: integrity check failed", _state);
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
        }
        return;
    }
    
    /*
     * Update basis and recurrent relation.
     *
     * It will take care of basis validity flags. AppendLen=0 means that
     * we perform the initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);
    ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecastLast: integrity check failed / 4f5et", _state);
    if( s->nbasis==winw )
    {
        
        /*
         * Handle degenerate situation with basis whose size
         * is equal to window length.
         */
        ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed / 2355", _state);
        ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastLast: integrity check failed", _state);
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
        }
        return;
    }
    
    /*
     * Apply recurrent formula for SSA forecasting:
     * * first, perform smoothing of the last window
     * * second, apply the recurrence to produce predictions
     */
    ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed", _state);
    ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>=s->windowwidth, "SSAForecastLast: integrity check failed", _state);
    rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
    rvectorsetlengthatleast(&s->fctrend, s->windowwidth, _state);
    rmatrixgemv(s->nbasis, s->windowwidth, 1.0, &s->basist, 0, 0, 0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth, 0.0, &s->tmp0, 0, _state);
    rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->fctrend, 0, _state);
    rvectorsetlengthatleast(&s->tmp1, winw-1, _state);
    for(i=1; i<=winw-1; i++)
    {
        s->tmp1.ptr.p_double[i-1] = s->fctrend.ptr.p_double[i];
    }
    for(i=0; i<=nticks-1; i++)
    {
        v = s->forecasta.ptr.p_double[0]*s->tmp1.ptr.p_double[0];
        for(j=1; j<=winw-2; j++)
        {
            v = v+s->forecasta.ptr.p_double[j]*s->tmp1.ptr.p_double[j];
            s->tmp1.ptr.p_double[j-1] = s->tmp1.ptr.p_double[j];
        }
        trend->ptr.p_double[i] = v;
        s->tmp1.ptr.p_double[winw-2] = v;
    }
}


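/*
 * Usage sketch for the function above (illustration only, not part of the
 * library). The fragment drives ssaforecastlast() through the public C++
 * wrapper; wrapper names follow the documented ALGLIB interface, while the
 * dataset and parameter values are made-up assumptions:
 *
 *     alglib::ssamodel model;
 *     alglib::real_1d_array x = "[0.1,0.2,0.3,0.4,0.5,0.4,0.3,0.2]";
 *     alglib::real_1d_array forecast;
 *     alglib::ssacreate(model);
 *     alglib::ssaaddsequence(model, x);            // internally stored dataset
 *     alglib::ssasetwindow(model, 4);              // window width
 *     alglib::ssasetalgotopkdirect(model, 2);      // top-2 basis, direct EVD
 *     alglib::ssaforecastlast(model, 3, forecast); // predict 3 ticks of trend
 */
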
/*************************************************************************
This function builds the SSA basis and performs forecasting for a user-
specified sequence, returning the value of the trend.

Forecasting is done in two stages:
* first, we extract the trend from the WindowWidth last elements of the
  sequence. This stage is optional; you can turn it off if you pass data
  which are already processed with SSA. Of course, you can turn it off
  even for raw data, but it is not recommended - noise suppression is very
  important for correct prediction.
* then, we apply the LRR to the last WindowWidth-1 elements of the
  extracted trend.

This function has the following running time:
* O(NBasis*WindowWidth) for the trend extraction phase
* O(WindowWidth*NTicks) for the forecast phase

NOTE: this algorithm performs prediction using only one - last - sliding
      window. Predictions produced by such an approach are smooth
      continuations of the reconstructed trend line, but they can be
      easily corrupted by noise. If you need noise-resistant prediction,
      use the ssaforecastavgsequence() function, which averages
      predictions built using several sliding windows.

INPUT PARAMETERS:
    S           -   SSA model
    Data        -   array[NTicks], data to forecast
    DataLen     -   number of ticks in the data, DataLen>=1
    ForecastLen -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing - whether to apply smoothing trend extraction or not;
                    if you do not know what to specify, pass True.

OUTPUT PARAMETERS:
    Trend       -   array[ForecastLen], forecasted trend


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
Calling these functions will result in reuse of the previously found basis.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* the data sequence is shorter than the WindowWidth (analysis can be done,
  but we cannot perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only a
  basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset, but with an overcomplete basis (window
  width=1 or basis size equal to window width)
* a zero trend with length=ForecastLen is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastsequence(ssamodel* s,
     /* Real */ ae_vector* data,
     ae_int_t datalen,
     ae_int_t forecastlen,
     ae_bool applysmoothing,
     /* Real */ ae_vector* trend,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t winw;
    
    ae_vector_clear(trend);
    
    ae_assert(datalen>=1, "SSAForecastSequence: DataLen<1", _state);
    ae_assert(data->cnt>=datalen, "SSAForecastSequence: Data is too short", _state);
    ae_assert(isfinitevector(data, datalen, _state), "SSAForecastSequence: Data contains infinities or NANs", _state);
    ae_assert(forecastlen>=1, "SSAForecastSequence: ForecastLen<1", _state);
    
    /*
     * Init
     */
    winw = s->windowwidth;
    ae_vector_set_length(trend, forecastlen, _state);
    
    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state)||datalen<winw )
    {
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    if( winw==1 )
    {
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
        }
        return;
    }
    
    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means that
     * we perform the initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);
    ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecastSequence: integrity check failed / 4f5et", _state);
    if( s->nbasis==winw )
    {
        
        /*
         * Handle degenerate situation with basis whose size
         * is equal to window length.
         */
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
        }
        return;
    }
    
    /*
     * Perform trend extraction
     */
    rvectorsetlengthatleast(&s->fctrend, s->windowwidth, _state);
    if( applysmoothing )
    {
        ae_assert(datalen>=winw, "SSAForecastSequence: integrity check failed", _state);
        rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
        rmatrixgemv(s->nbasis, winw, 1.0, &s->basist, 0, 0, 0, data, datalen-winw, 0.0, &s->tmp0, 0, _state);
        rmatrixgemv(winw, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->fctrend, 0, _state);
    }
    else
    {
        for(i=0; i<=winw-1; i++)
        {
            s->fctrend.ptr.p_double[i] = data->ptr.p_double[datalen+i-winw];
        }
    }
    
    /*
     * Apply recurrent formula for SSA forecasting
     */
    rvectorsetlengthatleast(&s->tmp1, winw-1, _state);
    for(i=1; i<=winw-1; i++)
    {
        s->tmp1.ptr.p_double[i-1] = s->fctrend.ptr.p_double[i];
    }
    for(i=0; i<=forecastlen-1; i++)
    {
        v = s->forecasta.ptr.p_double[0]*s->tmp1.ptr.p_double[0];
        for(j=1; j<=winw-2; j++)
        {
            v = v+s->forecasta.ptr.p_double[j]*s->tmp1.ptr.p_double[j];
            s->tmp1.ptr.p_double[j-1] = s->tmp1.ptr.p_double[j];
        }
        trend->ptr.p_double[i] = v;
        s->tmp1.ptr.p_double[winw-2] = v;
    }
}


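/*
 * Note on the recurrence used above (a sketch of the math, not library
 * documentation): with window width W and LRR coefficients a[0..W-2]
 * stored in ForecastA, every new tick is produced from the previous W-1
 * (smoothed) ticks as
 *
 *     x(t) = a[0]*x(t-W+1) + a[1]*x(t-W+2) + ... + a[W-2]*x(t-1),
 *
 * and Tmp1 acts as a sliding buffer which holds exactly those W-1 values:
 * each iteration shifts it left by one position and appends the newly
 * predicted value.
 */
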
/*************************************************************************
This function builds the SSA basis and performs forecasting for a
specified number of ticks, returning the value of the trend.

Forecast is performed as follows:
* SSA trend extraction is applied to the last M sliding windows of the
  internally stored dataset
* for each of the M sliding windows, a prediction is built
* the average of these M predictions is returned

This function has the following running time:
* O(NBasis*WindowWidth*M) for the trend extraction phase (always performed)
* O(WindowWidth*NTicks*M) for the forecast phase

NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
      apply the recurrence relation to raw unprocessed data, use another
      function - ssaforecastsequence() - which lets you turn the noise
      reduction phase on and off.

NOTE: combining several predictions results in lesser sensitivity to
      noise, but it may produce undesirable discontinuities between the
      last point of the trend and the first point of the prediction. The
      reason is that the last point of the trend is usually corrupted by
      noise, while the average of several predictions is less sensitive to
      noise; hence the discontinuity may appear. It is not a bug.

INPUT PARAMETERS:
    S       -   SSA model
    M       -   number of sliding windows to combine, M>=1. If
                your dataset has less than M sliding windows, this
                parameter will be silently reduced.
    NTicks  -   number of ticks to forecast, NTicks>=1

OUTPUT PARAMETERS:
    Trend   -   array[NTicks], predicted trend line


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
Calling these functions will result in reuse of the previously found basis.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* the last sequence is shorter than the WindowWidth (analysis can be done,
  but we cannot perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only a
  basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* NTicks copies of the last value are returned for a non-empty task with a
  large enough dataset, but with an overcomplete basis (window width=1 or
  basis size equal to window width)
* a zero trend with length=NTicks is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavglast(ssamodel* s,
     ae_int_t m,
     ae_int_t nticks,
     /* Real */ ae_vector* trend,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t winw;
    
    ae_vector_clear(trend);
    
    ae_assert(nticks>=1, "SSAForecastAvgLast: NTicks<1", _state);
    ae_assert(m>=1, "SSAForecastAvgLast: M<1", _state);
    
    /*
     * Init
     */
    winw = s->windowwidth;
    ae_vector_set_length(trend, nticks, _state);
    
    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state) )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed", _state);
    if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
    {
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    if( winw==1 )
    {
        ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed / 2355", _state);
        ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastAvgLast: integrity check failed", _state);
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
        }
        return;
    }
    
    /*
     * Update basis and recurrent relation.
     *
     * It will take care of basis validity flags. AppendLen=0 means that
     * we perform the initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);
    ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecastAvgLast: integrity check failed / 4f5et", _state);
    if( s->nbasis==winw )
    {
        
        /*
         * Handle degenerate situation with basis whose size
         * is equal to window length.
         */
        ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed / 2355", _state);
        ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastAvgLast: integrity check failed", _state);
        for(i=0; i<=nticks-1; i++)
        {
            trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
        }
        return;
    }
    
    /*
     * Decrease M if we have less than M sliding windows.
     * Forecast.
     */
    m = ae_minint(m, s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, _state);
    ae_assert(m>=1, "SSAForecastAvgLast: integrity check failed", _state);
    ssa_forecastavgsequence(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences-1], s->sequenceidx.ptr.p_int[s->nsequences], m, nticks, ae_true, trend, 0, _state);
}


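/*
 * Usage sketch (illustration only, not part of the library): compared with
 * ssaforecastlast(), the averaged version only takes the extra M argument.
 * The wrapper name follows the documented ALGLIB C++ interface; the model
 * setup and parameter values are made-up assumptions:
 *
 *     // model configured as in the ssaforecastlast() sketch above
 *     alglib::ssaforecastavglast(model, 10, 3, forecast); // average over
 *                                                         // 10 windows,
 *                                                         // predict 3 ticks
 */
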
/*************************************************************************
This function builds the SSA basis and performs forecasting for a user-
specified sequence, returning the value of the trend.

Forecasting is done in two stages:
* first, we extract the trend from the M last sliding windows of the
  sequence. This stage is optional; you can turn it off if you pass data
  which are already processed with SSA. Of course, you can turn it off
  even for raw data, but it is not recommended - noise suppression is very
  important for correct prediction.
* then, we apply the LRR independently to each of the M sliding windows
* the average of the M predictions is returned

This function has the following running time:
* O(NBasis*WindowWidth*M) for the trend extraction phase
* O(WindowWidth*NTicks*M) for the forecast phase

NOTE: combining several predictions results in lesser sensitivity to
      noise, but it may produce undesirable discontinuities between the
      last point of the trend and the first point of the prediction. The
      reason is that the last point of the trend is usually corrupted by
      noise, while the average of several predictions is less sensitive to
      noise; hence the discontinuity may appear. It is not a bug.

INPUT PARAMETERS:
    S           -   SSA model
    Data        -   array[NTicks], data to forecast
    DataLen     -   number of ticks in the data, DataLen>=1
    M           -   number of sliding windows to combine, M>=1. If
                    your dataset has less than M sliding windows, this
                    parameter will be silently reduced.
    ForecastLen -   number of ticks to predict, ForecastLen>=1
    ApplySmoothing - whether to apply smoothing trend extraction or not;
                    if you do not know what to specify, pass True.

OUTPUT PARAMETERS:
    Trend       -   array[ForecastLen], forecasted trend


CACHING/REUSE OF THE BASIS

Caching/reuse of previous results is performed:
* the first call performs a full run of SSA; the basis is stored in the cache
* subsequent calls reuse the previously cached basis
* if you call any function which changes model properties (window length,
  algorithm, dataset), the internal basis will be invalidated.
* the only calls which do NOT invalidate the basis are listed below:
  a) ssasetwindow() with the same window length
  b) ssaappendpointandupdate()
  c) ssaappendsequenceandupdate()
  d) ssasetalgotopk...() with exactly the same K
Calling these functions will result in reuse of the previously found basis.


HANDLING OF DEGENERATE CASES

The following degenerate cases may happen:
* the dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* the data sequence is shorter than the WindowWidth (analysis can be done,
  but we cannot perform forecasting on the last sequence)
* the window length is 1 (impossible to use for forecasting)
* the SSA analysis algorithm is configured to extract a basis whose size
  is equal to the window length (impossible to use for forecasting; only a
  basis whose size is less than the window length can be used).

Calling this function in degenerate cases returns the following result:
* ForecastLen copies of the last value are returned for a non-empty task
  with a large enough dataset, but with an overcomplete basis (window
  width=1 or basis size equal to window width)
* a zero trend with length=ForecastLen is returned for an empty task

No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavgsequence(ssamodel* s,
     /* Real */ ae_vector* data,
     ae_int_t datalen,
     ae_int_t m,
     ae_int_t forecastlen,
     ae_bool applysmoothing,
     /* Real */ ae_vector* trend,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t winw;
    
    ae_vector_clear(trend);
    
    ae_assert(datalen>=1, "SSAForecastAvgSequence: DataLen<1", _state);
    ae_assert(m>=1, "SSAForecastAvgSequence: M<1", _state);
    ae_assert(data->cnt>=datalen, "SSAForecastAvgSequence: Data is too short", _state);
    ae_assert(isfinitevector(data, datalen, _state), "SSAForecastAvgSequence: Data contains infinities or NANs", _state);
    ae_assert(forecastlen>=1, "SSAForecastAvgSequence: ForecastLen<1", _state);
    
    /*
     * Init
     */
    winw = s->windowwidth;
    ae_vector_set_length(trend, forecastlen, _state);
    
    /*
     * Is it a degenerate case?
     */
    if( !ssa_hassomethingtoanalyze(s, _state)||datalen<winw )
    {
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = (double)(0);
        }
        return;
    }
    if( winw==1 )
    {
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
        }
        return;
    }
    
    /*
     * Update basis.
     *
     * It will take care of basis validity flags. AppendLen=0 means that
     * we perform the initial basis evaluation.
     */
    ssa_updatebasis(s, 0, 0.0, _state);
    ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecastAvgSequence: integrity check failed / 4f5et", _state);
    if( s->nbasis==winw )
    {
        
        /*
         * Handle degenerate situation with basis whose size
         * is equal to window length.
         */
        for(i=0; i<=forecastlen-1; i++)
        {
            trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
        }
        return;
    }
    
    /*
     * Decrease M if we have less than M sliding windows.
     * Forecast.
     */
    m = ae_minint(m, datalen-winw+1, _state);
    ae_assert(m>=1, "SSAForecastAvgSequence: integrity check failed", _state);
    ssa_forecastavgsequence(s, data, 0, datalen, m, forecastlen, applysmoothing, trend, 0, _state);
}


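/*
 * Worked example for the M reduction above (a sketch; the values are made
 * up): a sequence of DataLen ticks with window width W contains
 * DataLen-W+1 sliding windows, so for DataLen=100 and W=20 at most 81
 * windows exist, and a request with M=100 is silently reduced to M=81 by
 * the ae_minint() call above.
 */
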
/*************************************************************************
This function evaluates the current model and tells whether we have some
data which can be analyzed by the current algorithm, or not.

No analysis can be done in the following degenerate cases:
* the dataset is empty
* all sequences are shorter than the window length
* no algorithm is specified

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static ae_bool ssa_hassomethingtoanalyze(ssamodel* s, ae_state *_state)
{
    ae_int_t i;
    ae_bool allsmaller;
    ae_bool isdegenerate;
    ae_bool result;
    
    
    isdegenerate = ae_false;
    isdegenerate = isdegenerate||s->algotype==0;
    isdegenerate = isdegenerate||s->nsequences==0;
    allsmaller = ae_true;
    for(i=0; i<=s->nsequences-1; i++)
    {
        allsmaller = allsmaller&&s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]<s->windowwidth;
    }
    isdegenerate = isdegenerate||allsmaller;
    result = !isdegenerate;
    return result;
}


/*************************************************************************
This function checks whether the I-th sequence is big enough for analysis
or not.

I=-1 is used to denote the last sequence (False is returned for
NSequences=0).

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static ae_bool ssa_issequencebigenough(ssamodel* s,
     ae_int_t i,
     ae_state *_state)
{
    ae_bool result;
    
    
    ae_assert(i>=-1&&i<s->nsequences, "Assertion failed", _state);
    result = ae_false;
    if( s->nsequences==0 )
    {
        return result;
    }
    if( i<0 )
    {
        i = s->nsequences-1;
    }
    result = s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]>=s->windowwidth;
    return result;
}


/*************************************************************************
This function performs a basis update: either a full update (recalculation
from the very beginning) or a partial update (handling an append to the
end of the dataset).

With AppendLen=0 this function behaves as follows:
* if AreBasisAndSolverValid=False, then the solver object is created from
  scratch and initial calculations are performed according to the specific
  SSA algorithm being chosen. The Basis/Solver validity flag is set to
  True, then we immediately return.
* if AreBasisAndSolverValid=True, then nothing is done - we immediately
  return.

With AppendLen>0 this function behaves as follows:
* if AreBasisAndSolverValid=False, then an exception is generated; you can
  append points only to a fully constructed basis. Call this function with
  zero AppendLen BEFORE the append, then perform the append, then call it
  one more time with non-zero AppendLen.
* if AreBasisAndSolverValid=True, then the basis is incrementally updated.
  The recurrence relation used for prediction is updated too. It is
  expected that either AppendLen=1, or AppendLen=length(last_sequence).
  The basis update is performed with probability UpdateIts (larger-than-
  one values mean that some amount of iterations is always performed).


In any case, after calling this function we either:
* have an exception
* have a completely valid basis

IMPORTANT: this function expects that we do NOT call it for degenerate
           tasks (no data). So, call it only after a check with
           HasSomethingToAnalyze() returned True.

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_updatebasis(ssamodel* s,
     ae_int_t appendlen,
     double updateits,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t srcoffs;
    ae_int_t dstoffs;
    ae_int_t winw;
    ae_int_t windowstotal;
    ae_int_t requesttype;
    ae_int_t requestsize;
    double v;
    ae_bool degeneraterecurrence;
    double nu2;
    ae_int_t subspaceits;
    ae_bool needevd;
    
    
    winw = s->windowwidth;
    
    /*
     * Critical checks
     */
    ae_assert(appendlen>=0, "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
    ae_assert(!(!s->arebasisandsolvervalid&&appendlen!=0), "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
    ae_assert(!(appendlen==0&&ae_fp_greater(updateits,0.0)), "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
    
    /*
     * Everything is OK, nothing to do
     */
    if( s->arebasisandsolvervalid&&appendlen==0 )
    {
        return;
    }
    
    /*
     * Seed RNG with fixed or random seed.
     *
     * The RNG is used when pseudorandomly deciding whether to re-evaluate
     * the basis or not. A random seed is important when we have several
     * simultaneously calculated SSA models - we do not want them to be
     * re-evaluated at the same moments.
     */
    if( !s->arebasisandsolvervalid )
    {
        if( s->rngseed>0 )
        {
            hqrndseed(s->rngseed, s->rngseed+235, &s->rs, _state);
        }
        else
        {
            hqrndrandomize(&s->rs, _state);
        }
    }
    
    /*
     * Compute XXT for algorithms which need it
     */
    if( !s->arebasisandsolvervalid )
    {
        ae_assert(appendlen==0, "SSA: integrity check failed / 34cx6", _state);
        if( s->algotype==2 )
        {
            
            /*
             * Compute X*X^T for direct algorithm.
             * Quite straightforward, no subtle optimizations.
             */
            rmatrixsetlengthatleast(&s->xxt, winw, winw, _state);
            windowstotal = 0;
            for(i=0; i<=s->nsequences-1; i++)
            {
                windowstotal = windowstotal+ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state);
            }
            ae_assert(windowstotal>0, "SSA: integrity check in UpdateBasis() failed / 76t34", _state);
            for(i=0; i<=winw-1; i++)
            {
                for(j=0; j<=winw-1; j++)
                {
                    s->xxt.ptr.pp_double[i][j] = (double)(0);
                }
            }
            ssa_updatexxtprepare(s, windowstotal, winw, s->memorylimit, _state);
            for(i=0; i<=s->nsequences-1; i++)
            {
                for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state)-1; j++)
                {
                    ssa_updatexxtsend(s, &s->sequencedata, s->sequenceidx.ptr.p_int[i]+j, &s->xxt, _state);
                }
            }
            ssa_updatexxtfinalize(s, &s->xxt, _state);
        }
        if( s->algotype==3 )
        {
            
            /*
             * Compute X*X^T for real-time algorithm:
             * * prepare queue of windows to merge into XXT
             * * shuffle queue in order to avoid time-related biases in the algorithm
             * * dequeue first chunk
             */
            rmatrixsetlengthatleast(&s->xxt, winw, winw, _state);
            windowstotal = 0;
            for(i=0; i<=s->nsequences-1; i++)
            {
                windowstotal = windowstotal+ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state);
            }
            ae_assert(windowstotal>0, "SSA: integrity check in UpdateBasis() failed / 76t34", _state);
            ivectorsetlengthatleast(&s->rtqueue, windowstotal, _state);
            dstoffs = 0;
            for(i=0; i<=s->nsequences-1; i++)
            {
                for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state)-1; j++)
                {
                    srcoffs = s->sequenceidx.ptr.p_int[i]+j;
                    s->rtqueue.ptr.p_int[dstoffs] = srcoffs;
                    inc(&dstoffs, _state);
                }
            }
            ae_assert(dstoffs==windowstotal, "SSA: integrity check in UpdateBasis() failed / fh45f", _state);
            if( s->rtpowerup>1 )
            {
                
                /*
                 * Shuffle queue; it helps to avoid time-related bias in the algorithm
                 */
                for(i=0; i<=windowstotal-1; i++)
                {
                    j = i+hqrnduniformi(&s->rs, windowstotal-i, _state);
                    swapelementsi(&s->rtqueue, i, j, _state);
                }
            }
            s->rtqueuecnt = windowstotal;
            s->rtqueuechunk = 1;
            s->rtqueuechunk = ae_maxint(s->rtqueuechunk, s->rtqueuecnt/s->rtpowerup, _state);
            s->rtqueuechunk = ae_maxint(s->rtqueuechunk, 2*s->topk, _state);
            ssa_realtimedequeue(s, 0.0, ae_minint(s->rtqueuechunk, s->rtqueuecnt, _state), _state);
        }
    }
    
    /*
     * Handle possible updates for XXT:
     * * check that append involves either last point of last sequence,
     *   or entire last sequence
     * * if last sequence is shorter than window width, perform quick exit -
     *   we have nothing to update - no windows to insert into XXT
     * * update XXT
     */
    if( appendlen>0 )
    {
        ae_assert(s->arebasisandsolvervalid, "SSA: integrity check failed / 5gvz3", _state);
        ae_assert(s->nsequences>=1, "SSA: integrity check failed / 658ev", _state);
        ae_assert(appendlen==1||appendlen==s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, "SSA: integrity check failed / sd3g7", _state);
        if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
        {
            
            /*
             * Last sequence is too short, nothing to update
             */
            return;
        }
        if( s->algotype==2||s->algotype==3 )
        {
            if( appendlen>1 )
            {
                
                /*
                 * Long append, use GEMM-class (batched SYRK) updates
                 */
                ssa_updatexxtprepare(s, appendlen, winw, s->memorylimit, _state);
                for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, 0, _state)-1; j++)
                {
                    ssa_updatexxtsend(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences-1]+j, &s->xxt, _state);
                }
                ssa_updatexxtfinalize(s, &s->xxt, _state);
            }
            else
            {
                
                /*
                 * Just one element is added, use rank-1 update
                 */
                rmatrixger(winw, winw, &s->xxt, 0, 0, 1.0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-winw, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-winw, _state);
            }
        }
    }
    
    /*
     * Now, perform basis calculation - either full recalculation (AppendLen=0)
     * or quick update (AppendLen>0).
     */
    if( s->algotype==1 )
    {
        
        /*
         * Precomputed basis
         */
        if( winw!=s->precomputedwidth )
        {
            
            /*
             * Window width has changed, reset basis to zeros
             */
            s->nbasis = 1;
            rmatrixsetlengthatleast(&s->basis, winw, 1, _state);
            rvectorsetlengthatleast(&s->sv, 1, _state);
            for(i=0; i<=winw-1; i++)
            {
                s->basis.ptr.pp_double[i][0] = 0.0;
            }
            s->sv.ptr.p_double[0] = 0.0;
        }
        else
        {
            
            /*
             * OK, use precomputed basis
             */
            s->nbasis = s->precomputednbasis;
            rmatrixsetlengthatleast(&s->basis, winw, s->nbasis, _state);
            rvectorsetlengthatleast(&s->sv, s->nbasis, _state);
            for(j=0; j<=s->nbasis-1; j++)
            {
                s->sv.ptr.p_double[j] = 0.0;
                for(i=0; i<=winw-1; i++)
                {
                    s->basis.ptr.pp_double[i][j] = s->precomputedbasis.ptr.pp_double[i][j];
                }
            }
        }
        rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
        rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
    }
    else
    {
        if( s->algotype==2 )
        {
            
            /*
             * Direct top-K algorithm
             *
             * Calculate eigenvectors with SMatrixEVD(), reorder by
             * descending magnitudes.
             *
             * Update is performed for an invalid basis or for non-zero UpdateIts.
             */
            needevd = !s->arebasisandsolvervalid;
            needevd = needevd||ae_fp_greater_eq(updateits,(double)(1));
            needevd = needevd||ae_fp_less(hqrnduniformr(&s->rs, _state),updateits-ae_ifloor(updateits, _state));
            if( needevd )
            {
                inc(&s->dbgcntevd, _state);
                s->nbasis = ae_minint(winw, s->topk, _state);
                if( !smatrixevd(&s->xxt, winw, 1, ae_true, &s->sv, &s->basis, _state) )
                {
                    ae_assert(ae_false, "SSA: SMatrixEVD failed", _state);
                }
                for(i=0; i<=winw-1; i++)
                {
                    k = winw-1-i;
                    if( i>=k )
                    {
                        break;
                    }
                    v = s->sv.ptr.p_double[i];
                    s->sv.ptr.p_double[i] = s->sv.ptr.p_double[k];
                    s->sv.ptr.p_double[k] = v;
                    for(j=0; j<=winw-1; j++)
                    {
                        v = s->basis.ptr.pp_double[j][i];
                        s->basis.ptr.pp_double[j][i] = s->basis.ptr.pp_double[j][k];
                        s->basis.ptr.pp_double[j][k] = v;
                    }
                }
                for(i=0; i<=s->nbasis-1; i++)
                {
                    s->sv.ptr.p_double[i] = ae_sqrt(ae_maxreal(s->sv.ptr.p_double[i], 0.0, _state), _state);
                }
                rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
                rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
            }
        }
        else
        {
            if( s->algotype==3 )
            {
                
                /*
                 * Real-time top-K.
                 *
                 * Determine actual number of basis components, prepare subspace
                 * solver (either create from scratch or reuse).
                 *
                 * Update is always performed for an invalid basis; for a valid
                 * basis it is performed with probability UpdateIts.
                 */
                if( s->rtpowerup==1 )
                {
                    subspaceits = s->defaultsubspaceits;
                }
                else
                {
                    subspaceits = 3;
                }
                if( appendlen>0 )
                {
                    ae_assert(s->arebasisandsolvervalid, "SSA: integrity check in UpdateBasis() failed / srg6f", _state);
                    ae_assert(ae_fp_greater_eq(updateits,(double)(0)), "SSA: integrity check in UpdateBasis() failed / srg4f", _state);
                    subspaceits = ae_ifloor(updateits, _state);
                    if( ae_fp_less(hqrnduniformr(&s->rs, _state),updateits-ae_ifloor(updateits, _state)) )
                    {
                        inc(&subspaceits, _state);
                    }
                    ae_assert(subspaceits>=0, "SSA: integrity check in UpdateBasis() failed / srg9f", _state);
                }
                
                /*
                 * Dequeue pending dataset and merge it into XXT.
                 *
                 * Dequeuing is done only for appends, and only when we have
                 * a non-empty queue.
                 */
                if( appendlen>0&&s->rtqueuecnt>0 )
                {
                    ssa_realtimedequeue(s, 1.0, ae_minint(s->rtqueuechunk, s->rtqueuecnt, _state), _state);
                }
                
                /*
                 * Now, proceed to solver
                 */
                if( subspaceits>0 )
                {
                    if( appendlen==0 )
                    {
                        s->nbasis = ae_minint(winw, s->topk, _state);
                        eigsubspacecreatebuf(winw, s->nbasis, &s->solver, _state);
                    }
                    else
                    {
                        eigsubspacesetwarmstart(&s->solver, ae_true, _state);
                    }
                    eigsubspacesetcond(&s->solver, 0.0, subspaceits, _state);
                    
                    /*
                     * Perform initial basis estimation
                     */
                    inc(&s->dbgcntevd, _state);
                    eigsubspaceoocstart(&s->solver, 0, _state);
                    while(eigsubspaceooccontinue(&s->solver, _state))
                    {
                        eigsubspaceoocgetrequestinfo(&s->solver, &requesttype, &requestsize, _state);
                        ae_assert(requesttype==0, "SSA: integrity check in UpdateBasis() failed / 346372", _state);
                        rmatrixgemm(winw, requestsize, winw, 1.0, &s->xxt, 0, 0, 0, &s->solver.x, 0, 0, 0, 0.0, &s->solver.ax, 0, 0, _state);
                    }
                    eigsubspaceoocstop(&s->solver, &s->sv, &s->basis, &s->solverrep, _state);
                    for(i=0; i<=s->nbasis-1; i++)
                    {
                        s->sv.ptr.p_double[i] = ae_sqrt(ae_maxreal(s->sv.ptr.p_double[i], 0.0, _state), _state);
                    }
                    rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
                    rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
                }
            }
            else
            {
                ae_assert(ae_false, "SSA: integrity check in UpdateBasis() failed / dfgs34", _state);
            }
        }
    }
    
    /*
     * Update recurrent relation
     */
    rvectorsetlengthatleast(&s->forecasta, ae_maxint(winw-1, 1, _state), _state);
    degeneraterecurrence = ae_false;
    if( winw>1 )
    {
        
        /*
         * Non-degenerate case
         */
        rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
        nu2 = 0.0;
        for(i=0; i<=s->nbasis-1; i++)
        {
            v = s->basist.ptr.pp_double[i][winw-1];
            s->tmp0.ptr.p_double[i] = v;
            nu2 = nu2+v*v;
        }
        if( ae_fp_less(nu2,1-1000*ae_machineepsilon) )
        {
            rmatrixgemv(winw-1, s->nbasis, 1/(1-nu2), &s->basist, 0, 0, 1, &s->tmp0, 0, 0.0, &s->forecasta, 0, _state);
        }
        else
        {
            degeneraterecurrence = ae_true;
        }
    }
    else
    {
        degeneraterecurrence = ae_true;
    }
    if( degeneraterecurrence )
    {
        for(i=0; i<=ae_maxint(winw-1, 1, _state)-1; i++)
        {
            s->forecasta.ptr.p_double[i] = 0.0;
        }
        s->forecasta.ptr.p_double[ae_maxint(winw-1, 1, _state)-1] = 1.0;
    }
    
    /*
     * Set validity flag
     */
    s->arebasisandsolvervalid = ae_true;
}


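/*
 * Mathematical note on the function above (a sketch, not library
 * documentation): the eigenvectors of the lag-covariance matrix X*X^T are
 * the left singular vectors of the trajectory matrix X, which is why the
 * singular values SV[] are recovered as sqrt(max(eigenvalue,0)). For the
 * recurrence coefficients, let P_1..P_K be the basis columns and pi_i the
 * last component of P_i; with the "verticality" nu^2 = sum_i pi_i^2, the
 * standard SSA forecasting LRR is
 *
 *     R = 1/(1-nu^2) * sum_i pi_i * P_i^flat,
 *
 * where P_i^flat is P_i with its last component dropped. This is exactly
 * what the GEMV with coefficient 1/(1-nu2) computes; the recurrence exists
 * only for nu^2 < 1, which is the DegenerateRecurrence branch above.
 */
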
/*************************************************************************
This function performs analysis using the current basis. It assumes and
checks that the validity flag AreBasisAndSolverValid is set.

INPUT PARAMETERS:
    S           -   model
    Data        -   array which holds data in elements [I0,I1):
                    * right bound is not included.
                    * I1-I0>=WindowWidth (assertion is performed).
    Trend       -   preallocated output array, large enough
    Noise       -   preallocated output array, large enough
    Offs        -   offset in Trend/Noise where the result is stored;
                    I1-I0 elements are written starting at offset Offs.

OUTPUT PARAMETERS:
    Trend, Noise -  processing results


  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_analyzesequence(ssamodel* s,
     /* Real */ ae_vector* data,
     ae_int_t i0,
     ae_int_t i1,
     /* Real */ ae_vector* trend,
     /* Real */ ae_vector* noise,
     ae_int_t offs,
     ae_state *_state)
{
    ae_int_t winw;
    ae_int_t nwindows;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t cnt;
    ae_int_t batchstart;
    ae_int_t batchlimit;
    ae_int_t batchsize;
    
    
    ae_assert(s->arebasisandsolvervalid, "AnalyzeSequence: integrity check failed / d84sz0", _state);
    ae_assert(i1-i0>=s->windowwidth, "AnalyzeSequence: integrity check failed / d84sz1", _state);
    ae_assert(s->nbasis>=1, "AnalyzeSequence: integrity check failed / d84sz2", _state);
    nwindows = i1-i0-s->windowwidth+1;
    winw = s->windowwidth;
    batchlimit = ae_maxint(nwindows, 1, _state);
    if( s->memorylimit>0 )
    {
        batchlimit = ae_minint(batchlimit, ae_maxint(s->memorylimit/winw, 4*winw, _state), _state);
    }
    
    /*
     * Zero-initialize trend and counts
     */
    cnt = i1-i0;
    ivectorsetlengthatleast(&s->aseqcounts, cnt, _state);
    for(i=0; i<=cnt-1; i++)
    {
        s->aseqcounts.ptr.p_int[i] = 0;
        trend->ptr.p_double[offs+i] = 0.0;
    }
    
    /*
     * Reset temporaries if algorithm settings changed since last round
     */
    if( s->aseqtrajectory.cols!=winw )
    {
        ae_matrix_set_length(&s->aseqtrajectory, 0, 0, _state);
    }
    if( s->aseqtbproduct.cols!=s->nbasis )
    {
        ae_matrix_set_length(&s->aseqtbproduct, 0, 0, _state);
    }
    
    /*
     * Perform batch processing
     */
    rmatrixsetlengthatleast(&s->aseqtrajectory, batchlimit, winw, _state);
    rmatrixsetlengthatleast(&s->aseqtbproduct, batchlimit, s->nbasis, _state);
    batchsize = 0;
    batchstart = offs;
    for(i=0; i<=nwindows-1; i++)
    {
        
        /*
         * Enqueue next row of trajectory matrix
         */
        if( batchsize==0 )
        {
            batchstart = i;
        }
        for(j=0; j<=winw-1; j++)
        {
            s->aseqtrajectory.ptr.pp_double[batchsize][j] = data->ptr.p_double[i0+i+j];
        }
        inc(&batchsize, _state);
        
        /*
         * Process batch
         */
        if( batchsize==batchlimit||i==nwindows-1 )
        {
            
            /*
             * Project onto basis
             */
            rmatrixgemm(batchsize, s->nbasis, winw, 1.0, &s->aseqtrajectory, 0, 0, 0, &s->basist, 0, 0, 1, 0.0, &s->aseqtbproduct, 0, 0, _state);
            rmatrixgemm(batchsize, winw, s->nbasis, 1.0, &s->aseqtbproduct, 0, 0, 0, &s->basist, 0, 0, 0, 0.0, &s->aseqtrajectory, 0, 0, _state);
            
            /*
             * Hankelize
             */
            for(k=0; k<=batchsize-1; k++)
            {
                for(j=0; j<=winw-1; j++)
                {
                    trend->ptr.p_double[offs+batchstart+k+j] = trend->ptr.p_double[offs+batchstart+k+j]+s->aseqtrajectory.ptr.pp_double[k][j];
                    s->aseqcounts.ptr.p_int[batchstart+k+j] = s->aseqcounts.ptr.p_int[batchstart+k+j]+1;
                }
            }
            
            /*
             * Reset batch size
             */
            batchsize = 0;
        }
    }
    for(i=0; i<=cnt-1; i++)
    {
        trend->ptr.p_double[offs+i] = trend->ptr.p_double[offs+i]/s->aseqcounts.ptr.p_int[i];
    }
    
    /*
     * Output noise
     */
    for(i=0; i<=cnt-1; i++)
    {
        noise->ptr.p_double[offs+i] = data->ptr.p_double[i0+i]-trend->ptr.p_double[offs+i];
    }
}


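/*
 * Note on the Hankelization step above (a sketch of the math, not library
 * text): after projection, window i holds a rank-NBasis approximation
 * Y[i][0..W-1] of ticks i..i+W-1. Diagonal averaging reconstructs the
 * trend as
 *
 *     Trend[t] = ( sum of Y[i][j] over all (i,j) with i+j=t )
 *                / ( number of pairs (i,j) with i+j=t ),
 *
 * which is exactly what the accumulation into Trend[] and AseqCounts[],
 * followed by the division loop, computes. The noise is then defined as
 * the residual Data-Trend.
 */
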
/*************************************************************************
This function performs averaged forecasting. It assumes that the basis is
already built, and that everything is valid and checked. See comments on
similar public functions to find out more about averaged predictions.

INPUT PARAMETERS:
    S           -   model
    Data        -   array which holds data in elements [I0,I1):
                    * right bound is not included.
                    * I1-I0>=WindowWidth (assertion is performed).
    M           -   number of sliding windows to combine, M>=1. If
                    your dataset has less than M sliding windows, this
                    parameter will be silently reduced.
    ForecastLen -   number of ticks to predict, ForecastLen>=1
    Smooth      -   whether to apply trend extraction (smoothing) to each
                    sliding window before applying the recurrence
    Trend       -   preallocated output array, large enough
    Offs        -   offset in Trend where the result is stored;
                    ForecastLen elements are written starting at offset
                    Offs.

OUTPUT PARAMETERS:
    Trend       -   array[ForecastLen], forecasted trend

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_forecastavgsequence(ssamodel* s,
     /* Real */ ae_vector* data,
     ae_int_t i0,
     ae_int_t i1,
     ae_int_t m,
     ae_int_t forecastlen,
     ae_bool smooth,
     /* Real */ ae_vector* trend,
     ae_int_t offs,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t winw;
    
    
    ae_assert(s->arebasisandsolvervalid, "ForecastAvgSequence: integrity check failed / d84sz0", _state);
    ae_assert(i1-i0-s->windowwidth+1>=m, "ForecastAvgSequence: integrity check failed / d84sz1", _state);
    ae_assert(s->nbasis>=1, "ForecastAvgSequence: integrity check failed / d84sz2", _state);
    ae_assert(s->windowwidth>=2, "ForecastAvgSequence: integrity check failed / 5tgdg5", _state);
    ae_assert(s->windowwidth>s->nbasis, "ForecastAvgSequence: integrity check failed / d5g56w", _state);
    winw = s->windowwidth;
    
    /*
     * Prepare M synchronized predictions for the last known tick
     * (the last one is an actual value of the trend, the previous M-1
     * predictions are predictions from differently positioned sliding
     * windows).
     */
    rmatrixsetlengthatleast(&s->fctrendm, m, winw, _state);
    rvectorsetlengthatleast(&s->tmp0, ae_maxint(m, s->nbasis, _state), _state);
    rvectorsetlengthatleast(&s->tmp1, winw, _state);
    for(k=0; k<=m-1; k++)
    {
        
        /*
         * Perform prediction for rows [0,K-1]
         */
        rmatrixgemv(k, winw-1, 1.0, &s->fctrendm, 0, 1, 0, &s->forecasta, 0, 0.0, &s->tmp0, 0, _state);
        for(i=0; i<=k-1; i++)
        {
            for(j=1; j<=winw-1; j++)
            {
                s->fctrendm.ptr.pp_double[i][j-1] = s->fctrendm.ptr.pp_double[i][j];
            }
            s->fctrendm.ptr.pp_double[i][winw-1] = s->tmp0.ptr.p_double[i];
        }
        
        /*
         * Perform trend extraction for row K, add it to dataset
         */
        if( smooth )
        {
            rmatrixgemv(s->nbasis, winw, 1.0, &s->basist, 0, 0, 0, data, i1-winw-(m-1-k), 0.0, &s->tmp0, 0, _state);
            rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->tmp1, 0, _state);
            for(j=0; j<=winw-1; j++)
            {
                s->fctrendm.ptr.pp_double[k][j] = s->tmp1.ptr.p_double[j];
            }
        }
        else
        {
            for(j=0; j<=winw-1; j++)
            {
                s->fctrendm.ptr.pp_double[k][j] = data->ptr.p_double[i1-winw-(m-1-k)+j];
            }
        }
    }
    
    /*
     * Now we have M synchronized predictions of the sequence state at the
     * last known moment (the last "prediction" is just a copy of the
     * trend). Let's start batch prediction!
     */
    for(k=0; k<=forecastlen-1; k++)
    {
        rmatrixgemv(m, winw-1, 1.0, &s->fctrendm, 0, 1, 0, &s->forecasta, 0, 0.0, &s->tmp0, 0, _state);
        trend->ptr.p_double[offs+k] = 0.0;
        for(i=0; i<=m-1; i++)
        {
            for(j=1; j<=winw-1; j++)
            {
                s->fctrendm.ptr.pp_double[i][j-1] = s->fctrendm.ptr.pp_double[i][j];
            }
            s->fctrendm.ptr.pp_double[i][winw-1] = s->tmp0.ptr.p_double[i];
            trend->ptr.p_double[offs+k] = trend->ptr.p_double[offs+k]+s->tmp0.ptr.p_double[i];
        }
        trend->ptr.p_double[offs+k] = trend->ptr.p_double[offs+k]/m;
    }
}


/*************************************************************************
This function extracts updates from the real-time queue and applies them
to the S.XXT matrix. XXT is premultiplied by Beta, which can be 0.0 for
initial creation, 1.0 for subsequent updates, or even within (0,1) for
some kind of updates with decay.

INPUT PARAMETERS:
    S           -   model
    Beta        -   >=0, coefficient to premultiply XXT
    Cnt         -   0<Cnt<=S.RTQueueCnt, number of updates to extract
                    from the end of the queue

OUTPUT PARAMETERS:
    S           -   S.XXT updated, S.RTQueueCnt decreased

  -- ALGLIB --
     Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_realtimedequeue(ssamodel* s,
     double beta,
     ae_int_t cnt,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t winw;
    
    
    ae_assert(cnt>0, "SSA: RealTimeDequeue() integrity check failed / 43tdv", _state);
    ae_assert(ae_isfinite(beta, _state)&&ae_fp_greater_eq(beta,(double)(0)), "SSA: RealTimeDequeue() integrity check failed / 5gdg6", _state);
    ae_assert(cnt<=s->rtqueuecnt, "SSA: RealTimeDequeue() integrity check failed / 547yh", _state);
    ae_assert(s->xxt.cols>=s->windowwidth, "SSA: RealTimeDequeue() integrity check failed / 54bf4", _state);
    ae_assert(s->xxt.rows>=s->windowwidth, "SSA: RealTimeDequeue() integrity check failed / 9gdfn", _state);
    winw = s->windowwidth;
    
    /*
     * Premultiply XXT by Beta
     */
    if( ae_fp_neq(beta,(double)(0)) )
    {
        for(i=0; i<=winw-1; i++)
        {
            for(j=0; j<=winw-1; j++)
            {
                s->xxt.ptr.pp_double[i][j] = s->xxt.ptr.pp_double[i][j]*beta;
            }
        }
    }
    else
    {
        for(i=0; i<=winw-1; i++)
        {
            for(j=0; j<=winw-1; j++)
            {
                s->xxt.ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
    
    /*
     * Dequeue
     */
    ssa_updatexxtprepare(s, cnt, winw, s->memorylimit, _state);
    for(i=0; i<=cnt-1; i++)
    {
        ssa_updatexxtsend(s, &s->sequencedata, s->rtqueue.ptr.p_int[s->rtqueuecnt-1], &s->xxt, _state);
        dec(&s->rtqueuecnt, _state);
    }
    ssa_updatexxtfinalize(s, &s->xxt, _state);
}


/*************************************************************************
This function prepares the batch buffer for XXT updates. The idea is that
we send a stream of "XXT += u*u'" updates, and we want to package them
into one big matrix update U*U', applied with the SYRK() kernel; but U can
consume too much memory, so we want to transparently divide it into a few
smaller chunks.

This set of functions solves the problem:
* UpdateXXTPrepare() prepares temporary buffers
* UpdateXXTSend() sends the next u to the buffer, possibly initiating the
  next SYRK()
* UpdateXXTFinalize() performs the last SYRK() update

INPUT PARAMETERS:
    S           -   model, only fields with the UX prefix are used
    UpdateSize  -   number of updates
    WindowWidth -   window width, >0
    MemoryLimit -   memory limit, non-positive value means no limit

OUTPUT PARAMETERS:
    S           -   UX temporaries updated

  -- ALGLIB --
     Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_updatexxtprepare(ssamodel* s,
     ae_int_t updatesize,
     ae_int_t windowwidth,
     ae_int_t memorylimit,
     ae_state *_state)
{
    
    
    ae_assert(windowwidth>0, "UpdateXXTPrepare: WinW<=0", _state);
    s->uxbatchlimit = ae_maxint(updatesize, 1, _state);
    if( memorylimit>0 )
    {
        s->uxbatchlimit = ae_minint(s->uxbatchlimit, ae_maxint(memorylimit/windowwidth, 4*windowwidth, _state), _state);
    }
    s->uxbatchwidth = windowwidth;
    s->uxbatchsize = 0;
    if( s->uxbatch.cols!=windowwidth )
    {
        ae_matrix_set_length(&s->uxbatch, 0, 0, _state);
    }
    rmatrixsetlengthatleast(&s->uxbatch, s->uxbatchlimit, windowwidth, _state);
}


/*************************************************************************
This function sends the update u*u' to the batch buffer.

INPUT PARAMETERS:
    S           -   model, only fields with the UX prefix are used
    U           -   WindowWidth-sized update, starts at I0
    I0          -   starting position of the update

OUTPUT PARAMETERS:
    S           -   UX temporaries updated
    XXT         -   array[WindowWidth,WindowWidth], in the middle of an
                    update. All intermediate updates are applied to the
                    upper triangle.

  -- ALGLIB --
     Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_updatexxtsend(ssamodel* s,
     /* Real */ ae_vector* u,
     ae_int_t i0,
     /* Real */ ae_matrix* xxt,
     ae_state *_state)
{
    
    
    ae_assert(i0+s->uxbatchwidth-1<u->cnt, "UpdateXXTSend: incorrect U size", _state);
    ae_assert(s->uxbatchsize>=0, "UpdateXXTSend: integrity check failure", _state);
    ae_assert(s->uxbatchsize<=s->uxbatchlimit, "UpdateXXTSend: integrity check failure", _state);
    ae_assert(s->uxbatchlimit>=1, "UpdateXXTSend: integrity check failure", _state);
    
    /*
     * Send pending batch if full
     */
    if( s->uxbatchsize==s->uxbatchlimit )
    {
        rmatrixsyrk(s->uxbatchwidth, s->uxbatchsize, 1.0, &s->uxbatch, 0, 0, 2, 1.0, xxt, 0, 0, ae_true, _state);
        s->uxbatchsize = 0;
    }
    
    /*
     * Append update to batch
     */
    ae_v_move(&s->uxbatch.ptr.pp_double[s->uxbatchsize][0], 1, &u->ptr.p_double[i0], 1, ae_v_len(0,s->uxbatchwidth-1));
    inc(&s->uxbatchsize, _state);
}


/*************************************************************************
This function finalizes the batch buffer. Call it after the last update.

INPUT PARAMETERS:
    S           -   model, only fields with the UX prefix are used

OUTPUT PARAMETERS:
    S           -   UX temporaries updated
    XXT         -   array[WindowWidth,WindowWidth], updated with all
                    previous updates; both triangles of the symmetric
                    matrix are present.

  -- ALGLIB --
     Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
static void ssa_updatexxtfinalize(ssamodel* s,
     /* Real */ ae_matrix* xxt,
     ae_state *_state)
{
    
    
    ae_assert(s->uxbatchsize>=0, "UpdateXXTFinalize: integrity check failure", _state);
    ae_assert(s->uxbatchsize<=s->uxbatchlimit, "UpdateXXTFinalize: integrity check failure", _state);
    ae_assert(s->uxbatchlimit>=1, "UpdateXXTFinalize: integrity check failure", _state);
    if( s->uxbatchsize>0 )
    {
        rmatrixsyrk(s->uxbatchwidth, s->uxbatchsize, 1.0, &s->uxbatch, 0, 0, 2, 1.0, xxt, 0, 0, ae_true, _state);
        s->uxbatchsize = 0;
    }
    rmatrixenforcesymmetricity(xxt, s->uxbatchwidth, ae_true, _state);
}


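/*
 * The three UX functions above implement a simple accumulate-and-flush
 * pattern. A minimal sketch of the intended call sequence (internal API;
 * NUpdates and Offs[] are hypothetical names used for illustration only):
 *
 *     ssa_updatexxtprepare(s, nupdates, winw, s->memorylimit, _state);
 *     for(i=0; i<=nupdates-1; i++)
 *     {
 *         // buffers one window; flushes via SYRK when the buffer is full
 *         ssa_updatexxtsend(s, &s->sequencedata, offs[i], &s->xxt, _state);
 *     }
 *     // flushes the remainder and restores symmetry of XXT
 *     ssa_updatexxtfinalize(s, &s->xxt, _state);
 *
 * Packing many rank-1 updates into one RMatrixSYRK() call trades a little
 * memory (the UXBatch buffer, capped by MemoryLimit) for level-3 BLAS
 * performance.
 */
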
void _ssamodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    ssamodel *p = (ssamodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->sequenceidx, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->sequencedata, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->precomputedbasis, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->basis, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->basist, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->sv, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->forecasta, 0, DT_REAL, _state, make_automatic);
    _eigsubspacestate_init(&p->solver, _state, make_automatic);
    ae_matrix_init(&p->xxt, 0, 0, DT_REAL, _state, make_automatic);
    _hqrndstate_init(&p->rs, _state, make_automatic);
    ae_vector_init(&p->rtqueue, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic);
    _eigsubspacereport_init(&p->solverrep, _state, make_automatic);
    ae_vector_init(&p->alongtrend, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->alongnoise, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->aseqtrajectory, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->aseqtbproduct, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->aseqcounts, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->fctrend, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->fcnoise, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->fctrendm, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->uxbatch, 0, 0, DT_REAL, _state, make_automatic);
}


void _ssamodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
ssamodel *dst = (ssamodel*)_dst;
|
|
ssamodel *src = (ssamodel*)_src;
|
|
dst->nsequences = src->nsequences;
|
|
ae_vector_init_copy(&dst->sequenceidx, &src->sequenceidx, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->sequencedata, &src->sequencedata, _state, make_automatic);
|
|
dst->algotype = src->algotype;
|
|
dst->windowwidth = src->windowwidth;
|
|
dst->rtpowerup = src->rtpowerup;
|
|
dst->topk = src->topk;
|
|
dst->precomputedwidth = src->precomputedwidth;
|
|
dst->precomputednbasis = src->precomputednbasis;
|
|
ae_matrix_init_copy(&dst->precomputedbasis, &src->precomputedbasis, _state, make_automatic);
|
|
dst->defaultsubspaceits = src->defaultsubspaceits;
|
|
dst->memorylimit = src->memorylimit;
|
|
dst->arebasisandsolvervalid = src->arebasisandsolvervalid;
|
|
ae_matrix_init_copy(&dst->basis, &src->basis, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->basist, &src->basist, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->sv, &src->sv, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->forecasta, &src->forecasta, _state, make_automatic);
|
|
dst->nbasis = src->nbasis;
|
|
_eigsubspacestate_init_copy(&dst->solver, &src->solver, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->xxt, &src->xxt, _state, make_automatic);
|
|
_hqrndstate_init_copy(&dst->rs, &src->rs, _state, make_automatic);
|
|
dst->rngseed = src->rngseed;
|
|
ae_vector_init_copy(&dst->rtqueue, &src->rtqueue, _state, make_automatic);
|
|
dst->rtqueuecnt = src->rtqueuecnt;
|
|
dst->rtqueuechunk = src->rtqueuechunk;
|
|
dst->dbgcntevd = src->dbgcntevd;
|
|
ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
|
|
_eigsubspacereport_init_copy(&dst->solverrep, &src->solverrep, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->alongtrend, &src->alongtrend, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->alongnoise, &src->alongnoise, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->aseqtrajectory, &src->aseqtrajectory, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->aseqtbproduct, &src->aseqtbproduct, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->aseqcounts, &src->aseqcounts, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->fctrend, &src->fctrend, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->fcnoise, &src->fcnoise, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->fctrendm, &src->fctrendm, _state, make_automatic);
|
|
ae_matrix_init_copy(&dst->uxbatch, &src->uxbatch, _state, make_automatic);
|
|
dst->uxbatchwidth = src->uxbatchwidth;
|
|
dst->uxbatchsize = src->uxbatchsize;
|
|
dst->uxbatchlimit = src->uxbatchlimit;
|
|
}


void _ssamodel_clear(void* _p)
{
    ssamodel *p = (ssamodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->sequenceidx);
    ae_vector_clear(&p->sequencedata);
    ae_matrix_clear(&p->precomputedbasis);
    ae_matrix_clear(&p->basis);
    ae_matrix_clear(&p->basist);
    ae_vector_clear(&p->sv);
    ae_vector_clear(&p->forecasta);
    _eigsubspacestate_clear(&p->solver);
    ae_matrix_clear(&p->xxt);
    _hqrndstate_clear(&p->rs);
    ae_vector_clear(&p->rtqueue);
    ae_vector_clear(&p->tmp0);
    ae_vector_clear(&p->tmp1);
    _eigsubspacereport_clear(&p->solverrep);
    ae_vector_clear(&p->alongtrend);
    ae_vector_clear(&p->alongnoise);
    ae_matrix_clear(&p->aseqtrajectory);
    ae_matrix_clear(&p->aseqtbproduct);
    ae_vector_clear(&p->aseqcounts);
    ae_vector_clear(&p->fctrend);
    ae_vector_clear(&p->fcnoise);
    ae_matrix_clear(&p->fctrendm);
    ae_matrix_clear(&p->uxbatch);
}


void _ssamodel_destroy(void* _p)
{
    ssamodel *p = (ssamodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->sequenceidx);
    ae_vector_destroy(&p->sequencedata);
    ae_matrix_destroy(&p->precomputedbasis);
    ae_matrix_destroy(&p->basis);
    ae_matrix_destroy(&p->basist);
    ae_vector_destroy(&p->sv);
    ae_vector_destroy(&p->forecasta);
    _eigsubspacestate_destroy(&p->solver);
    ae_matrix_destroy(&p->xxt);
    _hqrndstate_destroy(&p->rs);
    ae_vector_destroy(&p->rtqueue);
    ae_vector_destroy(&p->tmp0);
    ae_vector_destroy(&p->tmp1);
    _eigsubspacereport_destroy(&p->solverrep);
    ae_vector_destroy(&p->alongtrend);
    ae_vector_destroy(&p->alongnoise);
    ae_matrix_destroy(&p->aseqtrajectory);
    ae_matrix_destroy(&p->aseqtbproduct);
    ae_vector_destroy(&p->aseqcounts);
    ae_vector_destroy(&p->fctrend);
    ae_vector_destroy(&p->fcnoise);
    ae_matrix_destroy(&p->fctrendm);
    ae_matrix_destroy(&p->uxbatch);
}


#endif
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
Linear regression

Subroutine builds model:

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)

The model is returned in ALGLIB format, together with the covariance
matrix, training set errors (RMS, average, average relative) and a
leave-one-out cross-validation estimate of the generalization error. The
CV estimate is calculated using a fast algorithm with O(NPoints*NVars)
complexity.

When the covariance matrix is calculated, standard deviations of function
values are assumed to be equal to the RMS error on the training set.

INPUT PARAMETERS:
    XY          -   training set, array [0..NPoints-1,0..NVars]:
                    * NVars columns - independent variables
                    * last column - dependent variable
    NPoints     -   training set size, NPoints>NVars+1
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -255, in case of unknown internal error
                    * -4, if internal SVD subroutine hasn't converged
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1).
                    * 1, if subroutine successfully finished
    LM          -   linear model in the ALGLIB format. Use subroutines of
                    this unit to work with the model.
    AR          -   additional results

  -- ALGLIB --
     Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuild(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     linearmodel* lm,
     lrreport* ar,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector s;
    ae_int_t i;
    double sigma2;

    ae_frame_make(_state, &_frame_block);
    memset(&s, 0, sizeof(s));
    *info = 0;
    _linearmodel_clear(lm);
    _lrreport_clear(ar);
    ae_vector_init(&s, 0, DT_REAL, _state, ae_true);

    if( npoints<=nvars+1||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&s, npoints-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        s.ptr.p_double[i] = (double)(1);
    }
    lrbuilds(xy, &s, npoints, nvars, info, lm, ar, _state);
    if( *info<0 )
    {
        ae_frame_leave(_state);
        return;
    }
    sigma2 = ae_sqr(ar->rmserror, _state)*npoints/(npoints-nvars-1);
    for(i=0; i<=nvars; i++)
    {
        ae_v_muld(&ar->c.ptr.pp_double[i][0], 1, ae_v_len(0,nvars), sigma2);
    }
    ae_frame_leave(_state);
}
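

/*************************************************************************
Usage sketch for LRBuild (illustrative only, not part of the library; the
dataset values below are hypothetical). It shows the typical calling
sequence at the C-core level: fill XY with NVars feature columns plus one
target column, call lrbuild(), and check Info before using LM/AR.

    ae_state _state;
    ae_matrix xy;
    linearmodel lm;
    lrreport ar;
    ae_int_t info, i;

    ae_state_init(&_state);
    ae_matrix_init(&xy, 4, 2, DT_REAL, &_state, ae_true);
    _linearmodel_init(&lm, &_state, ae_true);
    _lrreport_init(&ar, &_state, ae_true);
    for(i=0; i<=3; i++)
    {
        xy.ptr.pp_double[i][0] = (double)i;      // independent variable
        xy.ptr.pp_double[i][1] = 2.0*i+1.0;      // dependent variable
    }
    lrbuild(&xy, 4, 1, &info, &lm, &ar, &_state);
    if( info==1 )
    {
        // model is ready; AR.RMSError, AR.CVRMSError etc. are valid
    }
*************************************************************************/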


/*************************************************************************
Linear regression

Variant of LRBuild which uses vector of standard deviations (errors in
function values).

INPUT PARAMETERS:
    XY          -   training set, array [0..NPoints-1,0..NVars]:
                    * NVars columns - independent variables
                    * last column - dependent variable
    S           -   standard deviations (errors in function values),
                    array[0..NPoints-1], S[i]>0.
    NPoints     -   training set size, NPoints>NVars+1
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -255, in case of unknown internal error
                    * -4, if internal SVD subroutine hasn't converged
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1).
                    * -2, if S[I]<=0
                    * 1, if subroutine successfully finished
    LM          -   linear model in the ALGLIB format. Use subroutines of
                    this unit to work with the model.
    AR          -   additional results

  -- ALGLIB --
     Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuilds(/* Real */ ae_matrix* xy,
     /* Real */ ae_vector* s,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     linearmodel* lm,
     lrreport* ar,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix xyi;
    ae_vector x;
    ae_vector means;
    ae_vector sigmas;
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t offs;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    ae_frame_make(_state, &_frame_block);
    memset(&xyi, 0, sizeof(xyi));
    memset(&x, 0, sizeof(x));
    memset(&means, 0, sizeof(means));
    memset(&sigmas, 0, sizeof(sigmas));
    *info = 0;
    _linearmodel_clear(lm);
    _lrreport_clear(ar);
    ae_matrix_init(&xyi, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);

    /*
     * Test parameters
     */
    if( npoints<=nvars+1||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Copy data, add one more column (constant term)
     */
    ae_matrix_set_length(&xyi, npoints-1+1, nvars+1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&xyi.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        xyi.ptr.pp_double[i][nvars] = (double)(1);
        xyi.ptr.pp_double[i][nvars+1] = xy->ptr.pp_double[i][nvars];
    }

    /*
     * Standardization
     */
    ae_vector_set_length(&x, npoints-1+1, _state);
    ae_vector_set_length(&means, nvars-1+1, _state);
    ae_vector_set_length(&sigmas, nvars-1+1, _state);
    for(j=0; j<=nvars-1; j++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
        samplemoments(&x, npoints, &mean, &variance, &skewness, &kurtosis, _state);
        means.ptr.p_double[j] = mean;
        sigmas.ptr.p_double[j] = ae_sqrt(variance, _state);
        if( ae_fp_eq(sigmas.ptr.p_double[j],(double)(0)) )
        {
            sigmas.ptr.p_double[j] = (double)(1);
        }
        for(i=0; i<=npoints-1; i++)
        {
            xyi.ptr.pp_double[i][j] = (xyi.ptr.pp_double[i][j]-means.ptr.p_double[j])/sigmas.ptr.p_double[j];
        }
    }

    /*
     * Internal processing
     */
    linreg_lrinternal(&xyi, s, npoints, nvars+1, info, lm, ar, _state);
    if( *info<0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Un-standardization
     */
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    for(j=0; j<=nvars-1; j++)
    {

        /*
         * Constant term is updated (and its covariance too,
         * since it gets some variance from J-th component)
         */
        lm->w.ptr.p_double[offs+nvars] = lm->w.ptr.p_double[offs+nvars]-lm->w.ptr.p_double[offs+j]*means.ptr.p_double[j]/sigmas.ptr.p_double[j];
        v = means.ptr.p_double[j]/sigmas.ptr.p_double[j];
        ae_v_subd(&ar->c.ptr.pp_double[nvars][0], 1, &ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
        ae_v_subd(&ar->c.ptr.pp_double[0][nvars], ar->c.stride, &ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);

        /*
         * J-th term is updated
         */
        lm->w.ptr.p_double[offs+j] = lm->w.ptr.p_double[offs+j]/sigmas.ptr.p_double[j];
        v = 1/sigmas.ptr.p_double[j];
        ae_v_muld(&ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
        ae_v_muld(&ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Like LRBuildS, but builds model

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

i.e. with zero constant term.

  -- ALGLIB --
     Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildzs(/* Real */ ae_matrix* xy,
     /* Real */ ae_vector* s,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     linearmodel* lm,
     lrreport* ar,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix xyi;
    ae_vector x;
    ae_vector c;
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t offs;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    ae_frame_make(_state, &_frame_block);
    memset(&xyi, 0, sizeof(xyi));
    memset(&x, 0, sizeof(x));
    memset(&c, 0, sizeof(c));
    *info = 0;
    _linearmodel_clear(lm);
    _lrreport_clear(ar);
    ae_matrix_init(&xyi, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&c, 0, DT_REAL, _state, ae_true);

    /*
     * Test parameters
     */
    if( npoints<=nvars+1||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Copy data, add one more column (constant term)
     */
    ae_matrix_set_length(&xyi, npoints-1+1, nvars+1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&xyi.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        xyi.ptr.pp_double[i][nvars] = (double)(0);
        xyi.ptr.pp_double[i][nvars+1] = xy->ptr.pp_double[i][nvars];
    }

    /*
     * Standardization: unusual scaling
     */
    ae_vector_set_length(&x, npoints-1+1, _state);
    ae_vector_set_length(&c, nvars-1+1, _state);
    for(j=0; j<=nvars-1; j++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
        samplemoments(&x, npoints, &mean, &variance, &skewness, &kurtosis, _state);
        if( ae_fp_greater(ae_fabs(mean, _state),ae_sqrt(variance, _state)) )
        {

            /*
             * variation is relatively small, it is better to
             * bring mean value to 1
             */
            c.ptr.p_double[j] = mean;
        }
        else
        {

            /*
             * variation is large, it is better to bring variance to 1
             */
            if( ae_fp_eq(variance,(double)(0)) )
            {
                variance = (double)(1);
            }
            c.ptr.p_double[j] = ae_sqrt(variance, _state);
        }
        for(i=0; i<=npoints-1; i++)
        {
            xyi.ptr.pp_double[i][j] = xyi.ptr.pp_double[i][j]/c.ptr.p_double[j];
        }
    }

    /*
     * Internal processing
     */
    linreg_lrinternal(&xyi, s, npoints, nvars+1, info, lm, ar, _state);
    if( *info<0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Un-standardization
     */
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    for(j=0; j<=nvars-1; j++)
    {

        /*
         * J-th term is updated
         */
        lm->w.ptr.p_double[offs+j] = lm->w.ptr.p_double[offs+j]/c.ptr.p_double[j];
        v = 1/c.ptr.p_double[j];
        ae_v_muld(&ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
        ae_v_muld(&ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Like LRBuild, but builds model

    Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

i.e. with zero constant term.

  -- ALGLIB --
     Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildz(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     linearmodel* lm,
     lrreport* ar,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector s;
    ae_int_t i;
    double sigma2;

    ae_frame_make(_state, &_frame_block);
    memset(&s, 0, sizeof(s));
    *info = 0;
    _linearmodel_clear(lm);
    _lrreport_clear(ar);
    ae_vector_init(&s, 0, DT_REAL, _state, ae_true);

    if( npoints<=nvars+1||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&s, npoints-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        s.ptr.p_double[i] = (double)(1);
    }
    lrbuildzs(xy, &s, npoints, nvars, info, lm, ar, _state);
    if( *info<0 )
    {
        ae_frame_leave(_state);
        return;
    }
    sigma2 = ae_sqr(ar->rmserror, _state)*npoints/(npoints-nvars-1);
    for(i=0; i<=nvars; i++)
    {
        ae_v_muld(&ar->c.ptr.pp_double[i][0], 1, ae_v_len(0,nvars), sigma2);
    }
    ae_frame_leave(_state);
}


/*************************************************************************
Unpacks coefficients of linear model.

INPUT PARAMETERS:
    LM          -   linear model in ALGLIB format

OUTPUT PARAMETERS:
    V           -   coefficients, array[0..NVars]
                    constant term (intercept) is stored in the V[NVars].
    NVars       -   number of independent variables (one less than number
                    of coefficients)

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrunpack(linearmodel* lm,
     /* Real */ ae_vector* v,
     ae_int_t* nvars,
     ae_state *_state)
{
    ae_int_t offs;

    ae_vector_clear(v);
    *nvars = 0;

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
    *nvars = ae_round(lm->w.ptr.p_double[2], _state);
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    ae_vector_set_length(v, *nvars+1, _state);
    ae_v_move(&v->ptr.p_double[0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,*nvars));
}


/*************************************************************************
"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
reversed).

INPUT PARAMETERS:
    V           -   coefficients, array[0..NVars]
    NVars       -   number of independent variables

OUTPUT PARAMETERS:
    LM          -   linear model.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrpack(/* Real */ ae_vector* v,
     ae_int_t nvars,
     linearmodel* lm,
     ae_state *_state)
{
    ae_int_t offs;

    _linearmodel_clear(lm);

    ae_vector_set_length(&lm->w, 4+nvars+1, _state);
    offs = 4;
    lm->w.ptr.p_double[0] = (double)(4+nvars+1);
    lm->w.ptr.p_double[1] = (double)(linreg_lrvnum);
    lm->w.ptr.p_double[2] = (double)(nvars);
    lm->w.ptr.p_double[3] = (double)(offs);
    ae_v_move(&lm->w.ptr.p_double[offs], 1, &v->ptr.p_double[0], 1, ae_v_len(offs,offs+nvars));
}
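

/*************************************************************************
Sketch of the LRPack/LRUnpack round trip (illustrative only; the
coefficient values below are arbitrary). Packing a coefficient vector and
unpacking it again returns the same coefficients and NVars.

    ae_state _state;
    ae_vector v, v2;
    linearmodel lm;
    ae_int_t nvars;

    ae_state_init(&_state);
    ae_vector_init(&v, 3, DT_REAL, &_state, ae_true);
    ae_vector_init(&v2, 0, DT_REAL, &_state, ae_true);
    _linearmodel_init(&lm, &_state, ae_true);
    v.ptr.p_double[0] = 1.5;    // A(0)
    v.ptr.p_double[1] = -2.0;   // A(1)
    v.ptr.p_double[2] = 0.25;   // A(2) = constant term
    lrpack(&v, 2, &lm, &_state);
    lrunpack(&lm, &v2, &nvars, &_state);
    // here NVars==2 and V2 holds the same three coefficients
*************************************************************************/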


/*************************************************************************
Processing

INPUT PARAMETERS:
    LM      -   linear model
    X       -   input vector, array[0..NVars-1].

Result:
    value of linear model regression estimate

  -- ALGLIB --
     Copyright 03.09.2008 by Bochkanov Sergey
*************************************************************************/
double lrprocess(linearmodel* lm,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    double v;
    ae_int_t offs;
    ae_int_t nvars;
    double result;

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
    result = v+lm->w.ptr.p_double[offs+nvars];
    return result;
}
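

/*************************************************************************
Evaluation sketch for LRProcess (illustrative only; continues the
hypothetical model packed in the previous sketch). With coefficients
A=(1.5,-2.0) and intercept 0.25, the estimate at X=(2,3) is
1.5*2 - 2.0*3 + 0.25 = -2.75.

    ae_vector x;
    double y;

    ae_vector_init(&x, 2, DT_REAL, &_state, ae_true);
    x.ptr.p_double[0] = 2.0;
    x.ptr.p_double[1] = 3.0;
    y = lrprocess(&lm, &x, &_state);    // y==-2.75
*************************************************************************/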


/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lrrmserror(linearmodel* lm,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_int_t i;
    double v;
    ae_int_t offs;
    ae_int_t nvars;
    double result;

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
        v = v+lm->w.ptr.p_double[offs+nvars];
        result = result+ae_sqr(v-xy->ptr.pp_double[i][nvars], _state);
    }
    result = ae_sqrt(result/npoints, _state);
    return result;
}


/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgerror(linearmodel* lm,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_int_t i;
    double v;
    ae_int_t offs;
    ae_int_t nvars;
    double result;

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
        v = v+lm->w.ptr.p_double[offs+nvars];
        result = result+ae_fabs(v-xy->ptr.pp_double[i][nvars], _state);
    }
    result = result/npoints;
    return result;
}


/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    LM      -   linear model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgrelerror(linearmodel* lm,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    double v;
    ae_int_t offs;
    ae_int_t nvars;
    double result;

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    offs = ae_round(lm->w.ptr.p_double[3], _state);
    result = (double)(0);
    k = 0;
    for(i=0; i<=npoints-1; i++)
    {
        if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
        {
            v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
            v = v+lm->w.ptr.p_double[offs+nvars];
            result = result+ae_fabs((v-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
            k = k+1;
        }
    }
    if( k!=0 )
    {
        result = result/k;
    }
    return result;
}


/*************************************************************************
Copying of LinearModel structure

INPUT PARAMETERS:
    LM1 -   original

OUTPUT PARAMETERS:
    LM2 -   copy

  -- ALGLIB --
     Copyright 15.03.2009 by Bochkanov Sergey
*************************************************************************/
void lrcopy(linearmodel* lm1, linearmodel* lm2, ae_state *_state)
{
    ae_int_t k;

    _linearmodel_clear(lm2);

    k = ae_round(lm1->w.ptr.p_double[0], _state);
    ae_vector_set_length(&lm2->w, k-1+1, _state);
    ae_v_move(&lm2->w.ptr.p_double[0], 1, &lm1->w.ptr.p_double[0], 1, ae_v_len(0,k-1));
}


void lrlines(/* Real */ ae_matrix* xy,
     /* Real */ ae_vector* s,
     ae_int_t n,
     ae_int_t* info,
     double* a,
     double* b,
     double* vara,
     double* varb,
     double* covab,
     double* corrab,
     double* p,
     ae_state *_state)
{
    ae_int_t i;
    double ss;
    double sx;
    double sxx;
    double sy;
    double stt;
    double e1;
    double e2;
    double t;
    double chi2;

    *info = 0;
    *a = 0;
    *b = 0;
    *vara = 0;
    *varb = 0;
    *covab = 0;
    *corrab = 0;
    *p = 0;

    if( n<2 )
    {
        *info = -1;
        return;
    }
    for(i=0; i<=n-1; i++)
    {
        if( ae_fp_less_eq(s->ptr.p_double[i],(double)(0)) )
        {
            *info = -2;
            return;
        }
    }
    *info = 1;

    /*
     * Calculate S, SX, SY, SXX
     */
    ss = (double)(0);
    sx = (double)(0);
    sy = (double)(0);
    sxx = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        t = ae_sqr(s->ptr.p_double[i], _state);
        ss = ss+1/t;
        sx = sx+xy->ptr.pp_double[i][0]/t;
        sy = sy+xy->ptr.pp_double[i][1]/t;
        sxx = sxx+ae_sqr(xy->ptr.pp_double[i][0], _state)/t;
    }

    /*
     * Test for condition number
     */
    t = ae_sqrt(4*ae_sqr(sx, _state)+ae_sqr(ss-sxx, _state), _state);
    e1 = 0.5*(ss+sxx+t);
    e2 = 0.5*(ss+sxx-t);
    if( ae_fp_less_eq(ae_minreal(e1, e2, _state),1000*ae_machineepsilon*ae_maxreal(e1, e2, _state)) )
    {
        *info = -3;
        return;
    }

    /*
     * Calculate A, B
     */
    *a = (double)(0);
    *b = (double)(0);
    stt = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        t = (xy->ptr.pp_double[i][0]-sx/ss)/s->ptr.p_double[i];
        *b = *b+t*xy->ptr.pp_double[i][1]/s->ptr.p_double[i];
        stt = stt+ae_sqr(t, _state);
    }
    *b = *b/stt;
    *a = (sy-sx*(*b))/ss;

    /*
     * Calculate goodness-of-fit
     */
    if( n>2 )
    {
        chi2 = (double)(0);
        for(i=0; i<=n-1; i++)
        {
            chi2 = chi2+ae_sqr((xy->ptr.pp_double[i][1]-(*a)-*b*xy->ptr.pp_double[i][0])/s->ptr.p_double[i], _state);
        }
        *p = incompletegammac((double)(n-2)/(double)2, chi2/2, _state);
    }
    else
    {
        *p = (double)(1);
    }

    /*
     * Calculate other parameters
     */
    *vara = (1+ae_sqr(sx, _state)/(ss*stt))/ss;
    *varb = 1/stt;
    *covab = -sx/(ss*stt);
    *corrab = *covab/ae_sqrt(*vara*(*varb), _state);
}
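

/*************************************************************************
LRLines solves the weighted least squares problem for the straight line
Y = A + B*X with per-point errors S[i] (cf. section 15.2 of "Numerical
Recipes in C"). Written out, the quantities computed above are

    SS   = sum(1/S[i]^2),  SX = sum(X[i]/S[i]^2),  SY = sum(Y[i]/S[i]^2)
    T[i] = (X[i]-SX/SS)/S[i],          STT = sum(T[i]^2)
    B = sum(T[i]*Y[i]/S[i])/STT,       A = (SY-SX*B)/SS
    Var(A) = (1+SX^2/(SS*STT))/SS,     Var(B) = 1/STT
    Cov(A,B) = -SX/(SS*STT)

and P is the chi-square goodness-of-fit probability
P = Q((N-2)/2, chi2/2) computed with incompletegammac().
*************************************************************************/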


void lrline(/* Real */ ae_matrix* xy,
     ae_int_t n,
     ae_int_t* info,
     double* a,
     double* b,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector s;
    ae_int_t i;
    double vara;
    double varb;
    double covab;
    double corrab;
    double p;

    ae_frame_make(_state, &_frame_block);
    memset(&s, 0, sizeof(s));
    *info = 0;
    *a = 0;
    *b = 0;
    ae_vector_init(&s, 0, DT_REAL, _state, ae_true);

    if( n<2 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&s, n-1+1, _state);
    for(i=0; i<=n-1; i++)
    {
        s.ptr.p_double[i] = (double)(1);
    }
    lrlines(xy, &s, n, info, a, b, &vara, &varb, &covab, &corrab, &p, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Internal linear regression subroutine
*************************************************************************/
static void linreg_lrinternal(/* Real */ ae_matrix* xy,
     /* Real */ ae_vector* s,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t* info,
     linearmodel* lm,
     lrreport* ar,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_matrix u;
    ae_matrix vt;
    ae_matrix vm;
    ae_matrix xym;
    ae_vector b;
    ae_vector sv;
    ae_vector t;
    ae_vector svi;
    ae_vector work;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t ncv;
    ae_int_t na;
    ae_int_t nacv;
    double r;
    double p;
    double epstol;
    lrreport ar2;
    ae_int_t offs;
    linearmodel tlm;

    ae_frame_make(_state, &_frame_block);
    memset(&a, 0, sizeof(a));
    memset(&u, 0, sizeof(u));
    memset(&vt, 0, sizeof(vt));
    memset(&vm, 0, sizeof(vm));
    memset(&xym, 0, sizeof(xym));
    memset(&b, 0, sizeof(b));
    memset(&sv, 0, sizeof(sv));
    memset(&t, 0, sizeof(t));
    memset(&svi, 0, sizeof(svi));
    memset(&work, 0, sizeof(work));
    memset(&ar2, 0, sizeof(ar2));
    memset(&tlm, 0, sizeof(tlm));
    *info = 0;
    _linearmodel_clear(lm);
    _lrreport_clear(ar);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&u, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&vt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&vm, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&xym, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&b, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&sv, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&svi, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _lrreport_init(&ar2, _state, ae_true);
    _linearmodel_init(&tlm, _state, ae_true);

    epstol = (double)(1000);

    /*
     * Check for errors in data
     */
    if( npoints<nvars||nvars<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( ae_fp_less_eq(s->ptr.p_double[i],(double)(0)) )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;

    /*
     * Create design matrix
     */
    ae_matrix_set_length(&a, npoints-1+1, nvars-1+1, _state);
    ae_vector_set_length(&b, npoints-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        r = 1/s->ptr.p_double[i];
        ae_v_moved(&a.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), r);
        b.ptr.p_double[i] = xy->ptr.pp_double[i][nvars]/s->ptr.p_double[i];
    }

    /*
     * Allocate W:
     * W[0]     array size
     * W[1]     version number, 0
     * W[2]     NVars (minus 1, to be compatible with external representation)
     * W[3]     coefficients offset
     */
    ae_vector_set_length(&lm->w, 4+nvars-1+1, _state);
    offs = 4;
    lm->w.ptr.p_double[0] = (double)(4+nvars);
    lm->w.ptr.p_double[1] = (double)(linreg_lrvnum);
    lm->w.ptr.p_double[2] = (double)(nvars-1);
    lm->w.ptr.p_double[3] = (double)(offs);

    /*
     * Solve problem using SVD:
     *
     * 0. check for degeneracy (different types)
     * 1. A = U*diag(sv)*V'
     * 2. T = b'*U
     * 3. w = SUM((T[i]/sv[i])*V[..,i])
     * 4. cov(wi,wj) = SUM(Vji*Vjk/sv[i]^2,K=1..M)
     *
     * see section 15.4 of "Numerical Recipes in C" for more information
     */
    ae_vector_set_length(&t, nvars-1+1, _state);
    ae_vector_set_length(&svi, nvars-1+1, _state);
    ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
    ae_matrix_set_length(&vm, nvars-1+1, nvars-1+1, _state);
    if( !rmatrixsvd(&a, npoints, nvars, 1, 1, 2, &sv, &u, &vt, _state) )
    {
        *info = -4;
        ae_frame_leave(_state);
        return;
    }
    if( ae_fp_less_eq(sv.ptr.p_double[0],(double)(0)) )
    {

        /*
         * Degenerate case: zero design matrix.
         */
        for(i=offs; i<=offs+nvars-1; i++)
        {
            lm->w.ptr.p_double[i] = (double)(0);
        }
        ar->rmserror = lrrmserror(lm, xy, npoints, _state);
        ar->avgerror = lravgerror(lm, xy, npoints, _state);
        ar->avgrelerror = lravgrelerror(lm, xy, npoints, _state);
        ar->cvrmserror = ar->rmserror;
        ar->cvavgerror = ar->avgerror;
        ar->cvavgrelerror = ar->avgrelerror;
        ar->ncvdefects = 0;
        ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
        for(i=0; i<=nvars-1; i++)
        {
            ar->cvdefects.ptr.p_int[i] = -1;
        }
        ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                ar->c.ptr.pp_double[i][j] = (double)(0);
            }
        }
        ae_frame_leave(_state);
        return;
    }
    if( ae_fp_less_eq(sv.ptr.p_double[nvars-1],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
    {

        /*
         * Degenerate case, non-zero design matrix.
         *
         * We can leave it and solve task in SVD least squares fashion.
         * Solution and covariance matrix will be obtained correctly,
         * but CV error estimates - will not. It is better to reduce
         * it to non-degenerate task and to obtain correct CV estimates.
         */
        for(k=nvars; k>=1; k--)
        {
            if( ae_fp_greater(sv.ptr.p_double[k-1],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
            {

                /*
                 * Reduce
                 */
                ae_matrix_set_length(&xym, npoints-1+1, k+1, _state);
                for(i=0; i<=npoints-1; i++)
                {
                    for(j=0; j<=k-1; j++)
                    {
                        r = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &vt.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
                        xym.ptr.pp_double[i][j] = r;
                    }
                    xym.ptr.pp_double[i][k] = xy->ptr.pp_double[i][nvars];
                }

                /*
                 * Solve
                 */
                linreg_lrinternal(&xym, s, npoints, k, info, &tlm, &ar2, _state);
                if( *info!=1 )
                {
                    ae_frame_leave(_state);
                    return;
                }

                /*
                 * Convert back to un-reduced format
                 */
                for(j=0; j<=nvars-1; j++)
                {
                    lm->w.ptr.p_double[offs+j] = (double)(0);
                }
                for(j=0; j<=k-1; j++)
                {
                    r = tlm.w.ptr.p_double[offs+j];
                    ae_v_addd(&lm->w.ptr.p_double[offs], 1, &vt.ptr.pp_double[j][0], 1, ae_v_len(offs,offs+nvars-1), r);
                }
                ar->rmserror = ar2.rmserror;
                ar->avgerror = ar2.avgerror;
                ar->avgrelerror = ar2.avgrelerror;
                ar->cvrmserror = ar2.cvrmserror;
                ar->cvavgerror = ar2.cvavgerror;
                ar->cvavgrelerror = ar2.cvavgrelerror;
                ar->ncvdefects = ar2.ncvdefects;
                ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
                for(j=0; j<=ar->ncvdefects-1; j++)
                {
                    ar->cvdefects.ptr.p_int[j] = ar2.cvdefects.ptr.p_int[j];
                }
                for(j=ar->ncvdefects; j<=nvars-1; j++)
                {
                    ar->cvdefects.ptr.p_int[j] = -1;
                }
                ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
                ae_vector_set_length(&work, nvars+1, _state);
                matrixmatrixmultiply(&ar2.c, 0, k-1, 0, k-1, ae_false, &vt, 0, k-1, 0, nvars-1, ae_false, 1.0, &vm, 0, k-1, 0, nvars-1, 0.0, &work, _state);
                matrixmatrixmultiply(&vt, 0, k-1, 0, nvars-1, ae_true, &vm, 0, k-1, 0, nvars-1, ae_false, 1.0, &ar->c, 0, nvars-1, 0, nvars-1, 0.0, &work, _state);
                ae_frame_leave(_state);
                return;
            }
        }
        *info = -255;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=nvars-1; i++)
    {
        if( ae_fp_greater(sv.ptr.p_double[i],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
        {
            svi.ptr.p_double[i] = 1/sv.ptr.p_double[i];
        }
        else
        {
            svi.ptr.p_double[i] = (double)(0);
        }
    }
    for(i=0; i<=nvars-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=npoints-1; i++)
    {
        r = b.ptr.p_double[i];
        ae_v_addd(&t.ptr.p_double[0], 1, &u.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), r);
    }
    for(i=0; i<=nvars-1; i++)
    {
        lm->w.ptr.p_double[offs+i] = (double)(0);
    }
    for(i=0; i<=nvars-1; i++)
    {
        r = t.ptr.p_double[i]*svi.ptr.p_double[i];
        ae_v_addd(&lm->w.ptr.p_double[offs], 1, &vt.ptr.pp_double[i][0], 1, ae_v_len(offs,offs+nvars-1), r);
    }
    for(j=0; j<=nvars-1; j++)
    {
        r = svi.ptr.p_double[j];
        ae_v_moved(&vm.ptr.pp_double[0][j], vm.stride, &vt.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1), r);
    }
    for(i=0; i<=nvars-1; i++)
    {
        for(j=i; j<=nvars-1; j++)
        {
            r = ae_v_dotproduct(&vm.ptr.pp_double[i][0], 1, &vm.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
            ar->c.ptr.pp_double[i][j] = r;
            ar->c.ptr.pp_double[j][i] = r;
        }
    }

    /*
     * Leave-1-out cross-validation error.
     *
     * NOTATIONS:
     * A            design matrix
     * A*x = b      original linear least squares task
     * U*S*V'       SVD of A
     * ai           i-th row of the A
     * bi           i-th element of the b
     * xf           solution of the original LLS task
     *
     * Cross-validation error of i-th element from a sample is
     * calculated using following formula:
     *
     *     ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui')     (1)
     *
     * This formula can be derived from normal equations of the
     * original task
     *
     *     (A'*A)x = A'*b                                    (2)
     *
     * by applying modification (zeroing out i-th row of A) to (2)
     *
     *     (A-ai)'*(A-ai) = (A-ai)'*b
     *
     * and using the Sherman-Morrison formula for updating the matrix
     * inverse.
     *
     * NOTE 1: b is not zeroed out since it is much simpler and
     * does not influence the final result.
     *
     * NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
     * Formula (1) can't be applied for such cases and they are skipped
     * from CV calculation (which distorts resulting CV estimate).
     * But from the properties of U we can conclude that there can
     * be no more than NVars such vectors. Usually
     * NVars << NPoints, so in a normal case it only slightly
     * influences result.
     */
    ncv = 0;
    na = 0;
    nacv = 0;
    ar->rmserror = (double)(0);
    ar->avgerror = (double)(0);
    ar->avgrelerror = (double)(0);
    ar->cvrmserror = (double)(0);
    ar->cvavgerror = (double)(0);
    ar->cvavgrelerror = (double)(0);
    ar->ncvdefects = 0;
    ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
    for(i=0; i<=nvars-1; i++)
    {
        ar->cvdefects.ptr.p_int[i] = -1;
    }
    for(i=0; i<=npoints-1; i++)
    {

        /*
         * Error on a training set
         */
        r = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
        ar->rmserror = ar->rmserror+ae_sqr(r-xy->ptr.pp_double[i][nvars], _state);
        ar->avgerror = ar->avgerror+ae_fabs(r-xy->ptr.pp_double[i][nvars], _state);
        if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
        {
            ar->avgrelerror = ar->avgrelerror+ae_fabs((r-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
            na = na+1;
        }

        /*
         * Error using fast leave-one-out cross-validation
         */
        p = ae_v_dotproduct(&u.ptr.pp_double[i][0], 1, &u.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        if( ae_fp_greater(p,1-epstol*ae_machineepsilon) )
        {
            ar->cvdefects.ptr.p_int[ar->ncvdefects] = i;
            ar->ncvdefects = ar->ncvdefects+1;
            continue;
        }
        r = s->ptr.p_double[i]*(r/s->ptr.p_double[i]-b.ptr.p_double[i]*p)/(1-p);
        ar->cvrmserror = ar->cvrmserror+ae_sqr(r-xy->ptr.pp_double[i][nvars], _state);
        ar->cvavgerror = ar->cvavgerror+ae_fabs(r-xy->ptr.pp_double[i][nvars], _state);
        if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
        {
            ar->cvavgrelerror = ar->cvavgrelerror+ae_fabs((r-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
            nacv = nacv+1;
        }
        ncv = ncv+1;
    }
    if( ncv==0 )
    {

        /*
         * Something strange: ALL ui are degenerate.
         * Unexpected...
         */
        *info = -255;
        ae_frame_leave(_state);
        return;
    }
    ar->rmserror = ae_sqrt(ar->rmserror/npoints, _state);
    ar->avgerror = ar->avgerror/npoints;
    if( na!=0 )
    {
        ar->avgrelerror = ar->avgrelerror/na;
    }
    ar->cvrmserror = ae_sqrt(ar->cvrmserror/ncv, _state);
    ar->cvavgerror = ar->cvavgerror/ncv;
    if( nacv!=0 )
    {
        ar->cvavgrelerror = ar->cvavgrelerror/nacv;
    }
    ae_frame_leave(_state);
}


void _linearmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    linearmodel *p = (linearmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic);
}


void _linearmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    linearmodel *dst = (linearmodel*)_dst;
    linearmodel *src = (linearmodel*)_src;
    ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic);
}


void _linearmodel_clear(void* _p)
{
    linearmodel *p = (linearmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->w);
}


void _linearmodel_destroy(void* _p)
{
    linearmodel *p = (linearmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->w);
}


void _lrreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    lrreport *p = (lrreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->cvdefects, 0, DT_INT, _state, make_automatic);
}


void _lrreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    lrreport *dst = (lrreport*)_dst;
    lrreport *src = (lrreport*)_src;
    ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
    dst->rmserror = src->rmserror;
    dst->avgerror = src->avgerror;
    dst->avgrelerror = src->avgrelerror;
    dst->cvrmserror = src->cvrmserror;
    dst->cvavgerror = src->cvavgerror;
    dst->cvavgrelerror = src->cvavgrelerror;
    dst->ncvdefects = src->ncvdefects;
    ae_vector_init_copy(&dst->cvdefects, &src->cvdefects, _state, make_automatic);
}


void _lrreport_clear(void* _p)
{
    lrreport *p = (lrreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_clear(&p->c);
    ae_vector_clear(&p->cvdefects);
}


void _lrreport_destroy(void* _p)
{
    lrreport *p = (lrreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_destroy(&p->c);
    ae_vector_destroy(&p->cvdefects);
}


#endif
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
Filters: simple moving averages (unsymmetric).

This filter replaces array by results of SMA(K) filter. SMA(K) is defined
as filter which averages at most K previous points (previous - not points
AROUND central point) - or less, in case of the first K-1 points.

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    K           -   K>=1 (K can be larger than N, such cases will be
                    correctly handled). Window width. K=1 corresponds to
                    identity transformation (nothing changes).

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed with SMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running
        sum to speed-up calculation of the averages. Additional measures
        are taken to ensure that running sum on a long sequence of zero
        elements will be correctly reset to zero even in the presence of
        round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT
        average points after the current one. Only X[i], X[i-1], ... are
        used when calculating new value of X[i]. We should also note that
        this algorithm uses BOTH previous points and the current one, i.e.
        the new value of X[i] depends on BOTH previous points and X[i]
        itself.

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filtersma(/* Real */ ae_vector* x,
     ae_int_t n,
     ae_int_t k,
     ae_state *_state)
{
    ae_int_t i;
    double runningsum;
    double termsinsum;
    ae_int_t zeroprefix;
    double v;

    ae_assert(n>=0, "FilterSMA: N<0", _state);
    ae_assert(x->cnt>=n, "FilterSMA: Length(X)<N", _state);
    ae_assert(isfinitevector(x, n, _state), "FilterSMA: X contains INF or NAN", _state);
    ae_assert(k>=1, "FilterSMA: K<1", _state);

    /*
     * Quick exit, if necessary
     */
    if( n<=1||k==1 )
    {
        return;
    }

    /*
     * Prepare variables (see below for explanation)
     */
    runningsum = 0.0;
    termsinsum = (double)(0);
    for(i=ae_maxint(n-k, 0, _state); i<=n-1; i++)
    {
        runningsum = runningsum+x->ptr.p_double[i];
        termsinsum = termsinsum+1;
    }
    i = ae_maxint(n-k, 0, _state);
    zeroprefix = 0;
    while(i<=n-1&&ae_fp_eq(x->ptr.p_double[i],(double)(0)))
    {
        zeroprefix = zeroprefix+1;
        i = i+1;
    }

    /*
     * General case: we assume that N>1 and K>1
     *
     * Make one pass through all elements. At the beginning of
     * the iteration we have:
     * * I             element being processed
     * * RunningSum    current value of the running sum
     *                 (including I-th element)
     * * TermsInSum    number of terms in sum, 0<=TermsInSum<=K
     * * ZeroPrefix    length of the sequence of zero elements
     *                 which starts at X[I-K+1] and continues towards X[I].
     *                 Equal to zero in case X[I-K+1] is non-zero.
     *                 This value is used to make RunningSum exactly zero
     *                 when it follows from the problem properties.
     */
    for(i=n-1; i>=0; i--)
    {

        /*
         * Store new value of X[i], save old value in V
         */
        v = x->ptr.p_double[i];
        x->ptr.p_double[i] = runningsum/termsinsum;

        /*
         * Update RunningSum and TermsInSum
         */
        if( i-k>=0 )
        {
            runningsum = runningsum-v+x->ptr.p_double[i-k];
        }
        else
        {
            runningsum = runningsum-v;
            termsinsum = termsinsum-1;
        }

        /*
         * Update ZeroPrefix.
         * In case we have ZeroPrefix=TermsInSum,
         * RunningSum is reset to zero.
         */
        if( i-k>=0 )
        {
            if( ae_fp_neq(x->ptr.p_double[i-k],(double)(0)) )
            {
                zeroprefix = 0;
            }
            else
            {
                zeroprefix = ae_minint(zeroprefix+1, k, _state);
            }
        }
        else
        {
            zeroprefix = ae_minint(zeroprefix, i+1, _state);
        }
        if( ae_fp_eq((double)(zeroprefix),termsinsum) )
        {
            runningsum = (double)(0);
        }
    }
}
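

/*************************************************************************
Usage sketch for FilterSMA (illustrative only; the data below are
hypothetical). SMA(2) replaces each element by the average of itself and
its predecessor, so {1,2,3,4} becomes {1, 1.5, 2.5, 3.5}.

    ae_state _state;
    ae_vector x;
    ae_int_t i;

    ae_state_init(&_state);
    ae_vector_init(&x, 4, DT_REAL, &_state, ae_true);
    for(i=0; i<=3; i++)
    {
        x.ptr.p_double[i] = (double)(i+1);
    }
    filtersma(&x, 4, 2, &_state);
    // x = {1.0, 1.5, 2.5, 3.5}
*************************************************************************/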


/*************************************************************************
Filters: exponential moving averages.

This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
defined as filter which replaces X[] by S[]:
    S[0] = X[0]
    S[t] = alpha*X[t] + (1-alpha)*S[t-1]

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    alpha       -   0<alpha<=1, smoothing parameter.

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed
                    with EMA(alpha)

NOTE 1: this function uses efficient in-place algorithm which does not
        allocate temporary arrays.

NOTE 2: this algorithm uses BOTH previous points and the current one, i.e.
        the new value of X[i] depends on BOTH previous points and X[i]
        itself.

NOTE 3: technical analysis users quite often work with EMA coefficient
        expressed in DAYS instead of fractions. If you want to calculate
        EMA(N), where N is a number of days, you can use alpha=2/(N+1).

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterema(/* Real */ ae_vector* x,
     ae_int_t n,
     double alpha,
     ae_state *_state)
{
    ae_int_t i;

    ae_assert(n>=0, "FilterEMA: N<0", _state);
    ae_assert(x->cnt>=n, "FilterEMA: Length(X)<N", _state);
    ae_assert(isfinitevector(x, n, _state), "FilterEMA: X contains INF or NAN", _state);
    ae_assert(ae_fp_greater(alpha,(double)(0)), "FilterEMA: Alpha<=0", _state);
    ae_assert(ae_fp_less_eq(alpha,(double)(1)), "FilterEMA: Alpha>1", _state);

    /*
     * Quick exit, if necessary
     */
    if( n<=1||ae_fp_eq(alpha,(double)(1)) )
    {
        return;
    }

    /*
     * Process
     */
    for(i=1; i<=n-1; i++)
    {
        x->ptr.p_double[i] = alpha*x->ptr.p_double[i]+(1-alpha)*x->ptr.p_double[i-1];
    }
}
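

/*************************************************************************
Sketch for FilterEMA (illustrative only). Using the day-count convention
from NOTE 3 above, a 9-day EMA corresponds to alpha = 2/(9+1) = 0.2:

    filterema(&x, n, 2.0/(9+1), &_state);

so each output is S[t] = 0.2*X[t] + 0.8*S[t-1], with S[0] = X[0].
*************************************************************************/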


/*************************************************************************
Filters: linear regression moving averages.

This filter replaces array by results of LRMA(K) filter.

LRMA(K) is defined as filter which, for each data point, builds linear
regression model using K previous points (the point itself is included in
these K points) and calculates value of this linear model at the point in
question.

INPUT PARAMETERS:
    X           -   array[N], array to process. It can be larger than N,
                    in this case only first N points are processed.
    N           -   points count, N>=0
    K           -   K>=1 (K can be larger than N, such cases will be
                    correctly handled). Window width. K=1 corresponds to
                    identity transformation (nothing changes).

OUTPUT PARAMETERS:
    X           -   array, whose first N elements were processed with
                    LRMA(K)

NOTE 1: this function processes the array in place; only two small
        K-sized temporaries are allocated internally.

NOTE 2: this algorithm makes only one pass through the array; for each
        point it fits a straight line (see lrlines() above) to at most K
        trailing points and evaluates that line at the point in question.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT
        average points after the current one. Only X[i], X[i-1], ... are
        used when calculating new value of X[i]. We should also note that
        this algorithm uses BOTH previous points and the current one, i.e.
        the new value of X[i] depends on BOTH previous points and X[i]
        itself.

  -- ALGLIB --
     Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterlrma(/* Real */ ae_vector* x,
     ae_int_t n,
     ae_int_t k,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t m;
    ae_matrix xy;
    ae_vector s;
    ae_int_t info;
    double a;
    double b;
    double vara;
    double varb;
    double covab;
    double corrab;
    double p;

    ae_frame_make(_state, &_frame_block);
    memset(&xy, 0, sizeof(xy));
    memset(&s, 0, sizeof(s));
    ae_matrix_init(&xy, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&s, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=0, "FilterLRMA: N<0", _state);
    ae_assert(x->cnt>=n, "FilterLRMA: Length(X)<N", _state);
    ae_assert(isfinitevector(x, n, _state), "FilterLRMA: X contains INF or NAN", _state);
    ae_assert(k>=1, "FilterLRMA: K<1", _state);

    /*
     * Quick exit, if necessary:
     * * either N is equal to 1 (nothing to average)
     * * or K is 1 (only point itself is used) or 2 (model is too simple,
     *   we will always get identity transformation)
     */
    if( n<=1||k<=2 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case: K>2, N>1.
     * We do not process points with I<2 because first two points (I=0 and I=1) will be
     * left unmodified by LRMA filter in any case.
     */
    ae_matrix_set_length(&xy, k, 2, _state);
    ae_vector_set_length(&s, k, _state);
    for(i=0; i<=k-1; i++)
    {
        xy.ptr.pp_double[i][0] = (double)(i);
        s.ptr.p_double[i] = 1.0;
    }
    for(i=n-1; i>=2; i--)
    {
        m = ae_minint(i+1, k, _state);
        ae_v_move(&xy.ptr.pp_double[0][1], xy.stride, &x->ptr.p_double[i-m+1], 1, ae_v_len(0,m-1));
        lrlines(&xy, &s, m, &info, &a, &b, &vara, &varb, &covab, &corrab, &p, _state);
        ae_assert(info==1, "FilterLRMA: internal error", _state);
        x->ptr.p_double[i] = a+b*(m-1);
    }
    ae_frame_leave(_state);
}


#endif
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This subroutine trains logit model.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars]
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1, NClasses<2).
                    * 1, if task has been solved
    LM          -   model built
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnltrainh(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t* info,
     logitmodel* lm,
     mnlreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t ssize;
    ae_bool allsame;
    ae_int_t offs;
    double decay;
    double v;
    double s;
    multilayerperceptron network;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    double e;
    ae_vector g;
    ae_matrix h;
    ae_bool spd;
    ae_vector x;
    ae_vector y;
    ae_vector wbase;
    double wstep;
    ae_vector wdir;
    ae_vector work;
    ae_int_t mcstage;
    logitmcstate mcstate;
    ae_int_t mcinfo;
    ae_int_t mcnfev;
    ae_int_t solverinfo;
    densesolverreport solverrep;

    ae_frame_make(_state, &_frame_block);
    memset(&network, 0, sizeof(network));
    memset(&g, 0, sizeof(g));
    memset(&h, 0, sizeof(h));
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    memset(&wbase, 0, sizeof(wbase));
    memset(&wdir, 0, sizeof(wdir));
    memset(&work, 0, sizeof(work));
    memset(&mcstate, 0, sizeof(mcstate));
    memset(&solverrep, 0, sizeof(solverrep));
    *info = 0;
    _logitmodel_clear(lm);
    _mnlreport_clear(rep);
    _multilayerperceptron_init(&network, _state, ae_true);
    ae_vector_init(&g, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&h, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wbase, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wdir, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _logitmcstate_init(&mcstate, _state, ae_true);
    _densesolverreport_init(&solverrep, _state, ae_true);

    decay = 0.001;

    /*
     * Test for inputs
     */
    if( (npoints<nvars+2||nvars<1)||nclasses<2 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
        {
            *info = -2;
            ae_frame_leave(_state);
            return;
        }
    }
    *info = 1;

    /*
     * Initialize data
     */
    rep->ngrad = 0;
    rep->nhess = 0;

    /*
     * Allocate array
     */
    offs = 5;
    ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
    ae_vector_set_length(&lm->w, ssize-1+1, _state);
    lm->w.ptr.p_double[0] = (double)(ssize);
    lm->w.ptr.p_double[1] = (double)(logit_logitvnum);
    lm->w.ptr.p_double[2] = (double)(nvars);
    lm->w.ptr.p_double[3] = (double)(nclasses);
    lm->w.ptr.p_double[4] = (double)(offs);

    /*
     * Degenerate case: all outputs are equal
     */
    allsame = ae_true;
    for(i=1; i<=npoints-1; i++)
    {
        if( ae_round(xy->ptr.pp_double[i][nvars], _state)!=ae_round(xy->ptr.pp_double[i-1][nvars], _state) )
        {
            allsame = ae_false;
        }
    }
    if( allsame )
    {
        for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
        {
            lm->w.ptr.p_double[offs+i] = (double)(0);
        }
        v = -2*ae_log(ae_minrealnumber, _state);
        k = ae_round(xy->ptr.pp_double[0][nvars], _state);
        if( k==nclasses-1 )
        {
            for(i=0; i<=nclasses-2; i++)
            {
                lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = -v;
            }
        }
        else
        {
            for(i=0; i<=nclasses-2; i++)
            {
                if( i==k )
                {
                    lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = v;
                }
                else
                {
                    lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = (double)(0);
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case.
     * Prepare task and network. Allocate space.
     */
    mlpcreatec0(nvars, nclasses, &network, _state);
    mlpinitpreprocessor(&network, xy, npoints, _state);
    mlpproperties(&network, &nin, &nout, &wcount, _state);
    for(i=0; i<=wcount-1; i++)
    {
        network.weights.ptr.p_double[i] = (2*ae_randomreal(_state)-1)/nvars;
    }
    ae_vector_set_length(&g, wcount-1+1, _state);
    ae_matrix_set_length(&h, wcount-1+1, wcount-1+1, _state);
    ae_vector_set_length(&wbase, wcount-1+1, _state);
    ae_vector_set_length(&wdir, wcount-1+1, _state);
    ae_vector_set_length(&work, wcount-1+1, _state);

    /*
     * First stage: optimize in gradient direction.
     */
    for(k=0; k<=wcount/3+10; k++)
    {

        /*
         * Calculate gradient in starting point
         */
        mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
        v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        e = e+0.5*decay*v;
        ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
        rep->ngrad = rep->ngrad+1;

        /*
         * Setup optimization scheme
         */
        ae_v_moveneg(&wdir.ptr.p_double[0], 1, &g.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        v = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        wstep = ae_sqrt(v, _state);
        v = 1/ae_sqrt(v, _state);
        ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), v);
        mcstage = 0;
        logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
        while(mcstage!=0)
        {
            mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
            v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
            e = e+0.5*decay*v;
            ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
            rep->ngrad = rep->ngrad+1;
            logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
        }
    }

    /*
     * Second stage: use Hessian when we are close to the minimum
     */
    for(;;)
    {

        /*
         * Calculate and update E/G/H
         */
        mlphessiannbatch(&network, xy, npoints, &e, &g, &h, _state);
        v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        e = e+0.5*decay*v;
        ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
        for(k=0; k<=wcount-1; k++)
        {
            h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
        }
        rep->nhess = rep->nhess+1;

        /*
         * Select step direction
         * NOTE: it is important to use lower-triangle Cholesky
         * factorization since it is much faster than upper-triangle version.
         */
        spd = spdmatrixcholesky(&h, wcount, ae_false, _state);
        spdmatrixcholeskysolve(&h, wcount, ae_false, &g, &solverinfo, &solverrep, &wdir, _state);
        spd = solverinfo>0;
        if( spd )
        {

            /*
             * H is positive definite.
             * Step in Newton direction.
             */
            ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), -1);
            spd = ae_true;
        }
        else
        {

            /*
             * H is indefinite.
             * Step in gradient direction.
             */
            ae_v_moveneg(&wdir.ptr.p_double[0], 1, &g.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
            spd = ae_false;
        }

        /*
         * Optimize in WDir direction
         */
        v = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        wstep = ae_sqrt(v, _state);
        v = 1/ae_sqrt(v, _state);
        ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), v);
        mcstage = 0;
        logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
        while(mcstage!=0)
        {
            mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
            v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
            e = e+0.5*decay*v;
            ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
            rep->ngrad = rep->ngrad+1;
            logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
        }
        if( spd&&((mcinfo==2||mcinfo==4)||mcinfo==6) )
        {
            break;
        }
    }

    /*
     * Convert from NN format to MNL format
     */
    ae_v_move(&lm->w.ptr.p_double[offs], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(offs,offs+wcount-1));
    for(k=0; k<=nvars-1; k++)
    {
        for(i=0; i<=nclasses-2; i++)
        {
            s = network.columnsigmas.ptr.p_double[k];
            if( ae_fp_eq(s,(double)(0)) )
            {
                s = (double)(1);
            }
            j = offs+(nvars+1)*i;
            v = lm->w.ptr.p_double[j+k];
            lm->w.ptr.p_double[j+k] = v/s;
            lm->w.ptr.p_double[j+nvars] = lm->w.ptr.p_double[j+nvars]+v*network.columnmeans.ptr.p_double[k]/s;
        }
    }
    for(k=0; k<=nclasses-2; k++)
    {
        lm->w.ptr.p_double[offs+(nvars+1)*k+nvars] = -lm->w.ptr.p_double[offs+(nvars+1)*k+nvars];
    }
    ae_frame_leave(_state);
}
|
|
|
|
|
|
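
/*************************************************************************
The second stage above is a damped Newton iteration: the Hessian H is
regularized by adding Decay to its diagonal, factorized with a lower-
triangle Cholesky decomposition, and H*WDir = G is solved; the solution
is negated to obtain the Newton direction. Below is a minimal sketch of
the same idea in plain C++ (dense 2x2 system, hypothetical data, no
ALGLIB calls, sqrt from <cmath>) - an illustration of the technique, not
library code:

    // Solve (H + decay*I) d = -g for a 2x2 SPD H via Cholesky: H = L*L^T
    double h[2][2] = {{4.0, 1.0}, {1.0, 3.0}}, g[2] = {1.0, 2.0}, d[2];
    double decay = 0.001;
    h[0][0] += decay; h[1][1] += decay;             // diagonal damping
    double l00 = sqrt(h[0][0]);
    double l10 = h[1][0]/l00;
    double l11 = sqrt(h[1][1]-l10*l10);             // fails if H is indefinite
    double y0 = -g[0]/l00, y1 = (-g[1]-l10*y0)/l11; // forward substitution
    d[1] = y1/l11; d[0] = (y0-l10*d[1])/l00;        // backward substitution
*************************************************************************/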
/*************************************************************************
Processing

INPUT PARAMETERS:
    LM      -   logit model, passed by non-constant reference
                (some fields of structure are used as temporaries
                when calculating model output).
    X       -   input vector, array[0..NVars-1].
    Y       -   (possibly) preallocated buffer; if size of Y is less than
                NClasses, it will be reallocated. If it is large enough,
                it is NOT reallocated, so we can save some time on
                reallocation.

OUTPUT PARAMETERS:
    Y       -   result, array[0..NClasses-1]
                Vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocess(logitmodel* lm,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t offs;
    ae_int_t i;
    ae_int_t i1;
    double s;


    ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLProcess: unexpected model version", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    nclasses = ae_round(lm->w.ptr.p_double[3], _state);
    offs = ae_round(lm->w.ptr.p_double[4], _state);
    logit_mnliexp(&lm->w, x, _state);
    s = (double)(0);
    i1 = offs+(nvars+1)*(nclasses-1);
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        s = s+lm->w.ptr.p_double[i];
    }
    if( y->cnt<nclasses )
    {
        ae_vector_set_length(y, nclasses, _state);
    }
    for(i=0; i<=nclasses-1; i++)
    {
        y->ptr.p_double[i] = lm->w.ptr.p_double[i1+i]/s;
    }
}
/*************************************************************************
'interactive' variant of MNLProcess for languages like Python which
support constructs like "Y = MNLProcess(LM,X)" and interactive mode of the
interpreter

This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocessi(logitmodel* lm,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{

    ae_vector_clear(y);

    mnlprocess(lm, x, y, _state);
}
/*************************************************************************
Unpacks coefficients of logit model. Logit model has the form:

    P(class=i) = S(i) / (S(0) + S(1) + ... +S(M-1))
    S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
    S(M-1) = 1

INPUT PARAMETERS:
    LM          -   logit model in ALGLIB format

OUTPUT PARAMETERS:
    A           -   coefficients, array[0..NClasses-2,0..NVars]
    NVars       -   number of independent variables
    NClasses    -   number of classes

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlunpack(logitmodel* lm,
     /* Real    */ ae_matrix* a,
     ae_int_t* nvars,
     ae_int_t* nclasses,
     ae_state *_state)
{
    ae_int_t offs;
    ae_int_t i;

    ae_matrix_clear(a);
    *nvars = 0;
    *nclasses = 0;

    ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLUnpack: unexpected model version", _state);
    *nvars = ae_round(lm->w.ptr.p_double[2], _state);
    *nclasses = ae_round(lm->w.ptr.p_double[3], _state);
    offs = ae_round(lm->w.ptr.p_double[4], _state);
    ae_matrix_set_length(a, *nclasses-2+1, *nvars+1, _state);
    for(i=0; i<=*nclasses-2; i++)
    {
        ae_v_move(&a->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs+i*(*nvars+1)], 1, ae_v_len(0,*nvars));
    }
}
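
/*************************************************************************
The model form above is the softmax rule: Y[i]=S(i)/(S(0)+...+S(M-1))
with the last class as the reference (S(M-1)=1). A hedged plain-C++
sketch of that computation over an unpacked coefficient matrix A
(hypothetical arrays, no overflow shift; see logit_mnliexp below for the
shifted version actually used by the library):

    #include <cmath>
    // nv = number of variables, nc = number of classes,
    // a[i][nv] is the intercept of class i; class nc-1 is the reference
    void softmax_posteriors(const double* const* a, const double* x,
                            int nv, int nc, double* y)
    {
        double s = 1.0;                    // S(nc-1) = exp(0) = 1
        for(int i = 0; i < nc-1; i++)
        {
            double lin = a[i][nv];         // intercept A[i,N]
            for(int j = 0; j < nv; j++)
                lin += a[i][j]*x[j];
            y[i] = std::exp(lin);
            s += y[i];
        }
        y[nc-1] = 1.0;
        for(int i = 0; i < nc; i++)
            y[i] /= s;                     // posterior probabilities
    }
*************************************************************************/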
/*************************************************************************
"Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
reversed).

INPUT PARAMETERS:
    A           -   model (see MNLUnpack)
    NVars       -   number of independent variables
    NClasses    -   number of classes

OUTPUT PARAMETERS:
    LM          -   logit model.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlpack(/* Real    */ ae_matrix* a,
     ae_int_t nvars,
     ae_int_t nclasses,
     logitmodel* lm,
     ae_state *_state)
{
    ae_int_t offs;
    ae_int_t i;
    ae_int_t ssize;

    _logitmodel_clear(lm);

    offs = 5;
    ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
    ae_vector_set_length(&lm->w, ssize-1+1, _state);
    lm->w.ptr.p_double[0] = (double)(ssize);
    lm->w.ptr.p_double[1] = (double)(logit_logitvnum);
    lm->w.ptr.p_double[2] = (double)(nvars);
    lm->w.ptr.p_double[3] = (double)(nclasses);
    lm->w.ptr.p_double[4] = (double)(offs);
    for(i=0; i<=nclasses-2; i++)
    {
        ae_v_move(&lm->w.ptr.p_double[offs+i*(nvars+1)], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(offs+i*(nvars+1),offs+i*(nvars+1)+nvars));
    }
}
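
/*************************************************************************
MNLPack is the inverse of MNLUnpack on the coefficient part of the model:
unpacking a model and packing the result rebuilds an equivalent model
(the trailing NClasses entries of W are scratch space which MNLProcess
recomputes on every call). A hedged usage sketch in the C-core
conventions, initialization and error handling omitted:

    ae_matrix a;
    ae_int_t nvars, nclasses;
    logitmodel lm2;
    ...
    mnlunpack(&lm, &a, &nvars, &nclasses, _state);  // extract A and sizes
    mnlpack(&a, nvars, nclasses, &lm2, _state);     // rebuild equivalent model
*************************************************************************/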
/*************************************************************************
Copying of LogitModel structure

INPUT PARAMETERS:
    LM1 -   original

OUTPUT PARAMETERS:
    LM2 -   copy

  -- ALGLIB --
     Copyright 15.03.2009 by Bochkanov Sergey
*************************************************************************/
void mnlcopy(logitmodel* lm1, logitmodel* lm2, ae_state *_state)
{
    ae_int_t k;

    _logitmodel_clear(lm2);

    k = ae_round(lm1->w.ptr.p_double[0], _state);
    ae_vector_set_length(&lm2->w, k-1+1, _state);
    ae_v_move(&lm2->w.ptr.p_double[0], 1, &lm1->w.ptr.p_double[0], 1, ae_v_len(0,k-1));
}
/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*ln(2)).

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgce(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t i;
    ae_vector workx;
    ae_vector worky;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&workx, 0, sizeof(workx));
    memset(&worky, 0, sizeof(worky));
    ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&worky, 0, DT_REAL, _state, ae_true);

    ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLAvgCE: unexpected model version", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    nclasses = ae_round(lm->w.ptr.p_double[3], _state);
    ae_vector_set_length(&workx, nvars-1+1, _state);
    ae_vector_set_length(&worky, nclasses-1+1, _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        ae_assert(ae_round(xy->ptr.pp_double[i][nvars], _state)>=0&&ae_round(xy->ptr.pp_double[i][nvars], _state)<nclasses, "MNLAvgCE: incorrect class number!", _state);

        /*
         * Process
         */
        ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        mnlprocess(lm, &workx, &worky, _state);
        if( ae_fp_greater(worky.ptr.p_double[ae_round(xy->ptr.pp_double[i][nvars], _state)],(double)(0)) )
        {
            result = result-ae_log(worky.ptr.p_double[ae_round(xy->ptr.pp_double[i][nvars], _state)], _state);
        }
        else
        {
            result = result-ae_log(ae_minrealnumber, _state);
        }
    }
    result = result/(npoints*ae_log((double)(2), _state));
    ae_frame_leave(_state);
    return result;
}
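
/*************************************************************************
MNLAvgCE returns CrossEntropy/(NPoints*ln(2)), i.e. the mean negative
base-2 log-likelihood of the true classes. A plain-C++ sketch of the same
formula (hypothetical input: p[i] is the predicted probability of the
true class of sample i, assumed strictly positive):

    #include <cmath>
    double avg_cross_entropy_bits(const double* p, int npoints)
    {
        double s = 0.0;
        for(int i = 0; i < npoints; i++)
            s -= std::log(p[i]);            // natural-log cross-entropy
        return s/(npoints*std::log(2.0));   // convert to bits per element
    }
*************************************************************************/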
/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    fraction of incorrectly classified cases (classification error
    divided by NPoints).

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrelclserror(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;


    result = (double)mnlclserror(lm, xy, npoints, _state)/(double)npoints;
    return result;
}
/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrmserror(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double relcls;
    double avgce;
    double rms;
    double avg;
    double avgrel;
    double result;


    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLRMSError: Incorrect MNL version!", _state);
    logit_mnlallerrors(lm, xy, npoints, &relcls, &avgce, &rms, &avg, &avgrel, _state);
    result = rms;
    return result;
}
/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgerror(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double relcls;
    double avgce;
    double rms;
    double avg;
    double avgrel;
    double result;


    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLAvgError: Incorrect MNL version!", _state);
    logit_mnlallerrors(lm, xy, npoints, &relcls, &avgce, &rms, &avg, &avgrel, _state);
    result = avg;
    return result;
}
/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    SSize   -   test set size

RESULT:
    average relative error (error when estimating posterior probabilities).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgrelerror(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t ssize,
     ae_state *_state)
{
    double relcls;
    double avgce;
    double rms;
    double avg;
    double avgrel;
    double result;


    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLAvgRelError: Incorrect MNL version!", _state);
    logit_mnlallerrors(lm, xy, ssize, &relcls, &avgce, &rms, &avg, &avgrel, _state);
    result = avgrel;
    return result;
}
/*************************************************************************
Classification error on test set = MNLRelClsError*NPoints

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
ae_int_t mnlclserror(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t i;
    ae_int_t j;
    ae_vector workx;
    ae_vector worky;
    ae_int_t nmax;
    ae_int_t result;

    ae_frame_make(_state, &_frame_block);
    memset(&workx, 0, sizeof(workx));
    memset(&worky, 0, sizeof(worky));
    ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&worky, 0, DT_REAL, _state, ae_true);

    ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLClsError: unexpected model version", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    nclasses = ae_round(lm->w.ptr.p_double[3], _state);
    ae_vector_set_length(&workx, nvars-1+1, _state);
    ae_vector_set_length(&worky, nclasses-1+1, _state);
    result = 0;
    for(i=0; i<=npoints-1; i++)
    {

        /*
         * Process
         */
        ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        mnlprocess(lm, &workx, &worky, _state);

        /*
         * Logit version of the answer
         */
        nmax = 0;
        for(j=0; j<=nclasses-1; j++)
        {
            if( ae_fp_greater(worky.ptr.p_double[j],worky.ptr.p_double[nmax]) )
            {
                nmax = j;
            }
        }

        /*
         * compare
         */
        if( nmax!=ae_round(xy->ptr.pp_double[i][nvars], _state) )
        {
            result = result+1;
        }
    }
    ae_frame_leave(_state);
    return result;
}
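
/*************************************************************************
MNLClsError counts misclassified points using the argmax decision rule:
each point is assigned to the class with the largest posterior. A minimal
sketch of that rule in plain C++ (hypothetical arrays):

    int argmax_class(const double* y, int nclasses)
    {
        int nmax = 0;
        for(int j = 1; j < nclasses; j++)
            if( y[j] > y[nmax] )
                nmax = j;   // ties are resolved in favor of the lower index
        return nmax;
    }
*************************************************************************/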
/*************************************************************************
Internal subroutine. Places exponents of the anti-overflow shifted
internal linear outputs into the service part of the W array.
*************************************************************************/
static void logit_mnliexp(/* Real    */ ae_vector* w,
     /* Real    */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t offs;
    ae_int_t i;
    ae_int_t i1;
    double v;
    double mx;


    ae_assert(ae_fp_eq(w->ptr.p_double[1],(double)(logit_logitvnum)), "LOGIT: unexpected model version", _state);
    nvars = ae_round(w->ptr.p_double[2], _state);
    nclasses = ae_round(w->ptr.p_double[3], _state);
    offs = ae_round(w->ptr.p_double[4], _state);
    i1 = offs+(nvars+1)*(nclasses-1);
    for(i=0; i<=nclasses-2; i++)
    {
        v = ae_v_dotproduct(&w->ptr.p_double[offs+i*(nvars+1)], 1, &x->ptr.p_double[0], 1, ae_v_len(offs+i*(nvars+1),offs+i*(nvars+1)+nvars-1));
        w->ptr.p_double[i1+i] = v+w->ptr.p_double[offs+i*(nvars+1)+nvars];
    }
    w->ptr.p_double[i1+nclasses-1] = (double)(0);
    mx = (double)(0);
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        mx = ae_maxreal(mx, w->ptr.p_double[i], _state);
    }
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        w->ptr.p_double[i] = ae_exp(w->ptr.p_double[i]-mx, _state);
    }
}
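
/*************************************************************************
logit_mnliexp guards against overflow with the standard max-shift trick:
since S(i)/sum(S) is invariant under multiplying all S(i) by a common
factor, it exponentiates v_i - max_j v_j instead of v_i, so the largest
argument passed to exp() is exactly 0. A sketch in plain C++
(hypothetical buffer v[] of linear outputs, last output fixed at 0):

    #include <algorithm>
    #include <cmath>
    void shifted_exp(double* v, int n)
    {
        double mx = *std::max_element(v, v+n);  // mx >= 0 since v[n-1] == 0
        for(int i = 0; i < n; i++)
            v[i] = std::exp(v[i]-mx);           // all arguments <= 0, no overflow
    }
*************************************************************************/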
/*************************************************************************
Calculation of all types of errors

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
static void logit_mnlallerrors(logitmodel* lm,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     double* relcls,
     double* avgce,
     double* rms,
     double* avg,
     double* avgrel,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t i;
    ae_vector buf;
    ae_vector workx;
    ae_vector y;
    ae_vector dy;

    ae_frame_make(_state, &_frame_block);
    memset(&buf, 0, sizeof(buf));
    memset(&workx, 0, sizeof(workx));
    memset(&y, 0, sizeof(y));
    memset(&dy, 0, sizeof(dy));
    *relcls = 0;
    *avgce = 0;
    *rms = 0;
    *avg = 0;
    *avgrel = 0;
    ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);

    ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNL unit: Incorrect MNL version!", _state);
    nvars = ae_round(lm->w.ptr.p_double[2], _state);
    nclasses = ae_round(lm->w.ptr.p_double[3], _state);
    ae_vector_set_length(&workx, nvars-1+1, _state);
    ae_vector_set_length(&y, nclasses-1+1, _state);
    ae_vector_set_length(&dy, 0+1, _state);
    dserrallocate(nclasses, &buf, _state);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
        mnlprocess(lm, &workx, &y, _state);
        dy.ptr.p_double[0] = xy->ptr.pp_double[i][nvars];
        dserraccumulate(&buf, &y, &dy, _state);
    }
    dserrfinish(&buf, _state);
    *relcls = buf.ptr.p_double[0];
    *avgce = buf.ptr.p_double[1];
    *rms = buf.ptr.p_double[2];
    *avg = buf.ptr.p_double[3];
    *avgrel = buf.ptr.p_double[4];
    ae_frame_leave(_state);
}
/*************************************************************************
THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT
DECREASE CONDITION AND A CURVATURE CONDITION.

AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH
ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION

    F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).

IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF
UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).

THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
DECREASE CONDITION

    F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),

AND THE CURVATURE CONDITION

    ABS(GRADF(X+STP*S)'S) .LE. GTOL*ABS(GRADF(X)'S).

IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS.
IF NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE
ALGORITHM USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS.
IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.

PARAMETERS DESCRIPTION

N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.

X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.

F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
IT CONTAINS THE VALUE OF F AT X + STP*S.

G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.

S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.

STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.

FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
SATISFIED.

XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.

STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND
UPPER BOUNDS FOR THE STEP.

MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.

INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
    INFO = 0  IMPROPER INPUT PARAMETERS.

    INFO = 1  THE SUFFICIENT DECREASE CONDITION AND THE
              DIRECTIONAL DERIVATIVE CONDITION HOLD.

    INFO = 2  RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
              IS AT MOST XTOL.

    INFO = 3  NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.

    INFO = 4  THE STEP IS AT THE LOWER BOUND STPMIN.

    INFO = 5  THE STEP IS AT THE UPPER BOUND STPMAX.

    INFO = 6  ROUNDING ERRORS PREVENT FURTHER PROGRESS.
              THERE MAY NOT BE A STEP WHICH SATISFIES THE
              SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
              TOLERANCES MAY BE TOO SMALL.

NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.

WA IS A WORK ARRAY OF LENGTH N.

ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
JORGE J. MORE', DAVID J. THUENTE
*************************************************************************/
static void logit_mnlmcsrch(ae_int_t n,
     /* Real    */ ae_vector* x,
     double* f,
     /* Real    */ ae_vector* g,
     /* Real    */ ae_vector* s,
     double* stp,
     ae_int_t* info,
     ae_int_t* nfev,
     /* Real    */ ae_vector* wa,
     logitmcstate* state,
     ae_int_t* stage,
     ae_state *_state)
{
    double v;
    double p5;
    double p66;
    double zero;


    /*
     * init
     */
    p5 = 0.5;
    p66 = 0.66;
    state->xtrapf = 4.0;
    zero = (double)(0);

    /*
     * Main cycle
     */
    for(;;)
    {
        if( *stage==0 )
        {

            /*
             * NEXT
             */
            *stage = 2;
            continue;
        }
        if( *stage==2 )
        {
            state->infoc = 1;
            *info = 0;

            /*
             * CHECK THE INPUT PARAMETERS FOR ERRORS.
             */
            if( ((((((n<=0||ae_fp_less_eq(*stp,(double)(0)))||ae_fp_less(logit_ftol,(double)(0)))||ae_fp_less(logit_gtol,zero))||ae_fp_less(logit_xtol,zero))||ae_fp_less(logit_stpmin,zero))||ae_fp_less(logit_stpmax,logit_stpmin))||logit_maxfev<=0 )
            {
                *stage = 0;
                return;
            }

            /*
             * COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
             * AND CHECK THAT S IS A DESCENT DIRECTION.
             */
            v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
            state->dginit = v;
            if( ae_fp_greater_eq(state->dginit,(double)(0)) )
            {
                *stage = 0;
                return;
            }

            /*
             * INITIALIZE LOCAL VARIABLES.
             */
            state->brackt = ae_false;
            state->stage1 = ae_true;
            *nfev = 0;
            state->finit = *f;
            state->dgtest = logit_ftol*state->dginit;
            state->width = logit_stpmax-logit_stpmin;
            state->width1 = state->width/p5;
            ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));

            /*
             * THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
             * FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
             * THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
             * FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
             * THE INTERVAL OF UNCERTAINTY.
             * THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
             * FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
             */
            state->stx = (double)(0);
            state->fx = state->finit;
            state->dgx = state->dginit;
            state->sty = (double)(0);
            state->fy = state->finit;
            state->dgy = state->dginit;

            /*
             * NEXT
             */
            *stage = 3;
            continue;
        }
        if( *stage==3 )
        {

            /*
             * START OF ITERATION.
             *
             * SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
             * TO THE PRESENT INTERVAL OF UNCERTAINTY.
             */
            if( state->brackt )
            {
                if( ae_fp_less(state->stx,state->sty) )
                {
                    state->stmin = state->stx;
                    state->stmax = state->sty;
                }
                else
                {
                    state->stmin = state->sty;
                    state->stmax = state->stx;
                }
            }
            else
            {
                state->stmin = state->stx;
                state->stmax = *stp+state->xtrapf*(*stp-state->stx);
            }

            /*
             * FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
             */
            if( ae_fp_greater(*stp,logit_stpmax) )
            {
                *stp = logit_stpmax;
            }
            if( ae_fp_less(*stp,logit_stpmin) )
            {
                *stp = logit_stpmin;
            }

            /*
             * IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
             * STP BE THE LOWEST POINT OBTAINED SO FAR.
             */
            if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=logit_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,logit_xtol*state->stmax)) )
            {
                *stp = state->stx;
            }

            /*
             * EVALUATE THE FUNCTION AND GRADIENT AT STP
             * AND COMPUTE THE DIRECTIONAL DERIVATIVE.
             */
            ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
            ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);

            /*
             * NEXT
             */
            *stage = 4;
            return;
        }
        if( *stage==4 )
        {
            *info = 0;
            *nfev = *nfev+1;
            v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
            state->dg = v;
            state->ftest1 = state->finit+*stp*state->dgtest;

            /*
             * TEST FOR CONVERGENCE.
             */
            if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
            {
                *info = 6;
            }
            if( (ae_fp_eq(*stp,logit_stpmax)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
            {
                *info = 5;
            }
            if( ae_fp_eq(*stp,logit_stpmin)&&(ae_fp_greater(*f,state->ftest1)||ae_fp_greater_eq(state->dg,state->dgtest)) )
            {
                *info = 4;
            }
            if( *nfev>=logit_maxfev )
            {
                *info = 3;
            }
            if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,logit_xtol*state->stmax) )
            {
                *info = 2;
            }
            if( ae_fp_less_eq(*f,state->ftest1)&&ae_fp_less_eq(ae_fabs(state->dg, _state),-logit_gtol*state->dginit) )
            {
                *info = 1;
            }

            /*
             * CHECK FOR TERMINATION.
             */
            if( *info!=0 )
            {
                *stage = 0;
                return;
            }

            /*
             * IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
             * FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
             */
            if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(logit_ftol, logit_gtol, _state)*state->dginit) )
            {
                state->stage1 = ae_false;
            }

            /*
             * A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
             * WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
             * FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
             * DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
             * OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
             */
            if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
            {

                /*
                 * DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
                 */
                state->fm = *f-*stp*state->dgtest;
                state->fxm = state->fx-state->stx*state->dgtest;
                state->fym = state->fy-state->sty*state->dgtest;
                state->dgm = state->dg-state->dgtest;
                state->dgxm = state->dgx-state->dgtest;
                state->dgym = state->dgy-state->dgtest;

                /*
                 * CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
                 * AND TO COMPUTE THE NEW STEP.
                 */
                logit_mnlmcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);

                /*
                 * RESET THE FUNCTION AND GRADIENT VALUES FOR F.
                 */
                state->fx = state->fxm+state->stx*state->dgtest;
                state->fy = state->fym+state->sty*state->dgtest;
                state->dgx = state->dgxm+state->dgtest;
                state->dgy = state->dgym+state->dgtest;
            }
            else
            {

                /*
                 * CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
                 * AND TO COMPUTE THE NEW STEP.
                 */
                logit_mnlmcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
            }

            /*
             * FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
             * INTERVAL OF UNCERTAINTY.
             */
            if( state->brackt )
            {
                if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
                {
                    *stp = state->stx+p5*(state->sty-state->stx);
                }
                state->width1 = state->width;
                state->width = ae_fabs(state->sty-state->stx, _state);
            }

            /*
             * NEXT.
             */
            *stage = 3;
            continue;
        }
    }
}
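
/*************************************************************************
MCSRCH terminates successfully (INFO=1) when the strong Wolfe conditions
hold: the sufficient-decrease (Armijo) test F(X+STP*S) <= F(X) +
FTOL*STP*(GRADF(X)'S) and the curvature test ABS(GRADF(X+STP*S)'S) <=
GTOL*ABS(GRADF(X)'S). A hedged sketch of the two tests in plain C++
(f0/dg0 are the value and directional derivative at step 0, ft/dgt at the
trial step t; hypothetical helper, not library code):

    #include <cmath>
    bool wolfe_ok(double f0, double dg0, double ft, double dgt,
                  double t, double ftol, double gtol)
    {
        bool armijo    = ft <= f0 + ftol*t*dg0;           // sufficient decrease
        bool curvature = std::fabs(dgt) <= gtol*std::fabs(dg0);
        return armijo && curvature;                       // dg0 < 0 is assumed
    }
*************************************************************************/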
static void logit_mnlmcstep(double* stx,
     double* fx,
     double* dx,
     double* sty,
     double* fy,
     double* dy,
     double* stp,
     double fp,
     double dp,
     ae_bool* brackt,
     double stmin,
     double stmax,
     ae_int_t* info,
     ae_state *_state)
{
    ae_bool bound;
    double gamma;
    double p;
    double q;
    double r;
    double s;
    double sgnd;
    double stpc;
    double stpf;
    double stpq;
    double theta;


    *info = 0;

    /*
     * CHECK THE INPUT PARAMETERS FOR ERRORS.
     */
    if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),(double)(0)))||ae_fp_less(stmax,stmin) )
    {
        return;
    }

    /*
     * DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
     */
    sgnd = dp*(*dx/ae_fabs(*dx, _state));

    /*
     * FIRST CASE. A HIGHER FUNCTION VALUE.
     * THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
     * TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
     * ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
     */
    if( ae_fp_greater(fp,*fx) )
    {
        *info = 1;
        bound = ae_true;
        theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
        s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
        gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
        if( ae_fp_less(*stp,*stx) )
        {
            gamma = -gamma;
        }
        p = gamma-(*dx)+theta;
        q = gamma-(*dx)+gamma+dp;
        r = p/q;
        stpc = *stx+r*(*stp-(*stx));
        stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
        if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
        {
            stpf = stpc;
        }
        else
        {
            stpf = stpc+(stpq-stpc)/2;
        }
        *brackt = ae_true;
    }
    else
    {
        if( ae_fp_less(sgnd,(double)(0)) )
        {

            /*
             * SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
             * OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
             * STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
             * THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
             */
            *info = 2;
            bound = ae_false;
            theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
            s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
            gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
            if( ae_fp_greater(*stp,*stx) )
            {
                gamma = -gamma;
            }
            p = gamma-dp+theta;
            q = gamma-dp+gamma+(*dx);
            r = p/q;
            stpc = *stp+r*(*stx-(*stp));
            stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
            if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
            {
                stpf = stpc;
            }
            else
            {
                stpf = stpq;
            }
            *brackt = ae_true;
        }
        else
        {
            if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
            {

                /*
                 * THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
                 * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
                 * THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
                 * IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
                 * IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
                 * EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
                 * COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE STEP
                 * CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
                 */
                *info = 3;
                bound = ae_true;
                theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
                s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);

                /*
                 * THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
                 * TO INFINITY IN THE DIRECTION OF THE STEP.
                 */
                gamma = s*ae_sqrt(ae_maxreal((double)(0), ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
                if( ae_fp_greater(*stp,*stx) )
                {
                    gamma = -gamma;
                }
                p = gamma-dp+theta;
                q = gamma+(*dx-dp)+gamma;
                r = p/q;
                if( ae_fp_less(r,(double)(0))&&ae_fp_neq(gamma,(double)(0)) )
                {
                    stpc = *stp+r*(*stx-(*stp));
                }
                else
                {
                    if( ae_fp_greater(*stp,*stx) )
                    {
                        stpc = stmax;
                    }
                    else
                    {
                        stpc = stmin;
                    }
                }
                stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
                if( *brackt )
                {
                    if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
                    {
                        stpf = stpc;
                    }
                    else
                    {
                        stpf = stpq;
                    }
                }
                else
                {
                    if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
                    {
                        stpf = stpc;
                    }
                    else
                    {
                        stpf = stpq;
                    }
                }
            }
            else
            {

                /*
                 * FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
                 * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
                 * NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
                 * IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
                 */
                *info = 4;
                bound = ae_false;
                if( *brackt )
                {
                    theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
                    s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
                    gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
                    if( ae_fp_greater(*stp,*sty) )
                    {
                        gamma = -gamma;
                    }
                    p = gamma-dp+theta;
                    q = gamma-dp+gamma+(*dy);
                    r = p/q;
                    stpc = *stp+r*(*sty-(*stp));
                    stpf = stpc;
                }
                else
                {
                    if( ae_fp_greater(*stp,*stx) )
                    {
                        stpf = stmax;
                    }
                    else
                    {
                        stpf = stmin;
                    }
                }
            }
        }
    }

    /*
     * UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
     * DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
     */
    if( ae_fp_greater(fp,*fx) )
    {
        *sty = *stp;
        *fy = fp;
        *dy = dp;
    }
    else
    {
        if( ae_fp_less(sgnd,0.0) )
        {
            *sty = *stx;
            *fy = *fx;
            *dy = *dx;
        }
        *stx = *stp;
        *fx = fp;
        *dx = dp;
    }

    /*
     * COMPUTE THE NEW STEP AND SAFEGUARD IT.
     */
    stpf = ae_minreal(stmax, stpf, _state);
    stpf = ae_maxreal(stmin, stpf, _state);
    *stp = stpf;
    if( *brackt&&bound )
    {
        if( ae_fp_greater(*sty,*stx) )
        {
            *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
        }
        else
        {
            *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
        }
    }
}
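
/*************************************************************************
The quadratic (secant) step used in cases 2-4 of logit_mnlmcstep is the
root of the linear interpolant of the directional derivative between the
best step STX (derivative DX) and the trial step STP (derivative DP).
A sketch of that formula in plain C++ (hypothetical helper):

    // Where the derivative interpolated linearly between (stx,dx) and
    // (stp,dp) crosses zero; requires dp != dx.
    double secant_step(double stx, double dx, double stp, double dp)
    {
        return stp + dp/(dp - dx)*(stx - stp);
    }
*************************************************************************/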
void _logitmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    logitmodel *p = (logitmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic);
}


void _logitmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    logitmodel *dst = (logitmodel*)_dst;
    logitmodel *src = (logitmodel*)_src;
    ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic);
}


void _logitmodel_clear(void* _p)
{
    logitmodel *p = (logitmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->w);
}


void _logitmodel_destroy(void* _p)
{
    logitmodel *p = (logitmodel*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->w);
}


void _logitmcstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    logitmcstate *p = (logitmcstate*)_p;
    ae_touch_ptr((void*)p);
}


void _logitmcstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    logitmcstate *dst = (logitmcstate*)_dst;
    logitmcstate *src = (logitmcstate*)_src;
    dst->brackt = src->brackt;
    dst->stage1 = src->stage1;
    dst->infoc = src->infoc;
    dst->dg = src->dg;
    dst->dgm = src->dgm;
    dst->dginit = src->dginit;
    dst->dgtest = src->dgtest;
    dst->dgx = src->dgx;
    dst->dgxm = src->dgxm;
    dst->dgy = src->dgy;
    dst->dgym = src->dgym;
    dst->finit = src->finit;
    dst->ftest1 = src->ftest1;
    dst->fm = src->fm;
    dst->fx = src->fx;
    dst->fxm = src->fxm;
    dst->fy = src->fy;
    dst->fym = src->fym;
    dst->stx = src->stx;
    dst->sty = src->sty;
    dst->stmin = src->stmin;
    dst->stmax = src->stmax;
    dst->width = src->width;
    dst->width1 = src->width1;
    dst->xtrapf = src->xtrapf;
}


void _logitmcstate_clear(void* _p)
{
    logitmcstate *p = (logitmcstate*)_p;
    ae_touch_ptr((void*)p);
}


void _logitmcstate_destroy(void* _p)
{
    logitmcstate *p = (logitmcstate*)_p;
    ae_touch_ptr((void*)p);
}


void _mnlreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    mnlreport *p = (mnlreport*)_p;
    ae_touch_ptr((void*)p);
}


void _mnlreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    mnlreport *dst = (mnlreport*)_dst;
    mnlreport *src = (mnlreport*)_src;
    dst->ngrad = src->ngrad;
    dst->nhess = src->nhess;
}


void _mnlreport_clear(void* _p)
{
    mnlreport *p = (mnlreport*)_p;
    ae_touch_ptr((void*)p);
}


void _mnlreport_destroy(void* _p)
{
    mnlreport *p = (mnlreport*)_p;
    ae_touch_ptr((void*)p);
}
#endif
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
DESCRIPTION:

This function creates MCPD (Markov Chains for Population Data) solver.

This solver can be used to find transition matrix P for N-dimensional
prediction problem where transition from X[i] to X[i+1] is modelled as
    X[i+1] = P*X[i]
where X[i] and X[i+1] are N-dimensional population vectors (components of
each X are non-negative), and P is a N*N transition matrix (elements of P
are non-negative, each column sums to 1.0).

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is constant, i.e. there are no new individuals and no
  one leaves population
* you want to model transitions of individuals from one state into another

USAGE:

Here we give a very brief outline of the MCPD. We strongly recommend you to
read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
on data analysis which is available at http://www.alglib.net/dataanalysis/

1. User initializes algorithm state with MCPDCreate() call

2. User adds one or more tracks - sequences of states which describe
   evolution of a system being modelled from different starting conditions

3. User may add optional boundary, equality and/or linear constraints on
   the coefficients of P by calling one of the following functions:
   * MCPDSetEC() to set equality constraints
   * MCPDSetBC() to set bound constraints
   * MCPDSetLC() to set linear constraints

4. Optionally, user may set custom weights for prediction errors (by
   default, algorithm assigns non-equal, automatically chosen weights for
   errors in the prediction of different components of X). It can be done
   with a call of MCPDSetPredictionWeights() function.

5. User calls MCPDSolve() function which takes algorithm state and
   pointer (delegate, etc.) to callback function which calculates F/G.

6. User calls MCPDResults() to get solution

INPUT PARAMETERS:
    N       -   problem dimension, N>=1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreate(ae_int_t n, mcpdstate* s, ae_state *_state)
{

    _mcpdstate_clear(s);

    ae_assert(n>=1, "MCPDCreate: N<1", _state);
    mcpd_mcpdinit(n, -1, -1, s, _state);
}
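
/*************************************************************************
One step of the MCPD model is a single matrix-vector product,
X[t+1] = P*X[t], with a column-stochastic P. A plain-C++ sketch of one
step (hypothetical dense storage; p[i][j] is the rate of transition from
state j into state i, so column j of P sums to 1.0):

    void mcpd_step(const double* const* p, const double* x,
                   double* xnext, int n)
    {
        for(int i = 0; i < n; i++)
        {
            xnext[i] = 0.0;
            for(int j = 0; j < n; j++)
                xnext[i] += p[i][j]*x[j];  // distribute state j across rows
        }
    }
*************************************************************************/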
/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
and one selected component of X[] is called "entry" state and is treated
in a special way:
    system state always transits from "entry" state to some another state
    system state can not transit from any state into "entry" state
Such conditions basically mean that row of P which corresponds to "entry"
state is zero.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - at every moment of time there is some
  (unpredictable) amount of "new" individuals, which can transit into one
  of the states at the next turn, but still no one leaves population
* you want to model transitions of individuals from one state into another
* but you do NOT want to predict amount of "new" individuals because it
  does not depend on individuals already present (hence system can not
  transit INTO entry state - it can only transit FROM it).

This model is discussed in more details in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N       -   problem dimension, N>=2
    EntryState- index of entry state, in 0..N-1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentry(ae_int_t n,
     ae_int_t entrystate,
     mcpdstate* s,
     ae_state *_state)
{

    _mcpdstate_clear(s);

    ae_assert(n>=2, "MCPDCreateEntry: N<2", _state);
    ae_assert(entrystate>=0, "MCPDCreateEntry: EntryState<0", _state);
    ae_assert(entrystate<n, "MCPDCreateEntry: EntryState>=N", _state);
    mcpd_mcpdinit(n, entrystate, -1, s, _state);
}
/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
and one selected component of X[] is called "exit" state and is treated
in a special way:
    system state can transit from any state into "exit" state
    system state can not transit from "exit" state into any other state
    transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that column of P which corresponds to
"exit" state is zero. Multiplication by such P may decrease sum of vector
components.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - individuals can move into "exit" state
  and leave population at the next turn, but there are no new individuals
* amount of individuals which leave population can be predicted
* you want to model transitions of individuals from one state into another
  (including transitions into the "exit" state)

This model is discussed in more details in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N       -   problem dimension, N>=2
    ExitState-  index of exit state, in 0..N-1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateexit(ae_int_t n,
     ae_int_t exitstate,
     mcpdstate* s,
     ae_state *_state)
{

    _mcpdstate_clear(s);

    ae_assert(n>=2, "MCPDCreateExit: N<2", _state);
    ae_assert(exitstate>=0, "MCPDCreateExit: ExitState<0", _state);
    ae_assert(exitstate<n, "MCPDCreateExit: ExitState>=N", _state);
    mcpd_mcpdinit(n, -1, exitstate, s, _state);
}
/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-Exit-states" model, i.e. model where transition from X[i] to
X[i+1] is modelled as
    X[i+1] = P*X[i]
where
    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix
one selected component of X[] is called "entry" state and is treated in a
special way:
    system state always transits from "entry" state to some another state
    system state can not transit from any state into "entry" state
and another one component of X[] is called "exit" state and is treated in
a special way too:
    system state can transit from any state into "exit" state
    system state can not transit from "exit" state into any other state
    transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that:
    row of P which corresponds to "entry" state is zero
    column of P which corresponds to "exit" state is zero
Multiplication by such P may decrease sum of vector components.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant
* at every moment of time there is some (unpredictable) amount of "new"
  individuals, which can transit into one of the states at the next turn
* some individuals can move (predictably) into "exit" state and leave
  population at the next turn
* you want to model transitions of individuals from one state into another,
  including transitions from the "entry" state and into the "exit" state.
* but you do NOT want to predict amount of "new" individuals because it
  does not depend on individuals already present (hence system can not
  transit INTO entry state - it can only transit FROM it).

This model is discussed in more details in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N       -   problem dimension, N>=2
    EntryState- index of entry state, in 0..N-1
    ExitState-  index of exit state, in 0..N-1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentryexit(ae_int_t n,
     ae_int_t entrystate,
     ae_int_t exitstate,
     mcpdstate* s,
     ae_state *_state)
{

    _mcpdstate_clear(s);

    ae_assert(n>=2, "MCPDCreateEntryExit: N<2", _state);
    ae_assert(entrystate>=0, "MCPDCreateEntryExit: EntryState<0", _state);
    ae_assert(entrystate<n, "MCPDCreateEntryExit: EntryState>=N", _state);
    ae_assert(exitstate>=0, "MCPDCreateEntryExit: ExitState<0", _state);
    ae_assert(exitstate<n, "MCPDCreateEntryExit: ExitState>=N", _state);
    ae_assert(entrystate!=exitstate, "MCPDCreateEntryExit: EntryState=ExitState", _state);
    mcpd_mcpdinit(n, entrystate, exitstate, s, _state);
}
/*************************************************************************
This function is used to add a track - sequence of system states at the
different moments of its evolution.

You may add one or several tracks to the MCPD solver. In case you have
several tracks, they won't overwrite each other. For example, if you pass
two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to t=A+3,
t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks - i.e.
it won't try to model transition from t=A+3 to t=B+1.

INPUT PARAMETERS:
    S       -   solver
    XY      -   track, array[K,N]:
                * I-th row is a state at t=I
                * elements of XY must be non-negative (exception will be
                  thrown on negative elements)
    K       -   number of points in a track
                * if given, only leading K rows of XY are used
                * if not given, automatically determined from size of XY

NOTES:

1. Track may contain either proportional or population data:
   * with proportional data all rows of XY must sum to 1.0, i.e. we have
     proportions instead of absolute population values
   * with population data rows of XY contain population counts and
     generally do not sum to 1.0 (although they still must be non-negative)

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddtrack(mcpdstate* s,
     /* Real    */ ae_matrix* xy,
     ae_int_t k,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t n;
    double s0;
    double s1;


    n = s->n;
    ae_assert(k>=0, "MCPDAddTrack: K<0", _state);
    ae_assert(xy->cols>=n, "MCPDAddTrack: Cols(XY)<N", _state);
    ae_assert(xy->rows>=k, "MCPDAddTrack: Rows(XY)<K", _state);
    ae_assert(apservisfinitematrix(xy, k, n, _state), "MCPDAddTrack: XY contains infinite or NaN elements", _state);
    for(i=0; i<=k-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            ae_assert(ae_fp_greater_eq(xy->ptr.pp_double[i][j],(double)(0)), "MCPDAddTrack: XY contains negative elements", _state);
        }
    }
    if( k<2 )
    {
        return;
    }
    if( s->data.rows<s->npairs+k-1 )
    {
        rmatrixresize(&s->data, ae_maxint(2*s->data.rows, s->npairs+k-1, _state), 2*n, _state);
    }
    for(i=0; i<=k-2; i++)
    {
        s0 = (double)(0);
        s1 = (double)(0);
        for(j=0; j<=n-1; j++)
        {
            if( s->states.ptr.p_int[j]>=0 )
            {
                s0 = s0+xy->ptr.pp_double[i][j];
            }
            if( s->states.ptr.p_int[j]<=0 )
            {
                s1 = s1+xy->ptr.pp_double[i+1][j];
            }
        }
        if( ae_fp_greater(s0,(double)(0))&&ae_fp_greater(s1,(double)(0)) )
        {
            for(j=0; j<=n-1; j++)
            {
                if( s->states.ptr.p_int[j]>=0 )
                {
                    s->data.ptr.pp_double[s->npairs][j] = xy->ptr.pp_double[i][j]/s0;
                }
                else
                {
                    s->data.ptr.pp_double[s->npairs][j] = 0.0;
                }
                if( s->states.ptr.p_int[j]<=0 )
                {
                    s->data.ptr.pp_double[s->npairs][n+j] = xy->ptr.pp_double[i+1][j]/s1;
                }
                else
                {
                    s->data.ptr.pp_double[s->npairs][n+j] = 0.0;
                }
            }
            s->npairs = s->npairs+1;
        }
    }
}
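
/*************************************************************************
MCPDAddTrack converts a track of K states into K-1 (X[t],X[t+1]) pairs,
rescaling each vector so that the entries which participate in the fit
sum to 1.0 (population counts become proportions). A simplified sketch of
that normalization in plain C++ (no entry/exit states, strictly positive
rows assumed):

    void normalize_row(const double* row, double* out, int n)
    {
        double sum = 0.0;
        for(int j = 0; j < n; j++)
            sum += row[j];           // sum > 0 is assumed
        for(int j = 0; j < n; j++)
            out[j] = row[j]/sum;     // proportions summing to 1.0
    }
*************************************************************************/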
/*************************************************************************
This function is used to add equality constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to place equality constraints on arbitrary
subset of elements of P. Set of constraints is specified by EC, which may
contain either NAN's or finite numbers from [0,1]. NAN denotes absence of
constraint, finite number denotes equality constraint on specific element
of P.

You can also use MCPDAddEC() function which allows you to ADD equality
constraint for one element of P without changing constraints for other
elements.

These functions (MCPDSetEC and MCPDAddEC) interact as follows:
* there is internal matrix of equality constraints which is stored in the
  MCPD solver
* MCPDSetEC() replaces this matrix by another one (SET)
* MCPDAddEC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus MCPDAddEC() call preserves all modifications done by previous
  calls, while MCPDSetEC() completely discards all changes done to the
  equality constraints.

INPUT PARAMETERS:
    S       -   solver
    EC      -   equality constraints, array[N,N]. Elements of EC can be
                either NAN's or finite numbers from [0,1]. NAN denotes
                absence of constraints, while finite value denotes
                equality constraint on the corresponding element of P.

NOTES:

1. infinite values of EC will lead to exception being thrown. Values less
than 0.0 or greater than 1.0 will lead to error code being returned after
call to MCPDSolve().

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetec(mcpdstate* s,
     /* Real    */ ae_matrix* ec,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t n;


    n = s->n;
    ae_assert(ec->cols>=n, "MCPDSetEC: Cols(EC)<N", _state);
    ae_assert(ec->rows>=n, "MCPDSetEC: Rows(EC)<N", _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            ae_assert(ae_isfinite(ec->ptr.pp_double[i][j], _state)||ae_isnan(ec->ptr.pp_double[i][j], _state), "MCPDSetEC: EC contains infinite elements", _state);
            s->ec.ptr.pp_double[i][j] = ec->ptr.pp_double[i][j];
        }
    }
}
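
/*************************************************************************
MCPDSetEC uses NAN as "no constraint" and a finite value in [0,1] as an
equality constraint on the corresponding P[i,j]. A hedged usage sketch in
plain C++ (std::nan from <cmath>; hypothetical 3-state model with a
single constraint P[0,0]=0.5):

    #include <cmath>
    double ec[3][3];
    for(int i = 0; i < 3; i++)
        for(int j = 0; j < 3; j++)
            ec[i][j] = std::nan("");   // unconstrained by default
    ec[0][0] = 0.5;                    // pin one element of P
    // wrap ec in an ae_matrix and pass it to mcpdsetec()
*************************************************************************/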
/*************************************************************************
|
|
This function is used to add equality constraints on the elements of the
|
|
transition matrix P.
|
|
|
|
MCPD solver has four types of constraints which can be placed on P:
|
|
* user-specified equality constraints (optional)
|
|
* user-specified bound constraints (optional)
|
|
* user-specified general linear constraints (optional)
|
|
* basic constraints (always present):
|
|
* non-negativity: P[i,j]>=0
|
|
* consistency: every column of P sums to 1.0
|
|
|
|
Final constraints which are passed to the underlying optimizer are
|
|
calculated as intersection of all present constraints. For example, you
|
|
may specify boundary constraint on P[0,0] and equality one:
|
|
0.1<=P[0,0]<=0.9
|
|
P[0,0]=0.5
|
|
Such combination of constraints will be silently reduced to their
|
|
intersection, which is P[0,0]=0.5.
|
|
|
|
This function can be used to ADD equality constraint for one element of P
|
|
without changing constraints for other elements.
|
|
|
|
You can also use MCPDSetEC() function which allows you to specify
|
|
arbitrary set of equality constraints in one call.
|
|
|
|
These functions (MCPDSetEC and MCPDAddEC) interact as follows:
|
|
* there is internal matrix of equality constraints which is stored in the
|
|
MCPD solver
|
|
* MCPDSetEC() replaces this matrix by another one (SET)
|
|
* MCPDAddEC() modifies one element of this matrix and leaves other ones
|
|
unchanged (ADD)
|
|
* thus MCPDAddEC() call preserves all modifications done by previous
|
|
calls, while MCPDSetEC() completely discards all changes done to the
|
|
equality constraints.
|
|
|
|
INPUT PARAMETERS:
|
|
S - solver
|
|
I - row index of element being constrained
|
|
J - column index of element being constrained
|
|
C - value (constraint for P[I,J]). Can be either NAN (no
|
|
constraint) or finite value from [0,1].
|
|
|
|
NOTES:
|
|
|
|
1. infinite values of C will lead to exception being thrown. Values less
|
|
than 0.0 or greater than 1.0 will lead to error code being returned after
|
|
call to MCPDSolve().
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.05.2010 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mcpdaddec(mcpdstate* s,
|
|
ae_int_t i,
|
|
ae_int_t j,
|
|
double c,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(i>=0, "MCPDAddEC: I<0", _state);
|
|
ae_assert(i<s->n, "MCPDAddEC: I>=N", _state);
|
|
ae_assert(j>=0, "MCPDAddEC: J<0", _state);
|
|
ae_assert(j<s->n, "MCPDAddEC: J>=N", _state);
|
|
ae_assert(ae_isnan(c, _state)||ae_isfinite(c, _state), "MCPDAddEC: C is not finite number or NAN", _state);
|
|
s->ec.ptr.pp_double[i][j] = c;
|
|
}
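

/*************************************************************************
Usage sketch for MCPDAddEC() (editorial example): pin one element of P
and later release it. Assumes a solver S already created by this unit;
the calls use only the signature defined above.

    mcpdaddec(&s, 0, 0, 0.5, _state);            // equality: P[0,0]=0.5
    mcpdaddec(&s, 0, 0, _state->v_nan, _state);  // remove that constraint
*************************************************************************/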


/*************************************************************************
This function is used to set bound constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to place bound constraints on an arbitrary
subset of elements of P. The set of constraints is specified by BndL/BndU
matrices, which may contain arbitrary combination of finite numbers or
infinities (like -INF<x<=0.5 or 0.1<=x<+INF).

You can also use the MCPDAddBC() function, which allows you to ADD a
bound constraint for one element of P without changing constraints for
other elements.

These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is an internal matrix of bound constraints which is stored in the
  MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus an MCPDAddBC() call preserves all modifications done by previous
  calls, while MCPDSetBC() completely discards all changes done to the
  bound constraints.

INPUT PARAMETERS:
    S       -   solver
    BndL    -   lower bounds constraints, array[N,N]. Elements of BndL can
                be finite numbers or -INF.
    BndU    -   upper bounds constraints, array[N,N]. Elements of BndU can
                be finite numbers or +INF.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetbc(mcpdstate* s,
     /* Real */ ae_matrix* bndl,
     /* Real */ ae_matrix* bndu,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t n;


    n = s->n;
    ae_assert(bndl->cols>=n, "MCPDSetBC: Cols(BndL)<N", _state);
    ae_assert(bndl->rows>=n, "MCPDSetBC: Rows(BndL)<N", _state);
    ae_assert(bndu->cols>=n, "MCPDSetBC: Cols(BndU)<N", _state);
    ae_assert(bndu->rows>=n, "MCPDSetBC: Rows(BndU)<N", _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            ae_assert(ae_isfinite(bndl->ptr.pp_double[i][j], _state)||ae_isneginf(bndl->ptr.pp_double[i][j], _state), "MCPDSetBC: BndL contains NAN or +INF", _state);
            ae_assert(ae_isfinite(bndu->ptr.pp_double[i][j], _state)||ae_isposinf(bndu->ptr.pp_double[i][j], _state), "MCPDSetBC: BndU contains NAN or -INF", _state);
            s->bndl.ptr.pp_double[i][j] = bndl->ptr.pp_double[i][j];
            s->bndu.ptr.pp_double[i][j] = bndu->ptr.pp_double[i][j];
        }
    }
}
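

/*************************************************************************
Usage sketch for MCPDSetBC() (editorial example, not a library routine):
one-sided and two-sided bounds for N=2, using -INF/+INF for the missing
sides. A minimal sketch assuming a solver S created elsewhere in this
unit.

    static void mcpdsetbc_sketch(mcpdstate* s, ae_state *_state)
    {
        ae_frame _frame_block;
        ae_matrix bndl;
        ae_matrix bndu;
        ae_int_t i;
        ae_int_t j;

        ae_frame_make(_state, &_frame_block);
        ae_matrix_init(&bndl, 2, 2, DT_REAL, _state, ae_true);
        ae_matrix_init(&bndu, 2, 2, DT_REAL, _state, ae_true);
        for(i=0; i<=1; i++)
        {
            for(j=0; j<=1; j++)
            {
                bndl.ptr.pp_double[i][j] = _state->v_neginf; // no lower bound
                bndu.ptr.pp_double[i][j] = _state->v_posinf; // no upper bound
            }
        }
        bndl.ptr.pp_double[0][0] = 0.1;  // 0.1<=P[0,0]<=0.9
        bndu.ptr.pp_double[0][0] = 0.9;
        bndu.ptr.pp_double[0][1] = 0.5;  // -INF<P[0,1]<=0.5
        mcpdsetbc(s, &bndl, &bndu, _state);
        ae_frame_leave(_state);
    }
*************************************************************************/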


/*************************************************************************
This function is used to add bound constraints on the elements of the
transition matrix P.

MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
  * non-negativity: P[i,j]>=0
  * consistency: every column of P sums to 1.0

Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
    0.1<=P[0,0]<=0.9
    P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.

This function can be used to ADD a bound constraint for one element of P
without changing constraints for other elements.

You can also use the MCPDSetBC() function, which allows you to place
bound constraints on an arbitrary subset of elements of P. The set of
constraints is specified by BndL/BndU matrices, which may contain
arbitrary combination of finite numbers or infinities (like -INF<x<=0.5
or 0.1<=x<+INF).

These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is an internal matrix of bound constraints which is stored in the
  MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
  unchanged (ADD)
* thus an MCPDAddBC() call preserves all modifications done by previous
  calls, while MCPDSetBC() completely discards all changes done to the
  bound constraints.

INPUT PARAMETERS:
    S       -   solver
    I       -   row index of element being constrained
    J       -   column index of element being constrained
    BndL    -   lower bound
    BndU    -   upper bound

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddbc(mcpdstate* s,
     ae_int_t i,
     ae_int_t j,
     double bndl,
     double bndu,
     ae_state *_state)
{


    ae_assert(i>=0, "MCPDAddBC: I<0", _state);
    ae_assert(i<s->n, "MCPDAddBC: I>=N", _state);
    ae_assert(j>=0, "MCPDAddBC: J<0", _state);
    ae_assert(j<s->n, "MCPDAddBC: J>=N", _state);
    ae_assert(ae_isfinite(bndl, _state)||ae_isneginf(bndl, _state), "MCPDAddBC: BndL is NAN or +INF", _state);
    ae_assert(ae_isfinite(bndu, _state)||ae_isposinf(bndu, _state), "MCPDAddBC: BndU is NAN or -INF", _state);
    s->bndl.ptr.pp_double[i][j] = bndl;
    s->bndu.ptr.pp_double[i][j] = bndu;
}
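

/*************************************************************************
Usage sketch for MCPDAddBC() (editorial example): per-element bounds,
assuming a solver S created elsewhere by this unit.

    mcpdaddbc(&s, 0, 0, 0.1, 0.9, _state);               // 0.1<=P[0,0]<=0.9
    mcpdaddbc(&s, 0, 1, _state->v_neginf, 0.5, _state);  // P[0,1]<=0.5
*************************************************************************/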


/*************************************************************************
This function is used to set linear equality/inequality constraints on the
elements of the transition matrix P.

This function can be used to set one or several general linear constraints
on the elements of P. Two types of constraints are supported:
* equality constraints
* inequality constraints (both less-or-equal and greater-or-equal)

Coefficients of constraints are specified by the matrix C (one of the
parameters). One row of C corresponds to one constraint. Because the
transition matrix P has N*N elements, we need N*N columns to store all
coefficients (they are stored row by row), and one more column to store
the right-hand side - hence C has N*N+1 columns. Constraint kind is
stored in the CT array.

Thus, the I-th linear constraint is
    P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
        + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
        + P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N]
where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).

Your constraint may involve only some subset of P (less than N*N elements).
For example it can be something like
    P[0,0] + P[0,1] = 0.5
In this case you still should pass a matrix with N*N+1 columns, but all
its elements (except for C[0,0], C[0,1] and C[0,N*N]) will be zero.

INPUT PARAMETERS:
    S       -   solver
    C       -   array[K,N*N+1] - coefficients of constraints
                (see above for complete description)
    CT      -   array[K] - constraint types
                (see above for complete description)
    K       -   number of equality/inequality constraints, K>=0:
                * if given, only leading K elements of C/CT are used
                * if not given, automatically determined from sizes of C/CT

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetlc(mcpdstate* s,
     /* Real */ ae_matrix* c,
     /* Integer */ ae_vector* ct,
     ae_int_t k,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t n;


    n = s->n;
    ae_assert(c->cols>=n*n+1, "MCPDSetLC: Cols(C)<N*N+1", _state);
    ae_assert(c->rows>=k, "MCPDSetLC: Rows(C)<K", _state);
    ae_assert(ct->cnt>=k, "MCPDSetLC: Len(CT)<K", _state);
    ae_assert(apservisfinitematrix(c, k, n*n+1, _state), "MCPDSetLC: C contains infinite or NaN values!", _state);
    rmatrixsetlengthatleast(&s->c, k, n*n+1, _state);
    ivectorsetlengthatleast(&s->ct, k, _state);
    for(i=0; i<=k-1; i++)
    {
        for(j=0; j<=n*n; j++)
        {
            s->c.ptr.pp_double[i][j] = c->ptr.pp_double[i][j];
        }
        s->ct.ptr.p_int[i] = ct->ptr.p_int[i];
    }
    s->ccnt = k;
}
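

/*************************************************************************
Usage sketch for MCPDSetLC() (editorial example, not a library routine):
encodes the single constraint P[0,0]+P[0,1]=0.5 for N=2. The coefficient
row has N*N+1=5 columns - four for the row-by-row elements of P and the
last one for the right-hand side; CT[0]=0 selects an equality. Assumes a
solver S created elsewhere in this unit.

    static void mcpdsetlc_sketch(mcpdstate* s, ae_state *_state)
    {
        ae_frame _frame_block;
        ae_matrix c;
        ae_vector ct;
        ae_int_t j;

        ae_frame_make(_state, &_frame_block);
        ae_matrix_init(&c, 1, 5, DT_REAL, _state, ae_true);
        ae_vector_init(&ct, 1, DT_INT, _state, ae_true);
        for(j=0; j<=4; j++)
        {
            c.ptr.pp_double[0][j] = 0.0;
        }
        c.ptr.pp_double[0][0] = 1.0;   // coefficient of P[0,0]
        c.ptr.pp_double[0][1] = 1.0;   // coefficient of P[0,1]
        c.ptr.pp_double[0][4] = 0.5;   // right-hand side
        ct.ptr.p_int[0] = 0;           // "=" constraint
        mcpdsetlc(s, &c, &ct, 1, _state);
        ae_frame_leave(_state);
    }
*************************************************************************/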


/*************************************************************************
This function allows you to tune the amount of Tikhonov regularization
being applied to your problem.

By default, the regularizing term is equal to r*||P-prior_P||^2, where r
is a small non-zero value, P is a transition matrix, prior_P is an
identity matrix, ||X||^2 is a sum of squared elements of X.

This function allows you to change the coefficient r. You can also change
the prior values with the MCPDSetPrior() function.

INPUT PARAMETERS:
    S       -   solver
    V       -   regularization coefficient, finite non-negative value. It
                is not recommended to specify zero value unless you are
                pretty sure that you want it.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsettikhonovregularizer(mcpdstate* s, double v, ae_state *_state)
{


    ae_assert(ae_isfinite(v, _state), "MCPDSetTikhonovRegularizer: V is infinite or NAN", _state);
    ae_assert(ae_fp_greater_eq(v,0.0), "MCPDSetTikhonovRegularizer: V is less than zero", _state);
    s->regterm = v;
}
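

/*************************************************************************
Usage sketch for MCPDSetTikhonovRegularizer() (editorial example). The
solver starts with the small default r=1.0E-8 set by this unit's
initializer; a larger value pulls the solution harder towards prior_P.
Assumes a solver S created elsewhere by this unit.

    mcpdsettikhonovregularizer(&s, 1.0E-4, _state);
*************************************************************************/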


/*************************************************************************
This function allows you to set the prior values used for regularization
of your problem.

By default, the regularizing term is equal to r*||P-prior_P||^2, where r
is a small non-zero value, P is a transition matrix, prior_P is an
identity matrix, ||X||^2 is a sum of squared elements of X.

This function allows you to change the prior values prior_P. You can also
change r with the MCPDSetTikhonovRegularizer() function.

INPUT PARAMETERS:
    S       -   solver
    PP      -   array[N,N], matrix of prior values:
                1. elements must be real numbers from [0,1]
                2. columns must sum to 1.0.
                The first property is checked (an exception is thrown
                otherwise), while the second one is not checked/enforced.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetprior(mcpdstate* s,
     /* Real */ ae_matrix* pp,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _pp;
    ae_int_t i;
    ae_int_t j;
    ae_int_t n;

    ae_frame_make(_state, &_frame_block);
    memset(&_pp, 0, sizeof(_pp));
    ae_matrix_init_copy(&_pp, pp, _state, ae_true);
    pp = &_pp;

    n = s->n;
    ae_assert(pp->cols>=n, "MCPDSetPrior: Cols(PP)<N", _state);
    ae_assert(pp->rows>=n, "MCPDSetPrior: Rows(PP)<N", _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            ae_assert(ae_isfinite(pp->ptr.pp_double[i][j], _state), "MCPDSetPrior: PP contains infinite elements", _state);
            ae_assert(ae_fp_greater_eq(pp->ptr.pp_double[i][j],0.0)&&ae_fp_less_eq(pp->ptr.pp_double[i][j],1.0), "MCPDSetPrior: PP[i,j] is less than 0.0 or greater than 1.0", _state);
            s->priorp.ptr.pp_double[i][j] = pp->ptr.pp_double[i][j];
        }
    }
    ae_frame_leave(_state);
}
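

/*************************************************************************
Usage sketch for MCPDSetPrior() (editorial example, not a library
routine): replaces the default identity prior with a uniform one for N=2,
so the regularizer pulls P towards 1/N in every column. Assumes a solver
S created elsewhere by this unit.

    static void mcpdsetprior_sketch(mcpdstate* s, ae_state *_state)
    {
        ae_frame _frame_block;
        ae_matrix pp;
        ae_int_t i;
        ae_int_t j;

        ae_frame_make(_state, &_frame_block);
        ae_matrix_init(&pp, 2, 2, DT_REAL, _state, ae_true);
        for(i=0; i<=1; i++)
        {
            for(j=0; j<=1; j++)
            {
                pp.ptr.pp_double[i][j] = 0.5;  // columns sum to 1.0
            }
        }
        mcpdsetprior(s, &pp, _state);
        ae_frame_leave(_state);
    }
*************************************************************************/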


/*************************************************************************
This function is used to change prediction weights.

MCPD solver scales prediction errors as follows
    Error(P) = ||W*(y-P*x)||^2
where
    x is a system state at time t
    y is a system state at time t+1
    P is a transition matrix
    W is a diagonal scaling matrix

By default, weights are chosen in order to minimize the relative
prediction error rather than the absolute one. For example, if one
component of state is about 0.5 in magnitude and another one is about
0.05, then the algorithm will make the corresponding weights equal to
2.0 and 20.0.

INPUT PARAMETERS:
    S       -   solver
    PW      -   array[N], weights:
                * must be non-negative values (exception will be thrown otherwise)
                * zero values will be replaced by automatically chosen values

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetpredictionweights(mcpdstate* s,
     /* Real */ ae_vector* pw,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n;


    n = s->n;
    ae_assert(pw->cnt>=n, "MCPDSetPredictionWeights: Length(PW)<N", _state);
    for(i=0; i<=n-1; i++)
    {
        ae_assert(ae_isfinite(pw->ptr.p_double[i], _state), "MCPDSetPredictionWeights: PW contains infinite or NAN elements", _state);
        ae_assert(ae_fp_greater_eq(pw->ptr.p_double[i],(double)(0)), "MCPDSetPredictionWeights: PW contains negative elements", _state);
        s->pw.ptr.p_double[i] = pw->ptr.p_double[i];
    }
}
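

/*************************************************************************
Usage sketch for MCPDSetPredictionWeights() (editorial example, not a
library routine): an explicit weight for the first state component, an
automatic (magnitude-based) weight for the second one, as described
above. Assumes a 2-state solver S created elsewhere by this unit.

    static void mcpdsetpw_sketch(mcpdstate* s, ae_state *_state)
    {
        ae_frame _frame_block;
        ae_vector pw;

        ae_frame_make(_state, &_frame_block);
        ae_vector_init(&pw, 2, DT_REAL, _state, ae_true);
        pw.ptr.p_double[0] = 2.0;  // user-specified weight
        pw.ptr.p_double[1] = 0.0;  // zero => chosen automatically at solve time
        mcpdsetpredictionweights(s, &pw, _state);
        ae_frame_leave(_state);
    }
*************************************************************************/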


/*************************************************************************
This function is used to start solution of the MCPD problem.

After return from this function, you can use MCPDResults() to get solution
and completion code.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsolve(mcpdstate* s, ae_state *_state)
{
    ae_int_t n;
    ae_int_t npairs;
    ae_int_t ccnt;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t k2;
    double v;
    double vv;


    n = s->n;
    npairs = s->npairs;

    /*
     * init fields of S
     */
    s->repterminationtype = 0;
    s->repinneriterationscount = 0;
    s->repouteriterationscount = 0;
    s->repnfev = 0;
    for(k=0; k<=n-1; k++)
    {
        for(k2=0; k2<=n-1; k2++)
        {
            s->p.ptr.pp_double[k][k2] = _state->v_nan;
        }
    }

    /*
     * Generate "effective" weights for prediction and calculate preconditioner
     */
    for(i=0; i<=n-1; i++)
    {
        if( ae_fp_eq(s->pw.ptr.p_double[i],(double)(0)) )
        {
            v = (double)(0);
            k = 0;
            for(j=0; j<=npairs-1; j++)
            {
                if( ae_fp_neq(s->data.ptr.pp_double[j][n+i],(double)(0)) )
                {
                    v = v+s->data.ptr.pp_double[j][n+i];
                    k = k+1;
                }
            }
            if( k!=0 )
            {
                s->effectivew.ptr.p_double[i] = k/v;
            }
            else
            {
                s->effectivew.ptr.p_double[i] = 1.0;
            }
        }
        else
        {
            s->effectivew.ptr.p_double[i] = s->pw.ptr.p_double[i];
        }
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            s->h.ptr.p_double[i*n+j] = 2*s->regterm;
        }
    }
    for(k=0; k<=npairs-1; k++)
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                s->h.ptr.p_double[i*n+j] = s->h.ptr.p_double[i*n+j]+2*ae_sqr(s->effectivew.ptr.p_double[i], _state)*ae_sqr(s->data.ptr.pp_double[k][j], _state);
            }
        }
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( ae_fp_eq(s->h.ptr.p_double[i*n+j],(double)(0)) )
            {
                s->h.ptr.p_double[i*n+j] = (double)(1);
            }
        }
    }

    /*
     * Generate "effective" BndL/BndU
     */
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {

            /*
             * Set default boundary constraints.
             * Lower bound is always zero, upper bound is calculated
             * with respect to entry/exit states.
             */
            s->effectivebndl.ptr.p_double[i*n+j] = 0.0;
            if( s->states.ptr.p_int[i]>0||s->states.ptr.p_int[j]<0 )
            {
                s->effectivebndu.ptr.p_double[i*n+j] = 0.0;
            }
            else
            {
                s->effectivebndu.ptr.p_double[i*n+j] = 1.0;
            }

            /*
             * Calculate intersection of the default and user-specified bound constraints.
             * This code checks consistency of such combination.
             */
            if( ae_isfinite(s->bndl.ptr.pp_double[i][j], _state)&&ae_fp_greater(s->bndl.ptr.pp_double[i][j],s->effectivebndl.ptr.p_double[i*n+j]) )
            {
                s->effectivebndl.ptr.p_double[i*n+j] = s->bndl.ptr.pp_double[i][j];
            }
            if( ae_isfinite(s->bndu.ptr.pp_double[i][j], _state)&&ae_fp_less(s->bndu.ptr.pp_double[i][j],s->effectivebndu.ptr.p_double[i*n+j]) )
            {
                s->effectivebndu.ptr.p_double[i*n+j] = s->bndu.ptr.pp_double[i][j];
            }
            if( ae_fp_greater(s->effectivebndl.ptr.p_double[i*n+j],s->effectivebndu.ptr.p_double[i*n+j]) )
            {
                s->repterminationtype = -3;
                return;
            }

            /*
             * Calculate intersection of the effective bound constraints
             * and user-specified equality constraints.
             * This code checks consistency of such combination.
             */
            if( ae_isfinite(s->ec.ptr.pp_double[i][j], _state) )
            {
                if( ae_fp_less(s->ec.ptr.pp_double[i][j],s->effectivebndl.ptr.p_double[i*n+j])||ae_fp_greater(s->ec.ptr.pp_double[i][j],s->effectivebndu.ptr.p_double[i*n+j]) )
                {
                    s->repterminationtype = -3;
                    return;
                }
                s->effectivebndl.ptr.p_double[i*n+j] = s->ec.ptr.pp_double[i][j];
                s->effectivebndu.ptr.p_double[i*n+j] = s->ec.ptr.pp_double[i][j];
            }
        }
    }

    /*
     * Generate linear constraints:
     * * "default" sums-to-one constraints (not generated for "exit" states)
     */
    rmatrixsetlengthatleast(&s->effectivec, s->ccnt+n, n*n+1, _state);
    ivectorsetlengthatleast(&s->effectivect, s->ccnt+n, _state);
    ccnt = s->ccnt;
    for(i=0; i<=s->ccnt-1; i++)
    {
        for(j=0; j<=n*n; j++)
        {
            s->effectivec.ptr.pp_double[i][j] = s->c.ptr.pp_double[i][j];
        }
        s->effectivect.ptr.p_int[i] = s->ct.ptr.p_int[i];
    }
    for(i=0; i<=n-1; i++)
    {
        if( s->states.ptr.p_int[i]>=0 )
        {
            for(k=0; k<=n*n-1; k++)
            {
                s->effectivec.ptr.pp_double[ccnt][k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                s->effectivec.ptr.pp_double[ccnt][k*n+i] = (double)(1);
            }
            s->effectivec.ptr.pp_double[ccnt][n*n] = 1.0;
            s->effectivect.ptr.p_int[ccnt] = 0;
            ccnt = ccnt+1;
        }
    }

    /*
     * create optimizer
     */
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            s->tmpp.ptr.p_double[i*n+j] = (double)1/(double)n;
        }
    }
    minbleicrestartfrom(&s->bs, &s->tmpp, _state);
    minbleicsetbc(&s->bs, &s->effectivebndl, &s->effectivebndu, _state);
    minbleicsetlc(&s->bs, &s->effectivec, &s->effectivect, ccnt, _state);
    minbleicsetcond(&s->bs, 0.0, 0.0, mcpd_xtol, 0, _state);
    minbleicsetprecdiag(&s->bs, &s->h, _state);

    /*
     * solve problem
     */
    while(minbleiciteration(&s->bs, _state))
    {
        ae_assert(s->bs.needfg, "MCPDSolve: internal error", _state);
        if( s->bs.needfg )
        {

            /*
             * Calculate regularization term
             */
            s->bs.f = 0.0;
            vv = s->regterm;
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    s->bs.f = s->bs.f+vv*ae_sqr(s->bs.x.ptr.p_double[i*n+j]-s->priorp.ptr.pp_double[i][j], _state);
                    s->bs.g.ptr.p_double[i*n+j] = 2*vv*(s->bs.x.ptr.p_double[i*n+j]-s->priorp.ptr.pp_double[i][j]);
                }
            }

            /*
             * calculate prediction error/gradient for K-th pair
             */
            for(k=0; k<=npairs-1; k++)
            {
                for(i=0; i<=n-1; i++)
                {
                    v = ae_v_dotproduct(&s->bs.x.ptr.p_double[i*n], 1, &s->data.ptr.pp_double[k][0], 1, ae_v_len(i*n,i*n+n-1));
                    vv = s->effectivew.ptr.p_double[i];
                    s->bs.f = s->bs.f+ae_sqr(vv*(v-s->data.ptr.pp_double[k][n+i]), _state);
                    for(j=0; j<=n-1; j++)
                    {
                        s->bs.g.ptr.p_double[i*n+j] = s->bs.g.ptr.p_double[i*n+j]+2*vv*vv*(v-s->data.ptr.pp_double[k][n+i])*s->data.ptr.pp_double[k][j];
                    }
                }
            }

            /*
             * continue
             */
            continue;
        }
    }
    minbleicresultsbuf(&s->bs, &s->tmpp, &s->br, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            s->p.ptr.pp_double[i][j] = s->tmpp.ptr.p_double[i*n+j];
        }
    }
    s->repterminationtype = s->br.terminationtype;
    s->repinneriterationscount = s->br.inneriterationscount;
    s->repouteriterationscount = s->br.outeriterationscount;
    s->repnfev = s->br.nfev;
}


/*************************************************************************
MCPD results

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    P       -   array[N,N], transition matrix
    Rep     -   optimization report. You should check Rep.TerminationType
                in order to distinguish successful termination from an
                unsuccessful one. In short, positive values denote
                success, negative ones are failures.
                More information about fields of this structure can be
                found in the comments on the MCPDReport datatype.

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdresults(mcpdstate* s,
     /* Real */ ae_matrix* p,
     mcpdreport* rep,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_matrix_clear(p);
    _mcpdreport_clear(rep);

    ae_matrix_set_length(p, s->n, s->n, _state);
    for(i=0; i<=s->n-1; i++)
    {
        for(j=0; j<=s->n-1; j++)
        {
            p->ptr.pp_double[i][j] = s->p.ptr.pp_double[i][j];
        }
    }
    rep->terminationtype = s->repterminationtype;
    rep->inneriterationscount = s->repinneriterationscount;
    rep->outeriterationscount = s->repouteriterationscount;
    rep->nfev = s->repnfev;
}
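

/*************************************************************************
End-to-end usage sketch (editorial example, not a library routine): the
typical MCPD workflow around MCPDSolve()/MCPDResults(). It assumes the
mcpdcreate() and mcpdaddtrack() routines from this unit (not shown in
this excerpt) and a track XY of NPoints rows, each row holding one
N-dimensional system state.

    static void mcpd_workflow_sketch(ae_matrix* xy,
         ae_int_t npoints,
         ae_int_t n,
         ae_matrix* p,
         mcpdreport* rep,
         ae_state *_state)
    {
        ae_frame _frame_block;
        mcpdstate s;

        ae_frame_make(_state, &_frame_block);
        memset(&s, 0, sizeof(s));
        _mcpdstate_init(&s, _state, ae_true);
        mcpdcreate(n, &s, _state);              // solver for an N-state chain
        mcpdaddtrack(&s, xy, npoints, _state);  // consecutive states of one track
        mcpdsolve(&s, _state);                  // run the BLEIC-based optimizer
        mcpdresults(&s, p, rep, _state);        // rep->terminationtype>0 on success
        ae_frame_leave(_state);
    }
*************************************************************************/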


/*************************************************************************
Internal initialization function

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
static void mcpd_mcpdinit(ae_int_t n,
     ae_int_t entrystate,
     ae_int_t exitstate,
     mcpdstate* s,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;


    ae_assert(n>=1, "MCPDCreate: N<1", _state);
    s->n = n;
    ae_vector_set_length(&s->states, n, _state);
    for(i=0; i<=n-1; i++)
    {
        s->states.ptr.p_int[i] = 0;
    }
    if( entrystate>=0 )
    {
        s->states.ptr.p_int[entrystate] = 1;
    }
    if( exitstate>=0 )
    {
        s->states.ptr.p_int[exitstate] = -1;
    }
    s->npairs = 0;
    s->regterm = 1.0E-8;
    s->ccnt = 0;
    ae_matrix_set_length(&s->p, n, n, _state);
    ae_matrix_set_length(&s->ec, n, n, _state);
    ae_matrix_set_length(&s->bndl, n, n, _state);
    ae_matrix_set_length(&s->bndu, n, n, _state);
    ae_vector_set_length(&s->pw, n, _state);
    ae_matrix_set_length(&s->priorp, n, n, _state);
    ae_vector_set_length(&s->tmpp, n*n, _state);
    ae_vector_set_length(&s->effectivew, n, _state);
    ae_vector_set_length(&s->effectivebndl, n*n, _state);
    ae_vector_set_length(&s->effectivebndu, n*n, _state);
    ae_vector_set_length(&s->h, n*n, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            s->p.ptr.pp_double[i][j] = 0.0;
            s->priorp.ptr.pp_double[i][j] = 0.0;
            s->bndl.ptr.pp_double[i][j] = _state->v_neginf;
            s->bndu.ptr.pp_double[i][j] = _state->v_posinf;
            s->ec.ptr.pp_double[i][j] = _state->v_nan;
        }
        s->pw.ptr.p_double[i] = 0.0;
        s->priorp.ptr.pp_double[i][i] = 1.0;
    }
    ae_matrix_set_length(&s->data, 1, 2*n, _state);
    for(i=0; i<=2*n-1; i++)
    {
        s->data.ptr.pp_double[0][i] = 0.0;
    }
    for(i=0; i<=n*n-1; i++)
    {
        s->tmpp.ptr.p_double[i] = 0.0;
    }
    minbleiccreate(n*n, &s->tmpp, &s->bs, _state);
}


void _mcpdstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    mcpdstate *p = (mcpdstate*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->states, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&p->data, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->ec, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->bndl, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->bndu, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->ct, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->pw, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->priorp, 0, 0, DT_REAL, _state, make_automatic);
    _minbleicstate_init(&p->bs, _state, make_automatic);
    _minbleicreport_init(&p->br, _state, make_automatic);
    ae_vector_init(&p->tmpp, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->effectivew, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->effectivebndl, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->effectivebndu, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->effectivec, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->effectivect, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->h, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->p, 0, 0, DT_REAL, _state, make_automatic);
}


void _mcpdstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    mcpdstate *dst = (mcpdstate*)_dst;
    mcpdstate *src = (mcpdstate*)_src;
    dst->n = src->n;
    ae_vector_init_copy(&dst->states, &src->states, _state, make_automatic);
    dst->npairs = src->npairs;
    ae_matrix_init_copy(&dst->data, &src->data, _state, make_automatic);
    ae_matrix_init_copy(&dst->ec, &src->ec, _state, make_automatic);
    ae_matrix_init_copy(&dst->bndl, &src->bndl, _state, make_automatic);
    ae_matrix_init_copy(&dst->bndu, &src->bndu, _state, make_automatic);
    ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
    ae_vector_init_copy(&dst->ct, &src->ct, _state, make_automatic);
    dst->ccnt = src->ccnt;
    ae_vector_init_copy(&dst->pw, &src->pw, _state, make_automatic);
    ae_matrix_init_copy(&dst->priorp, &src->priorp, _state, make_automatic);
    dst->regterm = src->regterm;
    _minbleicstate_init_copy(&dst->bs, &src->bs, _state, make_automatic);
    dst->repinneriterationscount = src->repinneriterationscount;
    dst->repouteriterationscount = src->repouteriterationscount;
    dst->repnfev = src->repnfev;
    dst->repterminationtype = src->repterminationtype;
    _minbleicreport_init_copy(&dst->br, &src->br, _state, make_automatic);
    ae_vector_init_copy(&dst->tmpp, &src->tmpp, _state, make_automatic);
    ae_vector_init_copy(&dst->effectivew, &src->effectivew, _state, make_automatic);
    ae_vector_init_copy(&dst->effectivebndl, &src->effectivebndl, _state, make_automatic);
    ae_vector_init_copy(&dst->effectivebndu, &src->effectivebndu, _state, make_automatic);
    ae_matrix_init_copy(&dst->effectivec, &src->effectivec, _state, make_automatic);
    ae_vector_init_copy(&dst->effectivect, &src->effectivect, _state, make_automatic);
    ae_vector_init_copy(&dst->h, &src->h, _state, make_automatic);
    ae_matrix_init_copy(&dst->p, &src->p, _state, make_automatic);
}


void _mcpdstate_clear(void* _p)
{
    mcpdstate *p = (mcpdstate*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->states);
    ae_matrix_clear(&p->data);
    ae_matrix_clear(&p->ec);
    ae_matrix_clear(&p->bndl);
    ae_matrix_clear(&p->bndu);
    ae_matrix_clear(&p->c);
    ae_vector_clear(&p->ct);
    ae_vector_clear(&p->pw);
    ae_matrix_clear(&p->priorp);
    _minbleicstate_clear(&p->bs);
    _minbleicreport_clear(&p->br);
    ae_vector_clear(&p->tmpp);
    ae_vector_clear(&p->effectivew);
    ae_vector_clear(&p->effectivebndl);
    ae_vector_clear(&p->effectivebndu);
    ae_matrix_clear(&p->effectivec);
    ae_vector_clear(&p->effectivect);
    ae_vector_clear(&p->h);
    ae_matrix_clear(&p->p);
}


void _mcpdstate_destroy(void* _p)
{
    mcpdstate *p = (mcpdstate*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->states);
    ae_matrix_destroy(&p->data);
    ae_matrix_destroy(&p->ec);
    ae_matrix_destroy(&p->bndl);
    ae_matrix_destroy(&p->bndu);
    ae_matrix_destroy(&p->c);
    ae_vector_destroy(&p->ct);
    ae_vector_destroy(&p->pw);
    ae_matrix_destroy(&p->priorp);
    _minbleicstate_destroy(&p->bs);
    _minbleicreport_destroy(&p->br);
    ae_vector_destroy(&p->tmpp);
    ae_vector_destroy(&p->effectivew);
    ae_vector_destroy(&p->effectivebndl);
    ae_vector_destroy(&p->effectivebndu);
    ae_matrix_destroy(&p->effectivec);
    ae_vector_destroy(&p->effectivect);
    ae_vector_destroy(&p->h);
    ae_matrix_destroy(&p->p);
}


void _mcpdreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    mcpdreport *p = (mcpdreport*)_p;
    ae_touch_ptr((void*)p);
}


void _mcpdreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    mcpdreport *dst = (mcpdreport*)_dst;
    mcpdreport *src = (mcpdreport*)_src;
    dst->inneriterationscount = src->inneriterationscount;
    dst->outeriterationscount = src->outeriterationscount;
    dst->nfev = src->nfev;
    dst->terminationtype = src->terminationtype;
}


void _mcpdreport_clear(void* _p)
{
    mcpdreport *p = (mcpdreport*)_p;
    ae_touch_ptr((void*)p);
}


void _mcpdreport_destroy(void* _p)
{
    mcpdreport *p = (mcpdreport*)_p;
    ae_touch_ptr((void*)p);
}


#endif
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
Like MLPCreate0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate0(ae_int_t nin,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreate0(nin, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreate1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreate1(nin, nhid, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreate2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreate2(nin, nhid1, nhid2, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateB0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb0(ae_int_t nin,
     ae_int_t nout,
     double b,
     double d,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreateb0(nin, nout, b, d, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateB1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     double b,
     double d,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreateb1(nin, nhid, nout, b, d, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateB2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     double b,
     double d,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreateb2(nin, nhid1, nhid2, nout, b, d, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateR0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater0(ae_int_t nin,
     ae_int_t nout,
     double a,
     double b,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreater0(nin, nout, a, b, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateR1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     double a,
     double b,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreater1(nin, nhid, nout, a, b, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateR2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     double a,
     double b,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreater2(nin, nhid1, nhid2, nout, a, b, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateC0, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec0(ae_int_t nin,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreatec0(nin, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateC1, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec1(ae_int_t nin,
     ae_int_t nhid,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreatec1(nin, nhid, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Like MLPCreateC2, but for ensembles.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec2(ae_int_t nin,
     ae_int_t nhid1,
     ae_int_t nhid2,
     ae_int_t nout,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _mlpensemble_clear(ensemble);
    _multilayerperceptron_init(&net, _state, ae_true);

    mlpcreatec2(nin, nhid1, nhid2, nout, &net, _state);
    mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Creates ensemble from network. Only network geometry is copied.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatefromnetwork(multilayerperceptron* network,
     ae_int_t ensemblesize,
     mlpensemble* ensemble,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ccount;
    ae_int_t wcount;

    _mlpensemble_clear(ensemble);

    ae_assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!", _state);

    /*
     * Copy network
     */
    mlpcopy(network, &ensemble->network, _state);

    /*
     * network properties
     */
    if( mlpissoftmax(network, _state) )
    {
        ccount = mlpgetinputscount(&ensemble->network, _state);
    }
    else
    {
        ccount = mlpgetinputscount(&ensemble->network, _state)+mlpgetoutputscount(&ensemble->network, _state);
    }
    wcount = mlpgetweightscount(&ensemble->network, _state);
    ensemble->ensemblesize = ensemblesize;

    /*
     * weights, means, sigmas
     */
    ae_vector_set_length(&ensemble->weights, ensemblesize*wcount, _state);
    ae_vector_set_length(&ensemble->columnmeans, ensemblesize*ccount, _state);
    ae_vector_set_length(&ensemble->columnsigmas, ensemblesize*ccount, _state);
    for(i=0; i<=ensemblesize*wcount-1; i++)
    {
        ensemble->weights.ptr.p_double[i] = ae_randomreal(_state)-0.5;
    }
    for(i=0; i<=ensemblesize-1; i++)
    {
        ae_v_move(&ensemble->columnmeans.ptr.p_double[i*ccount], 1, &network->columnmeans.ptr.p_double[0], 1, ae_v_len(i*ccount,(i+1)*ccount-1));
        ae_v_move(&ensemble->columnsigmas.ptr.p_double[i*ccount], 1, &network->columnsigmas.ptr.p_double[0], 1, ae_v_len(i*ccount,(i+1)*ccount-1));
    }

    /*
     * temporaries, internal buffers
     */
    ae_vector_set_length(&ensemble->y, mlpgetoutputscount(&ensemble->network, _state), _state);
}
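

/*************************************************************************
Usage sketch for MLPECreateFromNetwork() (editorial example, not a
library routine): builds a 5-member ensemble sharing the geometry of a
single network created with mlpcreate1() (from the MLPBase unit, as used
by the wrappers above). Each member starts with its own random weights.

    static void mlpe_create_sketch(mlpensemble* ensemble, ae_state *_state)
    {
        ae_frame _frame_block;
        multilayerperceptron net;

        ae_frame_make(_state, &_frame_block);
        memset(&net, 0, sizeof(net));
        _multilayerperceptron_init(&net, _state, ae_true);
        mlpcreate1(2, 10, 1, &net, _state);               // 2 inputs, 10 hidden, 1 output
        mlpecreatefromnetwork(&net, 5, ensemble, _state); // 5 members, random weights
        ae_frame_leave(_state);
    }
*************************************************************************/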


/*************************************************************************
Copying of MLPEnsemble structure

INPUT PARAMETERS:
    Ensemble1 - original

OUTPUT PARAMETERS:
    Ensemble2 - copy

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecopy(mlpensemble* ensemble1,
     mlpensemble* ensemble2,
     ae_state *_state)
{
    ae_int_t ccount;
    ae_int_t wcount;

    _mlpensemble_clear(ensemble2);


    /*
     * Unload info
     */
    if( mlpissoftmax(&ensemble1->network, _state) )
    {
        ccount = mlpgetinputscount(&ensemble1->network, _state);
    }
    else
    {
        ccount = mlpgetinputscount(&ensemble1->network, _state)+mlpgetoutputscount(&ensemble1->network, _state);
    }
    wcount = mlpgetweightscount(&ensemble1->network, _state);

    /*
     * Allocate space
     */
    ae_vector_set_length(&ensemble2->weights, ensemble1->ensemblesize*wcount, _state);
    ae_vector_set_length(&ensemble2->columnmeans, ensemble1->ensemblesize*ccount, _state);
    ae_vector_set_length(&ensemble2->columnsigmas, ensemble1->ensemblesize*ccount, _state);
    ae_vector_set_length(&ensemble2->y, mlpgetoutputscount(&ensemble1->network, _state), _state);

    /*
     * Copy
     */
    ensemble2->ensemblesize = ensemble1->ensemblesize;
    ae_v_move(&ensemble2->weights.ptr.p_double[0], 1, &ensemble1->weights.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*wcount-1));
    ae_v_move(&ensemble2->columnmeans.ptr.p_double[0], 1, &ensemble1->columnmeans.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*ccount-1));
    ae_v_move(&ensemble2->columnsigmas.ptr.p_double[0], 1, &ensemble1->columnsigmas.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*ccount-1));
    mlpcopy(&ensemble1->network, &ensemble2->network, _state);
}


/*************************************************************************
Randomization of MLP ensemble

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlperandomize(mlpensemble* ensemble, ae_state *_state)
{
    ae_int_t i;
    ae_int_t wcount;


    wcount = mlpgetweightscount(&ensemble->network, _state);
    for(i=0; i<=ensemble->ensemblesize*wcount-1; i++)
    {
        ensemble->weights.ptr.p_double[i] = ae_randomreal(_state)-0.5;
    }
}


/*************************************************************************
Return ensemble properties (number of inputs and outputs).

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeproperties(mlpensemble* ensemble,
     ae_int_t* nin,
     ae_int_t* nout,
     ae_state *_state)
{

    *nin = 0;
    *nout = 0;

    *nin = mlpgetinputscount(&ensemble->network, _state);
    *nout = mlpgetoutputscount(&ensemble->network, _state);
}


/*************************************************************************
Return normalization type (whether ensemble is SOFTMAX-normalized or not).

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
ae_bool mlpeissoftmax(mlpensemble* ensemble, ae_state *_state)
{
    ae_bool result;


    result = mlpissoftmax(&ensemble->network, _state);
    return result;
}


/*************************************************************************
Processing

INPUT PARAMETERS:
    Ensemble-   neural networks ensemble
    X       -   input vector, array[0..NIn-1].
    Y       -   (possibly) preallocated buffer; if size of Y is less than
                NOut, it will be reallocated. If it is large enough, it
                is NOT reallocated, so we can save some time on reallocation.

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocess(mlpensemble* ensemble,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t es;
    ae_int_t wc;
    ae_int_t cc;
    double v;
    ae_int_t nout;


    if( y->cnt<mlpgetoutputscount(&ensemble->network, _state) )
    {
        ae_vector_set_length(y, mlpgetoutputscount(&ensemble->network, _state), _state);
    }
    es = ensemble->ensemblesize;
    wc = mlpgetweightscount(&ensemble->network, _state);
    if( mlpissoftmax(&ensemble->network, _state) )
    {
        cc = mlpgetinputscount(&ensemble->network, _state);
    }
    else
    {
        cc = mlpgetinputscount(&ensemble->network, _state)+mlpgetoutputscount(&ensemble->network, _state);
    }
    v = (double)1/(double)es;
    nout = mlpgetoutputscount(&ensemble->network, _state);
    for(i=0; i<=nout-1; i++)
    {
        y->ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=es-1; i++)
    {
        ae_v_move(&ensemble->network.weights.ptr.p_double[0], 1, &ensemble->weights.ptr.p_double[i*wc], 1, ae_v_len(0,wc-1));
        ae_v_move(&ensemble->network.columnmeans.ptr.p_double[0], 1, &ensemble->columnmeans.ptr.p_double[i*cc], 1, ae_v_len(0,cc-1));
        ae_v_move(&ensemble->network.columnsigmas.ptr.p_double[0], 1, &ensemble->columnsigmas.ptr.p_double[i*cc], 1, ae_v_len(0,cc-1));
        mlpprocess(&ensemble->network, x, &ensemble->y, _state);
        ae_v_addd(&y->ptr.p_double[0], 1, &ensemble->y.ptr.p_double[0], 1, ae_v_len(0,nout-1), v);
    }
}
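

/*************************************************************************
Usage sketch for MLPEProcess() (editorial example, not a library
routine): averaged forward pass over all ensemble members for an assumed
2-input, 1-output model. Y is preallocated here, so no reallocation
happens inside the call.

    static void mlpe_process_sketch(mlpensemble* ensemble, ae_state *_state)
    {
        ae_frame _frame_block;
        ae_vector x;
        ae_vector y;

        ae_frame_make(_state, &_frame_block);
        ae_vector_init(&x, 2, DT_REAL, _state, ae_true);
        ae_vector_init(&y, 1, DT_REAL, _state, ae_true);
        x.ptr.p_double[0] = 0.1;
        x.ptr.p_double[1] = 0.7;
        mlpeprocess(ensemble, &x, &y, _state);  // y[0] = average over members
        ae_frame_leave(_state);
    }
*************************************************************************/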


/*************************************************************************
'interactive' variant of MLPEProcess for languages like Python which
support constructs like "Y = MLPEProcess(LM,X)" and interactive mode of
the interpreter.

This function allocates a new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from the command line.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocessi(mlpensemble* ensemble,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{

    ae_vector_clear(y);

    mlpeprocess(ensemble, x, y, _state);
}


/*************************************************************************
Calculation of all types of errors

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeallerrorsx(mlpensemble* ensemble,
     /* Real */ ae_matrix* densexy,
     sparsematrix* sparsexy,
     ae_int_t datasetsize,
     ae_int_t datasettype,
     /* Integer */ ae_vector* idx,
     ae_int_t subset0,
     ae_int_t subset1,
     ae_int_t subsettype,
     ae_shared_pool* buf,
     modelerrors* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t nin;
    ae_int_t nout;
    ae_bool iscls;
    ae_int_t srcidx;
    mlpbuffers *pbuf;
    ae_smart_ptr _pbuf;
    modelerrors rep0;
    modelerrors rep1;

    ae_frame_make(_state, &_frame_block);
    memset(&_pbuf, 0, sizeof(_pbuf));
    memset(&rep0, 0, sizeof(rep0));
    memset(&rep1, 0, sizeof(rep1));
    ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
    _modelerrors_init(&rep0, _state, ae_true);
    _modelerrors_init(&rep1, _state, ae_true);


    /*
     * Get network information
     */
    nin = mlpgetinputscount(&ensemble->network, _state);
    nout = mlpgetoutputscount(&ensemble->network, _state);
    iscls = mlpissoftmax(&ensemble->network, _state);

    /*
     * Retrieve buffer, prepare, process data, recycle buffer
     */
    ae_shared_pool_retrieve(buf, &_pbuf, _state);
    if( iscls )
    {
        dserrallocate(nout, &pbuf->tmp0, _state);
    }
    else
    {
        dserrallocate(-nout, &pbuf->tmp0, _state);
    }
    rvectorsetlengthatleast(&pbuf->x, nin, _state);
    rvectorsetlengthatleast(&pbuf->y, nout, _state);
    rvectorsetlengthatleast(&pbuf->desiredy, nout, _state);
    for(i=subset0; i<=subset1-1; i++)
    {
        srcidx = -1;
        if( subsettype==0 )
        {
            srcidx = i;
        }
        if( subsettype==1 )
        {
            srcidx = idx->ptr.p_int[i];
        }
        ae_assert(srcidx>=0, "MLPEAllErrorsX: internal error", _state);
        if( datasettype==0 )
        {
            ae_v_move(&pbuf->x.ptr.p_double[0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,nin-1));
        }
        if( datasettype==1 )
        {
            sparsegetrow(sparsexy, srcidx, &pbuf->x, _state);
        }
        mlpeprocess(ensemble, &pbuf->x, &pbuf->y, _state);
        if( mlpissoftmax(&ensemble->network, _state) )
        {
            if( datasettype==0 )
            {
                pbuf->desiredy.ptr.p_double[0] = densexy->ptr.pp_double[srcidx][nin];
            }
            if( datasettype==1 )
            {
                pbuf->desiredy.ptr.p_double[0] = sparseget(sparsexy, srcidx, nin, _state);
            }
        }
        else
        {
            if( datasettype==0 )
            {
                ae_v_move(&pbuf->desiredy.ptr.p_double[0], 1, &densexy->ptr.pp_double[srcidx][nin], 1, ae_v_len(0,nout-1));
            }
            if( datasettype==1 )
            {
                for(j=0; j<=nout-1; j++)
                {
                    pbuf->desiredy.ptr.p_double[j] = sparseget(sparsexy, srcidx, nin+j, _state);
                }
            }
        }
        dserraccumulate(&pbuf->tmp0, &pbuf->y, &pbuf->desiredy, _state);
    }
    dserrfinish(&pbuf->tmp0, _state);
    rep->relclserror = pbuf->tmp0.ptr.p_double[0];
    rep->avgce = pbuf->tmp0.ptr.p_double[1]/ae_log((double)(2), _state);
    rep->rmserror = pbuf->tmp0.ptr.p_double[2];
    rep->avgerror = pbuf->tmp0.ptr.p_double[3];
    rep->avgrelerror = pbuf->tmp0.ptr.p_double[4];
    ae_shared_pool_recycle(buf, &_pbuf, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Calculation of all types of errors on dataset given by sparse matrix

  -- ALGLIB --
     Copyright 10.09.2012 by Bochkanov Sergey
*************************************************************************/
void mlpeallerrorssparse(mlpensemble* ensemble,
     sparsematrix* xy,
     ae_int_t npoints,
     double* relcls,
     double* avgce,
     double* rms,
     double* avg,
     double* avgrel,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_vector buf;
    ae_vector workx;
    ae_vector y;
    ae_vector dy;
    ae_int_t nin;
    ae_int_t nout;

    ae_frame_make(_state, &_frame_block);
    memset(&buf, 0, sizeof(buf));
    memset(&workx, 0, sizeof(workx));
    memset(&y, 0, sizeof(y));
    memset(&dy, 0, sizeof(dy));
    *relcls = 0;
    *avgce = 0;
    *rms = 0;
    *avg = 0;
    *avgrel = 0;
    ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);

    nin = mlpgetinputscount(&ensemble->network, _state);
    nout = mlpgetoutputscount(&ensemble->network, _state);
    if( mlpissoftmax(&ensemble->network, _state) )
    {
        ae_vector_set_length(&dy, 1, _state);
        dserrallocate(nout, &buf, _state);
    }
    else
    {
        ae_vector_set_length(&dy, nout, _state);
        dserrallocate(-nout, &buf, _state);
    }
    for(i=0; i<=npoints-1; i++)
    {
        sparsegetrow(xy, i, &workx, _state);
        mlpeprocess(ensemble, &workx, &y, _state);
        if( mlpissoftmax(&ensemble->network, _state) )
        {
            dy.ptr.p_double[0] = workx.ptr.p_double[nin];
        }
        else
        {
            ae_v_move(&dy.ptr.p_double[0], 1, &workx.ptr.p_double[nin], 1, ae_v_len(0,nout-1));
        }
        dserraccumulate(&buf, &y, &dy, _state);
    }
    dserrfinish(&buf, _state);
    *relcls = buf.ptr.p_double[0];
    *avgce = buf.ptr.p_double[1];
    *rms = buf.ptr.p_double[2];
    *avg = buf.ptr.p_double[3];
    *avgrel = buf.ptr.p_double[4];
    ae_frame_leave(_state);
}


/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
    Works both for classifier networks and for regression networks which
    are used as classifiers.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlperelclserror(mlpensemble* ensemble,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    modelerrors rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _modelerrors_init(&rep, _state, ae_true);

    mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
    result = rep.relclserror;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    Ensemble-   ensemble
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if ensemble solves regression task.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgce(mlpensemble* ensemble,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    modelerrors rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _modelerrors_init(&rep, _state, ae_true);

    mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
    result = rep.avgce;
    ae_frame_leave(_state);
    return result;
}
|
|
|
|
|
|
/*************************************************************************
|
|
RMS error on the test set
|
|
|
|
INPUT PARAMETERS:
|
|
Ensemble- ensemble
|
|
XY - test set
|
|
NPoints - test set size
|
|
|
|
RESULT:
|
|
root mean square error.
|
|
Its meaning for regression task is obvious. As for classification task
|
|
RMS error means error when estimating posterior probabilities.
|
|
|
|
-- ALGLIB --
|
|
Copyright 17.02.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlpermserror(mlpensemble* ensemble,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
modelerrors rep;
|
|
double result;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&rep, 0, sizeof(rep));
|
|
_modelerrors_init(&rep, _state, ae_true);
|
|
|
|
mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
|
|
result = rep.rmserror;
|
|
ae_frame_leave(_state);
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Average error on the test set
|
|
|
|
INPUT PARAMETERS:
|
|
Ensemble- ensemble
|
|
XY - test set
|
|
NPoints - test set size
|
|
|
|
RESULT:
|
|
Its meaning for regression task is obvious. As for classification task
|
|
it means average error when estimating posterior probabilities.
|
|
|
|
-- ALGLIB --
|
|
Copyright 17.02.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlpeavgerror(mlpensemble* ensemble,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
modelerrors rep;
|
|
double result;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&rep, 0, sizeof(rep));
|
|
_modelerrors_init(&rep, _state, ae_true);
|
|
|
|
mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
|
|
result = rep.avgerror;
|
|
ae_frame_leave(_state);
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Average relative error on the test set
|
|
|
|
INPUT PARAMETERS:
|
|
Ensemble- ensemble
|
|
XY - test set
|
|
NPoints - test set size
|
|
|
|
RESULT:
|
|
Its meaning for regression task is obvious. As for classification task
|
|
it means average relative error when estimating posterior probabilities.
|
|
|
|
-- ALGLIB --
|
|
Copyright 17.02.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
double mlpeavgrelerror(mlpensemble* ensemble,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
modelerrors rep;
|
|
double result;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&rep, 0, sizeof(rep));
|
|
_modelerrors_init(&rep, _state, ae_true);
|
|
|
|
mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
|
|
result = rep.avgrelerror;
|
|
ae_frame_leave(_state);
|
|
return result;
|
|
}
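/*************************************************************************
Example (a minimal sketch, not part of the library logic): computing all
five ensemble error metrics via the public C++ wrappers declared in
dataanalysis.h. The dataset below is a hypothetical placeholder; a real
ensemble would be trained first (e.g. with mlpebagginglm() or
mlptrainensemblees()) rather than used straight after creation.

    #include "dataanalysis.h"
    using namespace alglib;

    mlpensemble ens;
    mlpecreatec1(2, 5, 2, 10, ens);   // classifier: 2 inputs, 2 classes, 10 nets
    real_2d_array testxy = "[[0.1,0.2,0],[0.9,0.8,1]]";
    double relcls = mlperelclserror(ens, testxy, 2); // misclassification rate
    double ce     = mlpeavgce(ens, testxy, 2);       // avg cross-entropy, bits
    double rms    = mlpermserror(ens, testxy, 2);    // RMS posterior error
    double avg    = mlpeavgerror(ens, testxy, 2);    // average error
    double avgrel = mlpeavgrelerror(ens, testxy, 2); // average relative error
*************************************************************************/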
|
|
|
|
|
|
/*************************************************************************
|
|
Serializer: allocation
|
|
|
|
-- ALGLIB --
|
|
Copyright 19.10.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpealloc(ae_serializer* s, mlpensemble* ensemble, ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_serializer_alloc_entry(s);
|
|
ae_serializer_alloc_entry(s);
|
|
ae_serializer_alloc_entry(s);
|
|
allocrealarray(s, &ensemble->weights, -1, _state);
|
|
allocrealarray(s, &ensemble->columnmeans, -1, _state);
|
|
allocrealarray(s, &ensemble->columnsigmas, -1, _state);
|
|
mlpalloc(s, &ensemble->network, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Serializer: serialization
|
|
|
|
-- ALGLIB --
|
|
Copyright 14.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpeserialize(ae_serializer* s,
|
|
mlpensemble* ensemble,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_serializer_serialize_int(s, getmlpeserializationcode(_state), _state);
|
|
ae_serializer_serialize_int(s, mlpe_mlpefirstversion, _state);
|
|
ae_serializer_serialize_int(s, ensemble->ensemblesize, _state);
|
|
serializerealarray(s, &ensemble->weights, -1, _state);
|
|
serializerealarray(s, &ensemble->columnmeans, -1, _state);
|
|
serializerealarray(s, &ensemble->columnsigmas, -1, _state);
|
|
mlpserialize(s, &ensemble->network, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Serializer: unserialization
|
|
|
|
-- ALGLIB --
|
|
Copyright 14.03.2011 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpeunserialize(ae_serializer* s,
|
|
mlpensemble* ensemble,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i0;
|
|
ae_int_t i1;
|
|
|
|
_mlpensemble_clear(ensemble);
|
|
|
|
|
|
/*
|
|
* check correctness of header
|
|
*/
|
|
ae_serializer_unserialize_int(s, &i0, _state);
|
|
ae_assert(i0==getmlpeserializationcode(_state), "MLPEUnserialize: stream header corrupted", _state);
|
|
ae_serializer_unserialize_int(s, &i1, _state);
|
|
ae_assert(i1==mlpe_mlpefirstversion, "MLPEUnserialize: stream header corrupted", _state);
|
|
|
|
/*
|
|
* Create network
|
|
*/
|
|
ae_serializer_unserialize_int(s, &ensemble->ensemblesize, _state);
|
|
unserializerealarray(s, &ensemble->weights, _state);
|
|
unserializerealarray(s, &ensemble->columnmeans, _state);
|
|
unserializerealarray(s, &ensemble->columnsigmas, _state);
|
|
mlpunserialize(s, &ensemble->network, _state);
|
|
|
|
/*
|
|
* Allocate temporaries
|
|
*/
|
|
ae_vector_set_length(&ensemble->y, mlpgetoutputscount(&ensemble->network, _state), _state);
|
|
}
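/*************************************************************************
Example (a minimal sketch): round-tripping an ensemble through the string
serializer, assuming the auto-generated C++ wrappers mlpeserialize()/
mlpeunserialize() from dataanalysis.h.

    #include "dataanalysis.h"
    #include <string>
    using namespace alglib;

    mlpensemble ens, restored;
    mlpecreate0(3, 2, 5, ens);       // 3 inputs, 2 outputs, 5 member networks
    std::string buf;
    mlpeserialize(ens, buf);         // ensemble -> portable text form
    mlpeunserialize(buf, restored);  // text -> equivalent ensemble
*************************************************************************/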
|
|
|
|
|
|
void _mlpensemble_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpensemble *p = (mlpensemble*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_init(&p->weights, 0, DT_REAL, _state, make_automatic);
|
|
ae_vector_init(&p->columnmeans, 0, DT_REAL, _state, make_automatic);
|
|
ae_vector_init(&p->columnsigmas, 0, DT_REAL, _state, make_automatic);
|
|
_multilayerperceptron_init(&p->network, _state, make_automatic);
|
|
ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpensemble_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpensemble *dst = (mlpensemble*)_dst;
|
|
mlpensemble *src = (mlpensemble*)_src;
|
|
dst->ensemblesize = src->ensemblesize;
|
|
ae_vector_init_copy(&dst->weights, &src->weights, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->columnmeans, &src->columnmeans, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->columnsigmas, &src->columnsigmas, _state, make_automatic);
|
|
_multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpensemble_clear(void* _p)
|
|
{
|
|
mlpensemble *p = (mlpensemble*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_clear(&p->weights);
|
|
ae_vector_clear(&p->columnmeans);
|
|
ae_vector_clear(&p->columnsigmas);
|
|
_multilayerperceptron_clear(&p->network);
|
|
ae_vector_clear(&p->y);
|
|
}
|
|
|
|
|
|
void _mlpensemble_destroy(void* _p)
|
|
{
|
|
mlpensemble *p = (mlpensemble*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_destroy(&p->weights);
|
|
ae_vector_destroy(&p->columnmeans);
|
|
ae_vector_destroy(&p->columnsigmas);
|
|
_multilayerperceptron_destroy(&p->network);
|
|
ae_vector_destroy(&p->y);
|
|
}
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
|
|
|
|
|
|
/*************************************************************************
|
|
Neural network training using modified Levenberg-Marquardt with exact
|
|
Hessian calculation and regularization. Subroutine trains neural network
|
|
with restarts from random positions. Algorithm is well suited for small
|
|
and medium scale problems (hundreds of weights).
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network with initialized geometry
|
|
XY - training set
|
|
NPoints - training set size
|
|
Decay - weight decay constant, >=0.001
|
|
Decay term 'Decay*||Weights||^2' is added to error
|
|
function.
|
|
If you don't know what Decay to choose, use 0.001.
|
|
Restarts - number of restarts from random position, >0.
|
|
If you don't know what Restarts to choose, use 2.
|
|
|
|
OUTPUT PARAMETERS:
|
|
Network - trained neural network.
|
|
Info - return code:
|
|
* -9, if internal matrix inverse subroutine failed
|
|
* -2, if there is a point with class number
|
|
outside of [0..NOut-1].
|
|
* -1, if wrong parameters specified
|
|
(NPoints<0, Restarts<1).
|
|
* 2, if task has been solved.
|
|
Rep - training report
|
|
|
|
-- ALGLIB --
|
|
Copyright 10.03.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlptrainlm(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
double lmsteptol;
|
|
ae_int_t i;
|
|
ae_int_t k;
|
|
double v;
|
|
double e;
|
|
double enew;
|
|
double xnorm2;
|
|
double stepnorm;
|
|
ae_vector g;
|
|
ae_vector d;
|
|
ae_matrix h;
|
|
ae_matrix hmod;
|
|
ae_matrix z;
|
|
ae_bool spd;
|
|
double nu;
|
|
double lambdav;
|
|
double lambdaup;
|
|
double lambdadown;
|
|
minlbfgsreport internalrep;
|
|
minlbfgsstate state;
|
|
ae_vector x;
|
|
ae_vector y;
|
|
ae_vector wbase;
|
|
ae_vector wdir;
|
|
ae_vector wt;
|
|
ae_vector wx;
|
|
ae_int_t pass;
|
|
ae_vector wbest;
|
|
double ebest;
|
|
ae_int_t invinfo;
|
|
matinvreport invrep;
|
|
ae_int_t solverinfo;
|
|
densesolverreport solverrep;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&g, 0, sizeof(g));
|
|
memset(&d, 0, sizeof(d));
|
|
memset(&h, 0, sizeof(h));
|
|
memset(&hmod, 0, sizeof(hmod));
|
|
memset(&z, 0, sizeof(z));
|
|
memset(&internalrep, 0, sizeof(internalrep));
|
|
memset(&state, 0, sizeof(state));
|
|
memset(&x, 0, sizeof(x));
|
|
memset(&y, 0, sizeof(y));
|
|
memset(&wbase, 0, sizeof(wbase));
|
|
memset(&wdir, 0, sizeof(wdir));
|
|
memset(&wt, 0, sizeof(wt));
|
|
memset(&wx, 0, sizeof(wx));
|
|
memset(&wbest, 0, sizeof(wbest));
|
|
memset(&invrep, 0, sizeof(invrep));
|
|
memset(&solverrep, 0, sizeof(solverrep));
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
ae_vector_init(&g, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&h, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&hmod, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_matrix_init(&z, 0, 0, DT_REAL, _state, ae_true);
|
|
_minlbfgsreport_init(&internalrep, _state, ae_true);
|
|
_minlbfgsstate_init(&state, _state, ae_true);
|
|
ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wbase, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wdir, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wt, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wx, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
|
|
_matinvreport_init(&invrep, _state, ae_true);
|
|
_densesolverreport_init(&solverrep, _state, ae_true);
|
|
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
lambdaup = (double)(10);
|
|
lambdadown = 0.3;
|
|
lmsteptol = 0.001;
|
|
|
|
/*
|
|
* Test for inputs
|
|
*/
|
|
if( npoints<=0||restarts<1 )
|
|
{
|
|
*info = -1;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
|
|
{
|
|
*info = -2;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
decay = ae_maxreal(decay, mlptrain_mindecay, _state);
|
|
*info = 2;
|
|
|
|
/*
|
|
* Initialize data
|
|
*/
|
|
rep->ngrad = 0;
|
|
rep->nhess = 0;
|
|
rep->ncholesky = 0;
|
|
|
|
/*
|
|
* General case.
|
|
* Prepare task and network. Allocate space.
|
|
*/
|
|
mlpinitpreprocessor(network, xy, npoints, _state);
|
|
ae_vector_set_length(&g, wcount-1+1, _state);
|
|
ae_matrix_set_length(&h, wcount-1+1, wcount-1+1, _state);
|
|
ae_matrix_set_length(&hmod, wcount-1+1, wcount-1+1, _state);
|
|
ae_vector_set_length(&wbase, wcount-1+1, _state);
|
|
ae_vector_set_length(&wdir, wcount-1+1, _state);
|
|
ae_vector_set_length(&wbest, wcount-1+1, _state);
|
|
ae_vector_set_length(&wt, wcount-1+1, _state);
|
|
ae_vector_set_length(&wx, wcount-1+1, _state);
|
|
ebest = ae_maxrealnumber;
|
|
|
|
/*
|
|
* Multiple passes
|
|
*/
|
|
for(pass=1; pass<=restarts; pass++)
|
|
{
|
|
|
|
/*
|
|
* Initialize weights
|
|
*/
|
|
mlprandomize(network, _state);
|
|
|
|
/*
|
|
* First stage of the hybrid algorithm: LBFGS
|
|
*/
|
|
ae_v_move(&wbase.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
minlbfgscreate(wcount, ae_minint(wcount, 5, _state), &wbase, &state, _state);
|
|
minlbfgssetcond(&state, (double)(0), (double)(0), (double)(0), ae_maxint(25, wcount, _state), _state);
|
|
while(minlbfgsiteration(&state, _state))
|
|
{
|
|
|
|
/*
|
|
* gradient
|
|
*/
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
mlpgradbatch(network, xy, npoints, &state.f, &state.g, _state);
|
|
|
|
/*
|
|
* weight decay
|
|
*/
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
state.f = state.f+0.5*decay*v;
|
|
ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
|
|
/*
|
|
* next iteration
|
|
*/
|
|
rep->ngrad = rep->ngrad+1;
|
|
}
|
|
minlbfgsresults(&state, &wbase, &internalrep, _state);
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &wbase.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
|
|
/*
|
|
* Second stage of the hybrid algorithm: LM
|
|
*
|
|
* Initialize H with identity matrix,
|
|
* G with gradient,
|
|
* E with regularized error.
|
|
*/
|
|
mlphessianbatch(network, xy, npoints, &e, &g, &h, _state);
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
e = e+0.5*decay*v;
|
|
ae_v_addd(&g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
for(k=0; k<=wcount-1; k++)
|
|
{
|
|
h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
|
|
}
|
|
rep->nhess = rep->nhess+1;
|
|
lambdav = 0.001;
|
|
nu = (double)(2);
|
|
for(;;)
|
|
{
|
|
|
|
/*
|
|
* 1. HMod = H+lambda*I
|
|
* 2. Try to solve (H+Lambda*I)*dx = -g.
|
|
* Increase lambda if left part is not positive definite.
|
|
*/
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
ae_v_move(&hmod.ptr.pp_double[i][0], 1, &h.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
|
|
hmod.ptr.pp_double[i][i] = hmod.ptr.pp_double[i][i]+lambdav;
|
|
}
|
|
spd = spdmatrixcholesky(&hmod, wcount, ae_true, _state);
|
|
rep->ncholesky = rep->ncholesky+1;
|
|
if( !spd )
|
|
{
|
|
lambdav = lambdav*lambdaup*nu;
|
|
nu = nu*2;
|
|
continue;
|
|
}
|
|
spdmatrixcholeskysolve(&hmod, wcount, ae_true, &g, &solverinfo, &solverrep, &wdir, _state);
|
|
if( solverinfo<0 )
|
|
{
|
|
lambdav = lambdav*lambdaup*nu;
|
|
nu = nu*2;
|
|
continue;
|
|
}
|
|
ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), -1);
|
|
|
|
/*
|
|
* Lambda found.
|
|
* 1. Save old w in WBase
|
|
* 2. Test some stopping criteria
|
|
* 3. If error(w+wdir)>error(w), increase lambda
|
|
*/
|
|
ae_v_add(&network->weights.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
xnorm2 = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
stepnorm = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
stepnorm = ae_sqrt(stepnorm, _state);
|
|
enew = mlperror(network, xy, npoints, _state)+0.5*decay*xnorm2;
|
|
if( ae_fp_less(stepnorm,lmsteptol*(1+ae_sqrt(xnorm2, _state))) )
|
|
{
|
|
break;
|
|
}
|
|
if( ae_fp_greater(enew,e) )
|
|
{
|
|
lambdav = lambdav*lambdaup*nu;
|
|
nu = nu*2;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Optimize using inv(cholesky(H)) as preconditioner
|
|
*/
|
|
rmatrixtrinverse(&hmod, wcount, ae_true, ae_false, &invinfo, &invrep, _state);
|
|
if( invinfo<=0 )
|
|
{
|
|
|
|
/*
|
|
* if the matrix can't be inverted then exit with an error
|
|
* TODO: make WCount steps in direction suggested by HMod
|
|
*/
|
|
*info = -9;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
ae_v_move(&wbase.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
wt.ptr.p_double[i] = (double)(0);
|
|
}
|
|
minlbfgscreatex(wcount, wcount, &wt, 1, 0.0, &state, _state);
|
|
minlbfgssetcond(&state, (double)(0), (double)(0), (double)(0), 5, _state);
|
|
while(minlbfgsiteration(&state, _state))
|
|
{
|
|
|
|
/*
|
|
* gradient
|
|
*/
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
v = ae_v_dotproduct(&state.x.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1));
|
|
network->weights.ptr.p_double[i] = wbase.ptr.p_double[i]+v;
|
|
}
|
|
mlpgradbatch(network, xy, npoints, &state.f, &g, _state);
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
state.g.ptr.p_double[i] = (double)(0);
|
|
}
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
v = g.ptr.p_double[i];
|
|
ae_v_addd(&state.g.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1), v);
|
|
}
|
|
|
|
/*
|
|
* weight decay
|
|
* grad(x'*x) = A'*(x0+A*t)
|
|
*/
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
state.f = state.f+0.5*decay*v;
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
v = decay*network->weights.ptr.p_double[i];
|
|
ae_v_addd(&state.g.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1), v);
|
|
}
|
|
|
|
/*
|
|
* next iteration
|
|
*/
|
|
rep->ngrad = rep->ngrad+1;
|
|
}
|
|
minlbfgsresults(&state, &wt, &internalrep, _state);
|
|
|
|
/*
|
|
* Accept new position.
|
|
* Calculate Hessian
|
|
*/
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
v = ae_v_dotproduct(&wt.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1));
|
|
network->weights.ptr.p_double[i] = wbase.ptr.p_double[i]+v;
|
|
}
|
|
mlphessianbatch(network, xy, npoints, &e, &g, &h, _state);
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
e = e+0.5*decay*v;
|
|
ae_v_addd(&g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
for(k=0; k<=wcount-1; k++)
|
|
{
|
|
h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
|
|
}
|
|
rep->nhess = rep->nhess+1;
|
|
|
|
/*
|
|
* Update lambda
|
|
*/
|
|
lambdav = lambdav*lambdadown;
|
|
nu = (double)(2);
|
|
}
|
|
|
|
/*
|
|
* update WBest
|
|
*/
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
e = 0.5*decay*v+mlperror(network, xy, npoints, _state);
|
|
if( ae_fp_less(e,ebest) )
|
|
{
|
|
ebest = e;
|
|
ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* copy WBest to output
|
|
*/
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
ae_frame_leave(_state);
|
|
}
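/*************************************************************************
Example (a minimal sketch, hypothetical XOR-style dataset): training a
small regression network with the Levenberg-Marquardt trainer through the
public C++ wrapper mlptrainlm() declared in dataanalysis.h.

    #include "dataanalysis.h"
    using namespace alglib;

    multilayerperceptron net;
    mlpcreate1(2, 5, 1, net);        // 2 inputs, 5 hidden neurons, 1 output
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    ae_int_t info;
    mlpreport rep;
    mlptrainlm(net, xy, 4, 0.001, 2, info, rep);  // Decay=0.001, 2 restarts
    // info==2 on success; rep.ngrad/nhess/ncholesky count the work performed
*************************************************************************/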
|
|
|
|
|
|
/*************************************************************************
|
|
Neural network training using L-BFGS algorithm with regularization.
|
|
Subroutine trains neural network with restarts from random positions.
|
|
Algorithm is well suited for problems of any dimensionality (memory
|
|
requirements and step complexity are linear in the number of weights).
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network with initialized geometry
|
|
XY - training set
|
|
NPoints - training set size
|
|
Decay - weight decay constant, >=0.001
|
|
Decay term 'Decay*||Weights||^2' is added to error
|
|
function.
|
|
If you don't know what Decay to choose, use 0.001.
|
|
Restarts - number of restarts from random position, >0.
|
|
If you don't know what Restarts to choose, use 2.
|
|
WStep - stopping criterion. Algorithm stops if step size is
|
|
less than WStep. Recommended value - 0.01. Zero step
|
|
size means stopping after MaxIts iterations.
|
|
MaxIts - stopping criterion. Algorithm stops after MaxIts
|
|
iterations (NOT gradient calculations). Zero MaxIts
|
|
means stopping when step is sufficiently small.
|
|
|
|
OUTPUT PARAMETERS:
|
|
Network - trained neural network.
|
|
Info - return code:
|
|
* -8, if both WStep=0 and MaxIts=0
|
|
* -2, if there is a point with class number
|
|
outside of [0..NOut-1].
|
|
* -1, if wrong parameters specified
|
|
(NPoints<0, Restarts<1).
|
|
* 2, if task has been solved.
|
|
Rep - training report
|
|
|
|
-- ALGLIB --
|
|
Copyright 09.12.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlptrainlbfgs(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i;
|
|
ae_int_t pass;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_vector w;
|
|
ae_vector wbest;
|
|
double e;
|
|
double v;
|
|
double ebest;
|
|
minlbfgsreport internalrep;
|
|
minlbfgsstate state;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&w, 0, sizeof(w));
|
|
memset(&wbest, 0, sizeof(wbest));
|
|
memset(&internalrep, 0, sizeof(internalrep));
|
|
memset(&state, 0, sizeof(state));
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
|
|
_minlbfgsreport_init(&internalrep, _state, ae_true);
|
|
_minlbfgsstate_init(&state, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* Test inputs, parse flags, read network geometry
|
|
*/
|
|
if( ae_fp_eq(wstep,(double)(0))&&maxits==0 )
|
|
{
|
|
*info = -8;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
if( ((npoints<=0||restarts<1)||ae_fp_less(wstep,(double)(0)))||maxits<0 )
|
|
{
|
|
*info = -1;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
|
|
{
|
|
*info = -2;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
decay = ae_maxreal(decay, mlptrain_mindecay, _state);
|
|
*info = 2;
|
|
|
|
/*
|
|
* Prepare
|
|
*/
|
|
mlpinitpreprocessor(network, xy, npoints, _state);
|
|
ae_vector_set_length(&w, wcount-1+1, _state);
|
|
ae_vector_set_length(&wbest, wcount-1+1, _state);
|
|
ebest = ae_maxrealnumber;
|
|
|
|
/*
|
|
* Multiple starts
|
|
*/
|
|
rep->ncholesky = 0;
|
|
rep->nhess = 0;
|
|
rep->ngrad = 0;
|
|
for(pass=1; pass<=restarts; pass++)
|
|
{
|
|
|
|
/*
|
|
* Process
|
|
*/
|
|
mlprandomize(network, _state);
|
|
ae_v_move(&w.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
minlbfgscreate(wcount, ae_minint(wcount, 10, _state), &w, &state, _state);
|
|
minlbfgssetcond(&state, 0.0, 0.0, wstep, maxits, _state);
|
|
while(minlbfgsiteration(&state, _state))
|
|
{
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
mlpgradnbatch(network, xy, npoints, &state.f, &state.g, _state);
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
state.f = state.f+0.5*decay*v;
|
|
ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
rep->ngrad = rep->ngrad+1;
|
|
}
|
|
minlbfgsresults(&state, &w, &internalrep, _state);
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &w.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
|
|
/*
|
|
* Compare with best
|
|
*/
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
e = mlperrorn(network, xy, npoints, _state)+0.5*decay*v;
|
|
if( ae_fp_less(e,ebest) )
|
|
{
|
|
ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
ebest = e;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The best network
|
|
*/
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
ae_frame_leave(_state);
|
|
}
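/*************************************************************************
Example (a minimal sketch, same hypothetical dataset as above): L-BFGS
training with a step-size stopping criterion, via the public C++ wrapper
mlptrainlbfgs() from dataanalysis.h.

    multilayerperceptron net;
    mlpcreate1(2, 5, 1, net);
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    ae_int_t info;
    mlpreport rep;
    // WStep=0.01, MaxIts=0: stop when the step becomes smaller than 0.01
    mlptrainlbfgs(net, xy, 4, 0.001, 2, 0.01, 0, info, rep);
*************************************************************************/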
|
|
|
|
|
|
/*************************************************************************
|
|
Neural network training using early stopping (base algorithm - L-BFGS with
|
|
regularization).
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network with initialized geometry
|
|
TrnXY - training set
|
|
TrnSize - training set size, TrnSize>0
|
|
ValXY - validation set
|
|
ValSize - validation set size, ValSize>0
|
|
Decay - weight decay constant, >=0.001
|
|
Decay term 'Decay*||Weights||^2' is added to error
|
|
function.
|
|
If you don't know what Decay to choose, use 0.001.
|
|
Restarts - number of restarts, either:
|
|
* strictly positive number - algorithm makes the specified
|
|
number of restarts from random position.
|
|
* -1, in which case algorithm makes exactly one run
|
|
from the initial state of the network (no randomization).
|
|
If you don't know what Restarts to choose, choose one
|
|
of the following:
|
|
* -1 (deterministic start)
|
|
* +1 (one random restart)
|
|
* +5 (moderate amount of random restarts)
|
|
|
|
OUTPUT PARAMETERS:
|
|
Network - trained neural network.
|
|
Info - return code:
|
|
* -2, if there is a point with class number
|
|
outside of [0..NOut-1].
|
|
* -1, if wrong parameters specified
|
|
(NPoints<0, Restarts<1, ...).
|
|
* 2, task has been solved, stopping criterion met -
|
|
sufficiently small step size. Not expected (we
|
|
use EARLY stopping) but possible and not an
|
|
error.
|
|
* 6, task has been solved, stopping criterion met -
|
|
increasing of validation set error.
|
|
Rep - training report
|
|
|
|
NOTE:
|
|
|
|
Algorithm stops if validation set error increases for long enough or if the
|
|
step size is small enough (there are tasks where validation set error may
|
|
decrease for eternity). In any case the returned solution corresponds to the
|
|
minimum of validation set error.
|
|
|
|
-- ALGLIB --
|
|
Copyright 10.03.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlptraines(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* trnxy,
|
|
ae_int_t trnsize,
|
|
/* Real */ ae_matrix* valxy,
|
|
ae_int_t valsize,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i;
|
|
ae_int_t pass;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_vector w;
|
|
ae_vector wbest;
|
|
double e;
|
|
double v;
|
|
double ebest;
|
|
ae_vector wfinal;
|
|
double efinal;
|
|
ae_int_t itcnt;
|
|
ae_int_t itbest;
|
|
minlbfgsreport internalrep;
|
|
minlbfgsstate state;
|
|
double wstep;
|
|
ae_bool needrandomization;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&w, 0, sizeof(w));
|
|
memset(&wbest, 0, sizeof(wbest));
|
|
memset(&wfinal, 0, sizeof(wfinal));
|
|
memset(&internalrep, 0, sizeof(internalrep));
|
|
memset(&state, 0, sizeof(state));
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&wfinal, 0, DT_REAL, _state, ae_true);
|
|
_minlbfgsreport_init(&internalrep, _state, ae_true);
|
|
_minlbfgsstate_init(&state, _state, ae_true);
|
|
|
|
wstep = 0.001;
|
|
|
|
/*
|
|
* Test inputs, parse flags, read network geometry
|
|
*/
|
|
if( ((trnsize<=0||valsize<=0)||(restarts<1&&restarts!=-1))||ae_fp_less(decay,(double)(0)) )
|
|
{
|
|
*info = -1;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
if( restarts==-1 )
|
|
{
|
|
needrandomization = ae_false;
|
|
restarts = 1;
|
|
}
|
|
else
|
|
{
|
|
needrandomization = ae_true;
|
|
}
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
if( mlpissoftmax(network, _state) )
|
|
{
|
|
for(i=0; i<=trnsize-1; i++)
|
|
{
|
|
if( ae_round(trnxy->ptr.pp_double[i][nin], _state)<0||ae_round(trnxy->ptr.pp_double[i][nin], _state)>=nout )
|
|
{
|
|
*info = -2;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
for(i=0; i<=valsize-1; i++)
|
|
{
|
|
if( ae_round(valxy->ptr.pp_double[i][nin], _state)<0||ae_round(valxy->ptr.pp_double[i][nin], _state)>=nout )
|
|
{
|
|
*info = -2;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
*info = 2;
|
|
|
|
/*
|
|
* Prepare
|
|
*/
|
|
mlpinitpreprocessor(network, trnxy, trnsize, _state);
|
|
ae_vector_set_length(&w, wcount-1+1, _state);
|
|
ae_vector_set_length(&wbest, wcount-1+1, _state);
|
|
ae_vector_set_length(&wfinal, wcount-1+1, _state);
|
|
efinal = ae_maxrealnumber;
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
wfinal.ptr.p_double[i] = (double)(0);
|
|
}
|
|
|
|
/*
|
|
* Multiple starts
|
|
*/
|
|
rep->ncholesky = 0;
|
|
rep->nhess = 0;
|
|
rep->ngrad = 0;
|
|
for(pass=1; pass<=restarts; pass++)
|
|
{
|
|
|
|
/*
|
|
* Process
|
|
*/
|
|
if( needrandomization )
|
|
{
|
|
mlprandomize(network, _state);
|
|
}
|
|
ebest = mlperror(network, valxy, valsize, _state);
|
|
ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
itbest = 0;
|
|
itcnt = 0;
|
|
ae_v_move(&w.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
minlbfgscreate(wcount, ae_minint(wcount, 10, _state), &w, &state, _state);
|
|
minlbfgssetcond(&state, 0.0, 0.0, wstep, 0, _state);
|
|
minlbfgssetxrep(&state, ae_true, _state);
|
|
while(minlbfgsiteration(&state, _state))
|
|
{
|
|
|
|
/*
|
|
* Calculate gradient
|
|
*/
|
|
if( state.needfg )
|
|
{
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
mlpgradnbatch(network, trnxy, trnsize, &state.f, &state.g, _state);
|
|
v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
state.f = state.f+0.5*decay*v;
|
|
ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
rep->ngrad = rep->ngrad+1;
|
|
}
|
|
|
|
/*
|
|
* Validation set
|
|
*/
|
|
if( state.xupdated )
|
|
{
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
e = mlperror(network, valxy, valsize, _state);
|
|
if( ae_fp_less(e,ebest) )
|
|
{
|
|
ebest = e;
|
|
ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
itbest = itcnt;
|
|
}
|
|
if( itcnt>30&&ae_fp_greater((double)(itcnt),1.5*itbest) )
|
|
{
|
|
*info = 6;
|
|
break;
|
|
}
|
|
itcnt = itcnt+1;
|
|
}
|
|
}
|
|
minlbfgsresults(&state, &w, &internalrep, _state);
|
|
|
|
/*
|
|
* Compare with final answer
|
|
*/
|
|
if( ae_fp_less(ebest,efinal) )
|
|
{
|
|
ae_v_move(&wfinal.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
efinal = ebest;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The best network
|
|
*/
|
|
ae_v_move(&network->weights.ptr.p_double[0], 1, &wfinal.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
ae_frame_leave(_state);
|
|
}
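/*************************************************************************
Example (a minimal sketch): early-stopping training with an explicit
train/validation split, via the public C++ wrapper mlptraines() from
dataanalysis.h. Both matrices are hypothetical placeholders.

    multilayerperceptron net;
    mlpcreate1(1, 3, 1, net);
    real_2d_array trnxy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]";
    real_2d_array valxy = "[[0.25,0.0625],[0.75,0.5625]]";
    ae_int_t info;
    mlpreport rep;
    // Restarts=-1: one deterministic run from the current network weights
    mlptraines(net, trnxy, 3, valxy, 2, 0.001, -1, info, rep);
    // info==6 means training stopped on growing validation set error
*************************************************************************/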
|
|
|
|
|
|
/*************************************************************************
|
|
Cross-validation estimate of generalization error.
|
|
|
|
Base algorithm - L-BFGS.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network with initialized geometry. Network is
|
|
not changed during cross-validation - it is used only
|
|
as a representative of its architecture.
|
|
XY - training set.
|
|
SSize - training set size
|
|
Decay - weight decay, same as in MLPTrainLBFGS
|
|
Restarts - number of restarts, >0.
|
|
restarts are counted for each partition separately, so
|
|
total number of restarts will be Restarts*FoldsCount.
|
|
WStep - stopping criterion, same as in MLPTrainLBFGS
|
|
MaxIts - stopping criterion, same as in MLPTrainLBFGS
|
|
FoldsCount - number of folds in k-fold cross-validation,
|
|
2<=FoldsCount<=SSize.
|
|
recommended value: 10.
|
|
|
|
OUTPUT PARAMETERS:
|
|
Info - return code, same as in MLPTrainLBFGS
|
|
Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
|
|
CVRep - generalization error estimates
|
|
|
|
-- ALGLIB --
|
|
Copyright 09.12.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpkfoldcvlbfgs(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_int_t foldscount,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
mlpcvreport* cvrep,
|
|
ae_state *_state)
|
|
{
|
|
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
_mlpcvreport_clear(cvrep);
|
|
|
|
mlptrain_mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, ae_false, wstep, maxits, info, rep, cvrep, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Cross-validation estimate of generalization error.
|
|
|
|
Base algorithm - Levenberg-Marquardt.
|
|
|
|
INPUT PARAMETERS:
|
|
Network - neural network with initialized geometry. Network is
|
|
not changed during cross-validation - it is used only
|
|
as a representative of its architecture.
|
|
XY - training set.
|
|
SSize - training set size
|
|
Decay - weight decay, same as in MLPTrainLBFGS
|
|
Restarts - number of restarts, >0.
|
|
restarts are counted for each partition separately, so
|
|
total number of restarts will be Restarts*FoldsCount.
|
|
FoldsCount - number of folds in k-fold cross-validation,
|
|
2<=FoldsCount<=SSize.
|
|
recommended value: 10.
|
|
|
|
OUTPUT PARAMETERS:
|
|
Info - return code, same as in MLPTrainLBFGS
|
|
Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
|
|
CVRep - generalization error estimates
|
|
|
|
-- ALGLIB --
|
|
Copyright 09.12.2007 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpkfoldcvlm(multilayerperceptron* network,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
ae_int_t foldscount,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
mlpcvreport* cvrep,
|
|
ae_state *_state)
|
|
{
|
|
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
_mlpcvreport_clear(cvrep);
|
|
|
|
mlptrain_mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, ae_true, 0.0, 0, info, rep, cvrep, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function estimates generalization error using cross-validation on the
|
|
current dataset with current training settings.
|
|
|
|
! COMMERCIAL EDITION OF ALGLIB:
|
|
!
|
|
! Commercial Edition of ALGLIB includes following important improvements
|
|
! of this function:
|
|
! * high-performance native backend with same C# interface (C# version)
|
|
! * multithreading support (C++ and C# versions)
|
|
!
|
|
! We recommend you to read 'Working with commercial version' section of
|
|
! ALGLIB Reference Manual in order to find out how to use performance-
|
|
! related features provided by commercial edition of ALGLIB.
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
Network - neural network. It must have same number of inputs and
|
|
outputs/classes as was specified during creation of the
|
|
trainer object. Network is not changed during cross-
|
|
validation and is not trained - it is used only as
|
|
representative of its architecture. I.e., we estimate
|
|
generalization properties of ARCHITECTURE, not some
|
|
specific network.
|
|
NRestarts - number of restarts, >=0:
|
|
* NRestarts>0 means that for each cross-validation
|
|
round specified number of random restarts is
|
|
performed, with best network being chosen after
|
|
training.
|
|
* NRestarts=0 is same as NRestarts=1
|
|
FoldsCount - number of folds in k-fold cross-validation:
|
|
* 2<=FoldsCount<=size of dataset
|
|
* recommended value: 10.
|
|
* values larger than dataset size will be silently
|
|
truncated down to dataset size
|
|
|
|
OUTPUT PARAMETERS:
|
|
Rep - structure which contains cross-validation estimates:
|
|
* Rep.RelCLSError - fraction of misclassified cases.
|
|
* Rep.AvgCE - average cross-entropy
|
|
* Rep.RMSError - root-mean-square error
|
|
* Rep.AvgError - average error
|
|
* Rep.AvgRelError - average relative error
|
|
|
|
NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
|
|
or subset with only one point was given, zeros are returned as
|
|
estimates.
|
|
|
|
NOTE: this method performs FoldsCount cross-validation rounds, each one
|
|
with NRestarts random starts. Thus, FoldsCount*NRestarts networks
|
|
are trained in total.
|
|
|
|
NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.
|
|
|
|
NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
|
|
contain errors in prediction of posterior probabilities.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpkfoldcv(mlptrainer* s,
|
|
multilayerperceptron* network,
|
|
ae_int_t nrestarts,
|
|
ae_int_t foldscount,
|
|
mlpreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_shared_pool pooldatacv;
|
|
mlpparallelizationcv datacv;
|
|
mlpparallelizationcv *sdatacv;
|
|
ae_smart_ptr _sdatacv;
|
|
ae_matrix cvy;
|
|
ae_vector folds;
|
|
ae_vector buf;
|
|
ae_vector dy;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t rowsize;
|
|
ae_int_t ntype;
|
|
ae_int_t ttype;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
hqrndstate rs;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&pooldatacv, 0, sizeof(pooldatacv));
|
|
memset(&datacv, 0, sizeof(datacv));
|
|
memset(&_sdatacv, 0, sizeof(_sdatacv));
|
|
memset(&cvy, 0, sizeof(cvy));
|
|
memset(&folds, 0, sizeof(folds));
|
|
memset(&buf, 0, sizeof(buf));
|
|
memset(&dy, 0, sizeof(dy));
|
|
memset(&rs, 0, sizeof(rs));
|
|
_mlpreport_clear(rep);
|
|
ae_shared_pool_init(&pooldatacv, _state, ae_true);
|
|
_mlpparallelizationcv_init(&datacv, _state, ae_true);
|
|
ae_smart_ptr_init(&_sdatacv, (void**)&sdatacv, _state, ae_true);
|
|
ae_matrix_init(&cvy, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&folds, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
|
|
_hqrndstate_init(&rs, _state, ae_true);
|
|
|
|
if( !mlpissoftmax(network, _state) )
|
|
{
|
|
ntype = 0;
|
|
}
|
|
else
|
|
{
|
|
ntype = 1;
|
|
}
|
|
if( s->rcpar )
|
|
{
|
|
ttype = 0;
|
|
}
|
|
else
|
|
{
|
|
ttype = 1;
|
|
}
|
|
ae_assert(ntype==ttype, "MLPKFoldCV: type of input network is not similar to network type in trainer object", _state);
|
|
ae_assert(s->npoints>=0, "MLPKFoldCV: possible trainer S is not initialized(S.NPoints<0)", _state);
|
|
mlpproperties(network, &nin, &nout, &wcount, _state);
|
|
ae_assert(s->nin==nin, "MLPKFoldCV: number of inputs in trainer is not equal to number of inputs in network", _state);
|
|
ae_assert(s->nout==nout, "MLPKFoldCV: number of outputs in trainer is not equal to number of outputs in network", _state);
|
|
ae_assert(nrestarts>=0, "MLPKFoldCV: NRestarts<0", _state);
|
|
ae_assert(foldscount>=2, "MLPKFoldCV: FoldsCount<2", _state);
|
|
if( foldscount>s->npoints )
|
|
{
|
|
foldscount = s->npoints;
|
|
}
|
|
rep->relclserror = (double)(0);
|
|
rep->avgce = (double)(0);
|
|
rep->rmserror = (double)(0);
|
|
rep->avgerror = (double)(0);
|
|
rep->avgrelerror = (double)(0);
|
|
hqrndrandomize(&rs, _state);
|
|
rep->ngrad = 0;
|
|
rep->nhess = 0;
|
|
rep->ncholesky = 0;
|
|
if( s->npoints==0||s->npoints==1 )
|
|
{
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Read network geometry, test parameters
|
|
*/
|
|
if( s->rcpar )
|
|
{
|
|
rowsize = nin+nout;
|
|
ae_vector_set_length(&dy, nout, _state);
|
|
dserrallocate(-nout, &buf, _state);
|
|
}
|
|
else
|
|
{
|
|
rowsize = nin+1;
|
|
ae_vector_set_length(&dy, 1, _state);
|
|
dserrallocate(nout, &buf, _state);
|
|
}
|
|
|
|
/*
|
|
* Folds
|
|
*/
|
|
ae_vector_set_length(&folds, s->npoints, _state);
|
|
for(i=0; i<=s->npoints-1; i++)
|
|
{
|
|
folds.ptr.p_int[i] = i*foldscount/s->npoints;
|
|
}
|
|
for(i=0; i<=s->npoints-2; i++)
|
|
{
|
|
j = i+hqrnduniformi(&rs, s->npoints-i, _state);
|
|
if( j!=i )
|
|
{
|
|
k = folds.ptr.p_int[i];
|
|
folds.ptr.p_int[i] = folds.ptr.p_int[j];
|
|
folds.ptr.p_int[j] = k;
|
|
}
|
|
}
|
|
ae_matrix_set_length(&cvy, s->npoints, nout, _state);
|
|
|
|
/*
|
|
* Initialize SEED-value for shared pool
|
|
*/
|
|
datacv.ngrad = 0;
|
|
mlpcopy(network, &datacv.network, _state);
|
|
ae_vector_set_length(&datacv.subset, s->npoints, _state);
|
|
ae_vector_set_length(&datacv.xyrow, rowsize, _state);
|
|
ae_vector_set_length(&datacv.y, nout, _state);
|
|
|
|
/*
|
|
* Create shared pool
|
|
*/
|
|
ae_shared_pool_set_seed(&pooldatacv, &datacv, sizeof(datacv), _mlpparallelizationcv_init, _mlpparallelizationcv_init_copy, _mlpparallelizationcv_destroy, _state);
|
|
|
|
/*
|
|
* Parallelization
|
|
*/
|
|
mlptrain_mthreadcv(s, rowsize, nrestarts, &folds, 0, foldscount, &cvy, &pooldatacv, wcount, _state);
|
|
|
|
/*
|
|
* Calculate value for NGrad
|
|
*/
|
|
ae_shared_pool_first_recycled(&pooldatacv, &_sdatacv, _state);
|
|
while(sdatacv!=NULL)
|
|
{
|
|
rep->ngrad = rep->ngrad+sdatacv->ngrad;
|
|
ae_shared_pool_next_recycled(&pooldatacv, &_sdatacv, _state);
|
|
}
|
|
|
|
/*
|
|
* Combine results and calculate the cross-validation error
|
|
*/
|
|
for(i=0; i<=s->npoints-1; i++)
|
|
{
|
|
if( s->datatype==0 )
|
|
{
|
|
ae_v_move(&datacv.xyrow.ptr.p_double[0], 1, &s->densexy.ptr.pp_double[i][0], 1, ae_v_len(0,rowsize-1));
|
|
}
|
|
if( s->datatype==1 )
|
|
{
|
|
sparsegetrow(&s->sparsexy, i, &datacv.xyrow, _state);
|
|
}
|
|
ae_v_move(&datacv.y.ptr.p_double[0], 1, &cvy.ptr.pp_double[i][0], 1, ae_v_len(0,nout-1));
|
|
if( s->rcpar )
|
|
{
|
|
ae_v_move(&dy.ptr.p_double[0], 1, &datacv.xyrow.ptr.p_double[nin], 1, ae_v_len(0,nout-1));
|
|
}
|
|
else
|
|
{
|
|
dy.ptr.p_double[0] = datacv.xyrow.ptr.p_double[nin];
|
|
}
|
|
dserraccumulate(&buf, &datacv.y, &dy, _state);
|
|
}
|
|
dserrfinish(&buf, _state);
|
|
rep->relclserror = buf.ptr.p_double[0];
|
|
rep->avgce = buf.ptr.p_double[1];
|
|
rep->rmserror = buf.ptr.p_double[2];
|
|
rep->avgerror = buf.ptr.p_double[3];
|
|
rep->avgrelerror = buf.ptr.p_double[4];
|
|
ae_frame_leave(_state);
|
|
}
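/*************************************************************************
Example (a minimal sketch): k-fold cross-validation with a trainer object,
via the public C++ wrappers from dataanalysis.h. The dataset is a
hypothetical placeholder; with fewer points than folds, FoldsCount is
silently truncated to the dataset size, as documented above.

    mlptrainer trn;
    mlpcreatetrainer(2, 1, trn);             // regression: 2 inputs, 1 output
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    mlpsetdataset(trn, xy, 4);
    multilayerperceptron net;                // architecture representative only
    mlpcreate1(2, 5, 1, net);
    mlpreport rep;
    mlpkfoldcv(trn, net, 5, 10, rep);        // 5 restarts per fold
    // rep.rmserror etc. now hold cross-validation estimates
*************************************************************************/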
|
|
|
|
|
|
/*************************************************************************
|
|
Creation of the network trainer object for regression networks
|
|
|
|
INPUT PARAMETERS:
|
|
NIn - number of inputs, NIn>=1
|
|
NOut - number of outputs, NOut>=1
|
|
|
|
OUTPUT PARAMETERS:
|
|
S - neural network trainer object.
|
|
This structure can be used to train any regression
|
|
network with NIn inputs and NOut outputs.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpcreatetrainer(ae_int_t nin,
|
|
ae_int_t nout,
|
|
mlptrainer* s,
|
|
ae_state *_state)
|
|
{
|
|
|
|
_mlptrainer_clear(s);
|
|
|
|
ae_assert(nin>=1, "MLPCreateTrainer: NIn<1.", _state);
|
|
ae_assert(nout>=1, "MLPCreateTrainer: NOut<1.", _state);
|
|
s->nin = nin;
|
|
s->nout = nout;
|
|
s->rcpar = ae_true;
|
|
s->lbfgsfactor = mlptrain_defaultlbfgsfactor;
|
|
s->decay = 1.0E-6;
|
|
mlpsetcond(s, (double)(0), 0, _state);
|
|
s->datatype = 0;
|
|
s->npoints = 0;
|
|
mlpsetalgobatch(s, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Creation of the network trainer object for classification networks
|
|
|
|
INPUT PARAMETERS:
|
|
NIn - number of inputs, NIn>=1
|
|
NClasses - number of classes, NClasses>=2
|
|
|
|
OUTPUT PARAMETERS:
|
|
S - neural network trainer object.
|
|
This structure can be used to train any classification
|
|
network with NIn inputs and NClasses outputs.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpcreatetrainercls(ae_int_t nin,
|
|
ae_int_t nclasses,
|
|
mlptrainer* s,
|
|
ae_state *_state)
|
|
{
|
|
|
|
_mlptrainer_clear(s);
|
|
|
|
ae_assert(nin>=1, "MLPCreateTrainerCls: NIn<1.", _state);
|
|
ae_assert(nclasses>=2, "MLPCreateTrainerCls: NClasses<2.", _state);
|
|
s->nin = nin;
|
|
s->nout = nclasses;
|
|
s->rcpar = ae_false;
|
|
s->lbfgsfactor = mlptrain_defaultlbfgsfactor;
|
|
s->decay = 1.0E-6;
|
|
mlpsetcond(s, (double)(0), 0, _state);
|
|
s->datatype = 0;
|
|
s->npoints = 0;
|
|
mlpsetalgobatch(s, _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets "current dataset" of the trainer object to one passed
|
|
by user.
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
XY - training set, see below for information on the
|
|
training set format. This function checks correctness
|
|
of the dataset (no NANs/INFs, class numbers are
|
|
correct) and throws an exception when an incorrect dataset
|
|
is passed.
|
|
NPoints - points count, >=0.
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetdataset(mlptrainer* s,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t ndim;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
|
|
ae_assert(s->nin>=1, "MLPSetDataset: possible parameter S is not initialized or spoiled(S.NIn<=0).", _state);
|
|
ae_assert(npoints>=0, "MLPSetDataset: NPoints<0", _state);
|
|
ae_assert(npoints<=xy->rows, "MLPSetDataset: invalid size of matrix XY(NPoints more than rows of matrix XY)", _state);
|
|
s->datatype = 0;
|
|
s->npoints = npoints;
|
|
if( npoints==0 )
|
|
{
|
|
return;
|
|
}
|
|
if( s->rcpar )
|
|
{
|
|
ae_assert(s->nout>=1, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression).", _state);
|
|
ndim = s->nin+s->nout;
|
|
ae_assert(ndim<=xy->cols, "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY).", _state);
|
|
ae_assert(apservisfinitematrix(xy, npoints, ndim, _state), "MLPSetDataset: parameter XY contains Infinite or NaN.", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert(s->nout>=2, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier).", _state);
|
|
ndim = s->nin+1;
|
|
ae_assert(ndim<=xy->cols, "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY).", _state);
|
|
ae_assert(apservisfinitematrix(xy, npoints, ndim, _state), "MLPSetDataset: parameter XY contains Infinite or NaN.", _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
ae_assert(ae_round(xy->ptr.pp_double[i][s->nin], _state)>=0&&ae_round(xy->ptr.pp_double[i][s->nin], _state)<s->nout, "MLPSetDataset: invalid parameter XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses).", _state);
|
|
}
|
|
}
|
|
rmatrixsetlengthatleast(&s->densexy, npoints, ndim, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
for(j=0; j<=ndim-1; j++)
|
|
{
|
|
s->densexy.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
|
|
}
|
|
}
|
|
}
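/*************************************************************************
Example (a minimal sketch): the two dense dataset layouts accepted by
mlpsetdataset(), using the public C++ wrappers from dataanalysis.h.

    // regression trainer: each row is NIn inputs followed by NOut outputs
    mlptrainer rtrn;
    mlpcreatetrainer(2, 1, rtrn);
    real_2d_array rxy = "[[0.0,1.0,0.5],[1.0,0.0,0.7]]";
    mlpsetdataset(rtrn, rxy, 2);

    // classification trainer: each row is NIn inputs followed by class index
    mlptrainer ctrn;
    mlpcreatetrainercls(2, 3, ctrn);
    real_2d_array cxy = "[[0.0,1.0,0],[1.0,0.0,2]]";  // classes in [0,2]
    mlpsetdataset(ctrn, cxy, 2);
*************************************************************************/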
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets "current dataset" of the trainer object to one passed
|
|
by user (sparse matrix is used to store dataset).
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
XY - training set, see below for information on the
|
|
training set format. This function checks correctness
|
|
of the dataset (no NANs/INFs, class numbers are
|
|
correct) and throws an exception when an incorrect dataset
|
|
is passed. Any sparse storage format can be used:
|
|
Hash-table, CRS...
|
|
NPoints - points count, >=0
|
|
|
|
DATASET FORMAT:
|
|
|
|
This function uses two different dataset formats - one for regression
|
|
networks, another one for classification networks.
|
|
|
|
For regression networks with NIn inputs and NOut outputs the following dataset
|
|
format is used:
|
|
* dataset is given by NPoints*(NIn+NOut) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, next NOut columns are outputs
|
|
|
|
For classification networks with NIn inputs and NClasses classes the following
|
|
dataset format is used:
|
|
* dataset is given by NPoints*(NIn+1) matrix
|
|
* each row corresponds to one example
|
|
* first NIn columns are inputs, last column stores class number (from 0 to
|
|
NClasses-1).
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetsparsedataset(mlptrainer* s,
|
|
sparsematrix* xy,
|
|
ae_int_t npoints,
|
|
ae_state *_state)
|
|
{
|
|
double v;
|
|
ae_int_t t0;
|
|
ae_int_t t1;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
|
|
|
|
/*
|
|
* Check correctness of the data
|
|
*/
|
|
ae_assert(s->nin>0, "MLPSetSparseDataset: possible parameter S is not initialized or spoiled(S.NIn<=0).", _state);
|
|
ae_assert(npoints>=0, "MLPSetSparseDataset: NPoints<0", _state);
|
|
ae_assert(npoints<=sparsegetnrows(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(NPoints more than rows of matrix XY)", _state);
|
|
if( npoints>0 )
|
|
{
|
|
t0 = 0;
|
|
t1 = 0;
|
|
if( s->rcpar )
|
|
{
|
|
ae_assert(s->nout>=1, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression).", _state);
|
|
ae_assert(s->nin+s->nout<=sparsegetncols(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY).", _state);
|
|
while(sparseenumerate(xy, &t0, &t1, &i, &j, &v, _state))
|
|
{
|
|
if( i<npoints&&j<s->nin+s->nout )
|
|
{
|
|
ae_assert(ae_isfinite(v, _state), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN.", _state);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ae_assert(s->nout>=2, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier).", _state);
|
|
ae_assert(s->nin+1<=sparsegetncols(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY).", _state);
|
|
while(sparseenumerate(xy, &t0, &t1, &i, &j, &v, _state))
|
|
{
|
|
if( i<npoints&&j<=s->nin )
|
|
{
|
|
if( j!=s->nin )
|
|
{
|
|
ae_assert(ae_isfinite(v, _state), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN.", _state);
|
|
}
|
|
else
|
|
{
|
|
ae_assert((ae_isfinite(v, _state)&&ae_round(v, _state)>=0)&&ae_round(v, _state)<s->nout, "MLPSetSparseDataset: invalid sparse matrix XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses).", _state);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set dataset
|
|
*/
|
|
s->datatype = 1;
|
|
s->npoints = npoints;
|
|
sparsecopytocrs(xy, &s->sparsexy, _state);
|
|
}
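/*************************************************************************
Example (a minimal sketch): filling a hash-table sparse matrix and handing
it to the trainer; mlpsetsparsedataset() copies it into internal CRS
storage, so any supported sparse format may be passed in. Unset entries
are implicit zeros (including the class column for class 0).

    mlptrainer trn;
    mlpcreatetrainercls(3, 2, trn);   // 3 inputs, 2 classes
    sparsematrix xy;
    sparsecreate(2, 4, 0, xy);        // 2 points, NIn+1 = 4 columns
    sparseset(xy, 0, 0, 1.0);         // point 0: one nonzero input, class 0
    sparseset(xy, 1, 2, 5.0);         // point 1: one nonzero input...
    sparseset(xy, 1, 3, 1.0);         // ...and class 1
    mlpsetsparsedataset(trn, xy, 2);
*************************************************************************/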
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets weight decay coefficient which is used for training.
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
Decay - weight decay coefficient, >=0. Weight decay term
|
|
'Decay*||Weights||^2' is added to error function. If
|
|
you don't know what Decay to choose, use 1.0E-3.
|
|
Weight decay can be set to zero, in this case network
|
|
is trained without weight decay.
|
|
|
|
NOTE: by default network uses some small nonzero value for weight decay.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetdecay(mlptrainer* s, double decay, ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(ae_isfinite(decay, _state), "MLPSetDecay: parameter Decay contains Infinite or NaN.", _state);
|
|
ae_assert(ae_fp_greater_eq(decay,(double)(0)), "MLPSetDecay: Decay<0.", _state);
|
|
s->decay = decay;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets stopping criteria for the optimizer.
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
WStep - stopping criterion. Algorithm stops if step size is
|
|
less than WStep. Recommended value - 0.01. Zero step
|
|
size means stopping after MaxIts iterations.
|
|
WStep>=0.
|
|
MaxIts - stopping criterion. Algorithm stops after MaxIts
|
|
epochs (full passes over entire dataset). Zero MaxIts
|
|
means stopping when step is sufficiently small.
|
|
MaxIts>=0.
|
|
|
|
NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also
|
|
used when MLPSetCond() is called with WStep=0 and MaxIts=0.
|
|
|
|
NOTE: these stopping criteria are used for all kinds of neural training -
|
|
from "conventional" networks to early stopping ensembles. When used
|
|
for "conventional" networks, they are used as the only stopping
|
|
criteria. When combined with early stopping, they are used as ADDITIONAL
|
|
stopping criteria which can terminate early stopping algorithm.
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetcond(mlptrainer* s,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(ae_isfinite(wstep, _state), "MLPSetCond: parameter WStep contains Infinite or NaN.", _state);
|
|
ae_assert(ae_fp_greater_eq(wstep,(double)(0)), "MLPSetCond: WStep<0.", _state);
|
|
ae_assert(maxits>=0, "MLPSetCond: MaxIts<0.", _state);
|
|
if( ae_fp_neq(wstep,(double)(0))||maxits!=0 )
|
|
{
|
|
s->wstep = wstep;
|
|
s->maxits = maxits;
|
|
}
|
|
else
|
|
{
|
|
s->wstep = 0.005;
|
|
s->maxits = 0;
|
|
}
|
|
}
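/*************************************************************************
Example (a minimal sketch): configuring regularization and stopping
criteria on an existing trainer object 'trn' (a hypothetical name).

    mlpsetdecay(trn, 0.001);    // moderate weight decay
    mlpsetcond(trn, 0.01, 0);   // stop when step < 0.01, no iteration cap
    // mlpsetcond(trn, 0, 0) would restore the defaults (WStep=0.005)
*************************************************************************/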
|
|
|
|
|
|
/*************************************************************************
|
|
This function sets training algorithm: batch training using L-BFGS will be
|
|
used.
|
|
|
|
This algorithm:
|
|
* the most robust for small-scale problems, but may be too slow for large
|
|
scale ones.
|
|
* performs a full pass through the dataset before performing a step
|
|
* uses conditions specified by MLPSetCond() for stopping
|
|
* is the default algorithm used by the trainer object
|
|
|
|
INPUT PARAMETERS:
|
|
S - trainer object
|
|
|
|
-- ALGLIB --
|
|
Copyright 23.07.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void mlpsetalgobatch(mlptrainer* s, ae_state *_state)
|
|
{
|
|
|
|
|
|
s->algokind = 0;
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function trains the neural network passed to this function, using the
current dataset (one which was passed to MLPSetDataset() or
MLPSetSparseDataset()) and current training settings. Training from
NRestarts random starting positions is performed, and the best network is
chosen.

Training is performed using the current training algorithm.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network. It must have same number of inputs and
                    outputs/classes as was specified during creation of
                    the trainer object.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that the specified number of
                      random restarts is performed, and the best network
                      is chosen after training
                    * NRestarts=0 means that the current state of the
                      network is used for training.

OUTPUT PARAMETERS:
    Network     -   trained network

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      network is filled by zero values. Same behavior for functions
      MLPStartTraining and MLPContinueTraining.

NOTE: this method uses sum-of-squares error function for training.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainnetwork(mlptrainer* s,
     multilayerperceptron* network,
     ae_int_t nrestarts,
     mlpreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t ntype;
    ae_int_t ttype;
    ae_shared_pool trnpool;

    ae_frame_make(_state, &_frame_block);
    memset(&trnpool, 0, sizeof(trnpool));
    _mlpreport_clear(rep);
    ae_shared_pool_init(&trnpool, _state, ae_true);

    ae_assert(s->npoints>=0, "MLPTrainNetwork: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
    if( !mlpissoftmax(network, _state) )
    {
        ntype = 0;
    }
    else
    {
        ntype = 1;
    }
    if( s->rcpar )
    {
        ttype = 0;
    }
    else
    {
        ttype = 1;
    }
    ae_assert(ntype==ttype, "MLPTrainNetwork: type of input network is not similar to network type in trainer object", _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    ae_assert(s->nin==nin, "MLPTrainNetwork: number of inputs in trainer is not equal to number of inputs in network", _state);
    ae_assert(s->nout==nout, "MLPTrainNetwork: number of outputs in trainer is not equal to number of outputs in network", _state);
    ae_assert(nrestarts>=0, "MLPTrainNetwork: NRestarts<0.", _state);
    
    /*
     * Train
     */
    mlptrain_mlptrainnetworkx(s, nrestarts, -1, &s->subset, -1, &s->subset, 0, network, rep, ae_true, &trnpool, _state);
    ae_frame_leave(_state);
}


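/*************************************************************************
Usage sketch for MLPTrainNetwork() - a minimal example assuming the
standard ALGLIB C++ interface (alglib namespace); the XOR-style dataset
below is made up purely for illustration.

    alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    alglib::mlptrainer trn;
    alglib::multilayerperceptron net;
    alglib::mlpreport rep;
    alglib::mlpcreatetrainer(2, 1, trn);        // 2 inputs, 1 output
    alglib::mlpsetdataset(trn, xy, 4);          // 4 points: inputs+output
    alglib::mlpcreate1(2, 5, 1, net);           // one hidden layer, 5 neurons
    alglib::mlptrainnetwork(trn, net, 5, rep);  // 5 random restarts
*************************************************************************/
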
/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
           not recommend you to use it unless you are pretty sure that you
           need the ability to monitor training progress.

This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.

After call to this function trainer object remembers network and is ready
to train it. However, no training is performed until the first call to
MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
will advance training progress one iteration further.

EXAMPLE:
    >
    > ...initialize network and trainer object....
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     ...visualize training progress...
    >

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network. It must have same number of inputs and
                    outputs/classes as was specified during creation of
                    the trainer object.
    RandomStart -   randomize network before training or not:
                    * True means that network is randomized and its
                      initial state (one which was passed to the trainer
                      object) is lost.
                    * False means that training is started from the
                      current state of the network.

OUTPUT PARAMETERS:
    Network     -   neural network which is ready for training (weights
                    are initialized, preprocessor is initialized using
                    current training set)

NOTE: this method uses sum-of-squares error function for training.

NOTE: it is expected that trainer object settings are NOT changed during
      step-by-step training, i.e. no one changes stopping criteria or
      training set during training. It is possible and there is no defense
      against such actions, but algorithm behavior in such cases is
      undefined and can be unpredictable.

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpstarttraining(mlptrainer* s,
     multilayerperceptron* network,
     ae_bool randomstart,
     ae_state *_state)
{
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t ntype;
    ae_int_t ttype;


    ae_assert(s->npoints>=0, "MLPStartTraining: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
    if( !mlpissoftmax(network, _state) )
    {
        ntype = 0;
    }
    else
    {
        ntype = 1;
    }
    if( s->rcpar )
    {
        ttype = 0;
    }
    else
    {
        ttype = 1;
    }
    ae_assert(ntype==ttype, "MLPStartTraining: type of input network is not similar to network type in trainer object", _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    ae_assert(s->nin==nin, "MLPStartTraining: number of inputs in trainer is not equal to number of inputs in the network.", _state);
    ae_assert(s->nout==nout, "MLPStartTraining: number of outputs in trainer is not equal to number of outputs in the network.", _state);
    
    /*
     * Initialize temporaries
     */
    mlptrain_initmlptrnsession(network, randomstart, s, &s->session, _state);
    
    /*
     * Train network
     */
    mlptrain_mlpstarttrainingx(s, randomstart, -1, &s->subset, -1, &s->session, _state);
    
    /*
     * Update network
     */
    mlpcopytunableparameters(&s->session.network, network, _state);
}


/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
           not recommend you to use it unless you are pretty sure that you
           need the ability to monitor training progress.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.

This function performs one more iteration of the training and returns
either True (training continues) or False (training stopped). In case True
was returned, Network weights are updated according to the current state
of the optimization progress. In case False was returned, no additional
updates are performed (the previous update of the network weights moved us
to the final point, and no additional update is needed).

EXAMPLE:
    >
    > [initialize network and trainer object]
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     [visualize training progress]
    >

INPUT PARAMETERS:
    S           -   trainer object
    Network     -   neural network structure, which is used to store
                    current state of the training process.

OUTPUT PARAMETERS:
    Network     -   weights of the neural network are rewritten by the
                    current approximation.

NOTE: this method uses sum-of-squares error function for training.

NOTE: it is expected that trainer object settings are NOT changed during
      step-by-step training, i.e. no one changes stopping criteria or
      training set during training. It is possible and there is no defense
      against such actions, but algorithm behavior in such cases is
      undefined and can be unpredictable.

NOTE: It is expected that Network is the same one which was passed to
      MLPStartTraining() function. However, THIS function checks only the
      following:
      * that number of network inputs is consistent with trainer object
        settings
      * that number of network outputs/classes is consistent with trainer
        object settings
      * that number of network weights is the same as number of weights in
        the network passed to MLPStartTraining() function
      Exception is thrown when these conditions are violated.

      It is also expected that you do not change state of the network on
      your own - the only party who has the right to change network during
      its training is the trainer object. Any attempt to interfere with
      the trainer may lead to unpredictable results.


  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
ae_bool mlpcontinuetraining(mlptrainer* s,
     multilayerperceptron* network,
     ae_state *_state)
{
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t ntype;
    ae_int_t ttype;
    ae_bool result;


    ae_assert(s->npoints>=0, "MLPContinueTraining: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
    if( s->rcpar )
    {
        ttype = 0;
    }
    else
    {
        ttype = 1;
    }
    if( !mlpissoftmax(network, _state) )
    {
        ntype = 0;
    }
    else
    {
        ntype = 1;
    }
    ae_assert(ntype==ttype, "MLPContinueTraining: type of input network is not similar to network type in trainer object.", _state);
    mlpproperties(network, &nin, &nout, &wcount, _state);
    ae_assert(s->nin==nin, "MLPContinueTraining: number of inputs in trainer is not equal to number of inputs in the network.", _state);
    ae_assert(s->nout==nout, "MLPContinueTraining: number of outputs in trainer is not equal to number of outputs in the network.", _state);
    result = mlptrain_mlpcontinuetrainingx(s, &s->subset, -1, &s->ngradbatch, &s->session, _state);
    if( result )
    {
        ae_v_move(&network->weights.ptr.p_double[0], 1, &s->session.network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
    }
    return result;
}


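/*************************************************************************
Usage sketch for the step-by-step pair MLPStartTraining() /
MLPContinueTraining() - assuming the standard ALGLIB C++ interface
(alglib namespace); trn and net are prepared as in the previous sketch.

    alglib::mlpstarttraining(trn, net, true);   // randomize, then start
    while( alglib::mlpcontinuetraining(trn, net) )
    {
        // weights in net are refreshed after every iteration, so the
        // partially trained model can be inspected here (e.g. with
        // alglib::mlpprocess() on a held-out point)
    }
*************************************************************************/
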
/*************************************************************************
Training neural networks ensemble using bootstrap aggregating (bagging).
Modified Levenberg-Marquardt algorithm is used as base training method.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report.
    OOBErrors   -   out-of-bag generalization error estimate

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglm(mlpensemble* ensemble,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     double decay,
     ae_int_t restarts,
     ae_int_t* info,
     mlpreport* rep,
     mlpcvreport* ooberrors,
     ae_state *_state)
{

    *info = 0;
    _mlpreport_clear(rep);
    _mlpcvreport_clear(ooberrors);

    mlptrain_mlpebagginginternal(ensemble, xy, npoints, decay, restarts, 0.0, 0, ae_true, info, rep, ooberrors, _state);
}


/*************************************************************************
Training neural networks ensemble using bootstrap aggregating (bagging).
L-BFGS algorithm is used as base training method.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.
    WStep       -   stopping criterion, same as in MLPTrainLBFGS
    MaxIts      -   stopping criterion, same as in MLPTrainLBFGS

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report.
    OOBErrors   -   out-of-bag generalization error estimate

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglbfgs(mlpensemble* ensemble,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     double decay,
     ae_int_t restarts,
     double wstep,
     ae_int_t maxits,
     ae_int_t* info,
     mlpreport* rep,
     mlpcvreport* ooberrors,
     ae_state *_state)
{

    *info = 0;
    _mlpreport_clear(rep);
    _mlpcvreport_clear(ooberrors);

    mlptrain_mlpebagginginternal(ensemble, xy, npoints, decay, restarts, wstep, maxits, ae_false, info, rep, ooberrors, _state);
}


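/*************************************************************************
Usage sketch for the bagging trainers above - assuming the standard
ALGLIB C++ interface (alglib namespace); the tiny regression dataset is
made up purely for illustration.

    alglib::real_2d_array xy = "[[0.0,0.1],[0.5,0.6],[1.0,0.9],[1.5,1.4]]";
    alglib::mlpensemble ens;
    alglib::mlpreport rep;
    alglib::mlpcvreport oob;
    alglib::ae_int_t info;
    alglib::mlpecreate1(1, 4, 1, 10, ens);  // 10 nets, 4 hidden neurons each
    alglib::mlpebagginglbfgs(ens, xy, 4, 0.001, 3, 0.01, 0, info, rep, oob);
    // info==2 on success; oob carries the out-of-bag error estimate
*************************************************************************/
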
/*************************************************************************
Training neural networks ensemble using early stopping.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay coefficient, >=0.001
    Restarts    -   restarts, >0.

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<0, Restarts<1).
                    *  6, if task has been solved.
    Rep         -   training report.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlpetraines(mlpensemble* ensemble,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     double decay,
     ae_int_t restarts,
     ae_int_t* info,
     mlpreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t k;
    ae_int_t ccount;
    ae_int_t pcount;
    ae_matrix trnxy;
    ae_matrix valxy;
    ae_int_t trnsize;
    ae_int_t valsize;
    ae_int_t tmpinfo;
    mlpreport tmprep;
    modelerrors moderr;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    ae_frame_make(_state, &_frame_block);
    memset(&trnxy, 0, sizeof(trnxy));
    memset(&valxy, 0, sizeof(valxy));
    memset(&tmprep, 0, sizeof(tmprep));
    memset(&moderr, 0, sizeof(moderr));
    *info = 0;
    _mlpreport_clear(rep);
    ae_matrix_init(&trnxy, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&valxy, 0, 0, DT_REAL, _state, ae_true);
    _mlpreport_init(&tmprep, _state, ae_true);
    _modelerrors_init(&moderr, _state, ae_true);

    nin = mlpgetinputscount(&ensemble->network, _state);
    nout = mlpgetoutputscount(&ensemble->network, _state);
    wcount = mlpgetweightscount(&ensemble->network, _state);
    if( (npoints<2||restarts<1)||ae_fp_less(decay,(double)(0)) )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    if( mlpissoftmax(&ensemble->network, _state) )
    {
        for(i=0; i<=npoints-1; i++)
        {
            if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
            {
                *info = -2;
                ae_frame_leave(_state);
                return;
            }
        }
    }
    *info = 6;
    
    /*
     * allocate
     */
    if( mlpissoftmax(&ensemble->network, _state) )
    {
        ccount = nin+1;
        pcount = nin;
    }
    else
    {
        ccount = nin+nout;
        pcount = nin+nout;
    }
    ae_matrix_set_length(&trnxy, npoints, ccount, _state);
    ae_matrix_set_length(&valxy, npoints, ccount, _state);
    rep->ngrad = 0;
    rep->nhess = 0;
    rep->ncholesky = 0;
    
    /*
     * train networks
     */
    for(k=0; k<=ensemble->ensemblesize-1; k++)
    {
        
        /*
         * Split set
         */
        do
        {
            trnsize = 0;
            valsize = 0;
            for(i=0; i<=npoints-1; i++)
            {
                if( ae_fp_less(ae_randomreal(_state),0.66) )
                {
                    
                    /*
                     * Assign sample to training set
                     */
                    ae_v_move(&trnxy.ptr.pp_double[trnsize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,ccount-1));
                    trnsize = trnsize+1;
                }
                else
                {
                    
                    /*
                     * Assign sample to validation set
                     */
                    ae_v_move(&valxy.ptr.pp_double[valsize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,ccount-1));
                    valsize = valsize+1;
                }
            }
        }
        while(!(trnsize!=0&&valsize!=0));
        
        /*
         * Train
         */
        mlptraines(&ensemble->network, &trnxy, trnsize, &valxy, valsize, decay, restarts, &tmpinfo, &tmprep, _state);
        if( tmpinfo<0 )
        {
            *info = tmpinfo;
            ae_frame_leave(_state);
            return;
        }
        
        /*
         * save results
         */
        ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &ensemble->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
        ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcount], 1, &ensemble->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
        ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcount], 1, &ensemble->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
        rep->ngrad = rep->ngrad+tmprep.ngrad;
        rep->nhess = rep->nhess+tmprep.nhess;
        rep->ncholesky = rep->ncholesky+tmprep.ncholesky;
    }
    mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &moderr, _state);
    rep->relclserror = moderr.relclserror;
    rep->avgce = moderr.avgce;
    rep->rmserror = moderr.rmserror;
    rep->avgerror = moderr.avgerror;
    rep->avgrelerror = moderr.avgrelerror;
    ae_frame_leave(_state);
}


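/*************************************************************************
Note on the split used by MLPETrainES() above: each point is assigned to
the training set with probability 0.66, so for NPoints=100 the expected
split is about 66 training / 34 validation points. The do-while guard
merely re-draws the (unlikely) degenerate splits in which either set
comes out empty.
*************************************************************************/
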
/*************************************************************************
This function trains the neural network ensemble passed to this function
using the current dataset and early stopping training algorithm. Each
early stopping round performs NRestarts random restarts (thus,
EnsembleSize*NRestarts training rounds are performed in total).

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   trainer object;
    Ensemble    -   neural network ensemble. It must have same number of
                    inputs and outputs/classes as was specified during
                    creation of the trainer object.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that the specified number of
                      random restarts is performed during each ES round;
                    * NRestarts=0 is silently replaced by 1.

OUTPUT PARAMETERS:
    Ensemble    -   trained ensemble;
    Rep         -   it contains all types of errors.

NOTE: this training method uses BOTH early stopping and weight decay! So,
      you should select weight decay before starting training just as you
      select it before training "conventional" networks.

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      or a single-point dataset was passed, ensemble is filled by zero
      values.

NOTE: this method uses sum-of-squares error function for training.

  -- ALGLIB --
     Copyright 22.08.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainensemblees(mlptrainer* s,
     mlpensemble* ensemble,
     ae_int_t nrestarts,
     mlpreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t ntype;
    ae_int_t ttype;
    ae_shared_pool esessions;
    sinteger sgrad;
    modelerrors tmprep;

    ae_frame_make(_state, &_frame_block);
    memset(&esessions, 0, sizeof(esessions));
    memset(&sgrad, 0, sizeof(sgrad));
    memset(&tmprep, 0, sizeof(tmprep));
    _mlpreport_clear(rep);
    ae_shared_pool_init(&esessions, _state, ae_true);
    _sinteger_init(&sgrad, _state, ae_true);
    _modelerrors_init(&tmprep, _state, ae_true);

    ae_assert(s->npoints>=0, "MLPTrainEnsembleES: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
    if( !mlpeissoftmax(ensemble, _state) )
    {
        ntype = 0;
    }
    else
    {
        ntype = 1;
    }
    if( s->rcpar )
    {
        ttype = 0;
    }
    else
    {
        ttype = 1;
    }
    ae_assert(ntype==ttype, "MLPTrainEnsembleES: internal error - type of input network is not similar to network type in trainer object", _state);
    nin = mlpgetinputscount(&ensemble->network, _state);
    ae_assert(s->nin==nin, "MLPTrainEnsembleES: number of inputs in trainer is not equal to number of inputs in ensemble network", _state);
    nout = mlpgetoutputscount(&ensemble->network, _state);
    ae_assert(s->nout==nout, "MLPTrainEnsembleES: number of outputs in trainer is not equal to number of outputs in ensemble network", _state);
    ae_assert(nrestarts>=0, "MLPTrainEnsembleES: NRestarts<0.", _state);
    
    /*
     * Initialize parameter Rep
     */
    rep->relclserror = (double)(0);
    rep->avgce = (double)(0);
    rep->rmserror = (double)(0);
    rep->avgerror = (double)(0);
    rep->avgrelerror = (double)(0);
    rep->ngrad = 0;
    rep->nhess = 0;
    rep->ncholesky = 0;
    
    /*
     * Allocate
     */
    ivectorsetlengthatleast(&s->subset, s->npoints, _state);
    ivectorsetlengthatleast(&s->valsubset, s->npoints, _state);
    
    /*
     * Start training
     *
     * NOTE: ESessions is not initialized because MLPTrainEnsembleX
     *       needs uninitialized pool.
     */
    sgrad.val = 0;
    mlptrain_mlptrainensemblex(s, ensemble, 0, ensemble->ensemblesize, nrestarts, 0, &sgrad, ae_true, &esessions, _state);
    rep->ngrad = sgrad.val;
    
    /*
     * Calculate errors.
     */
    if( s->datatype==0 )
    {
        mlpeallerrorsx(ensemble, &s->densexy, &s->sparsexy, s->npoints, 0, &ensemble->network.dummyidx, 0, s->npoints, 0, &ensemble->network.buf, &tmprep, _state);
    }
    if( s->datatype==1 )
    {
        mlpeallerrorsx(ensemble, &s->densexy, &s->sparsexy, s->npoints, 1, &ensemble->network.dummyidx, 0, s->npoints, 0, &ensemble->network.buf, &tmprep, _state);
    }
    rep->relclserror = tmprep.relclserror;
    rep->avgce = tmprep.avgce;
    rep->rmserror = tmprep.rmserror;
    rep->avgerror = tmprep.avgerror;
    rep->avgrelerror = tmprep.avgrelerror;
    ae_frame_leave(_state);
}


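/*************************************************************************
Usage sketch for MLPTrainEnsembleES() - assuming the standard ALGLIB C++
interface (alglib namespace); xy/npoints stand for a dataset prepared as
in the earlier sketches.

    alglib::mlptrainer trn;
    alglib::mlpensemble ens;
    alglib::mlpreport rep;
    alglib::mlpcreatetrainer(2, 1, trn);
    alglib::mlpsetdataset(trn, xy, npoints);
    alglib::mlpecreate1(2, 6, 1, 20, ens);         // ensemble of 20 networks
    alglib::mlptrainensemblees(trn, ens, 3, rep);  // 3 restarts per ES round
*************************************************************************/
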
/*************************************************************************
Internal cross-validation subroutine
*************************************************************************/
static void mlptrain_mlpkfoldcvgeneral(multilayerperceptron* n,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     double decay,
     ae_int_t restarts,
     ae_int_t foldscount,
     ae_bool lmalgorithm,
     double wstep,
     ae_int_t maxits,
     ae_int_t* info,
     mlpreport* rep,
     mlpcvreport* cvrep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t fold;
    ae_int_t j;
    ae_int_t k;
    multilayerperceptron network;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t rowlen;
    ae_int_t wcount;
    ae_int_t nclasses;
    ae_int_t tssize;
    ae_int_t cvssize;
    ae_matrix cvset;
    ae_matrix testset;
    ae_vector folds;
    ae_int_t relcnt;
    mlpreport internalrep;
    ae_vector x;
    ae_vector y;

    ae_frame_make(_state, &_frame_block);
    memset(&network, 0, sizeof(network));
    memset(&cvset, 0, sizeof(cvset));
    memset(&testset, 0, sizeof(testset));
    memset(&folds, 0, sizeof(folds));
    memset(&internalrep, 0, sizeof(internalrep));
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    *info = 0;
    _mlpreport_clear(rep);
    _mlpcvreport_clear(cvrep);
    _multilayerperceptron_init(&network, _state, ae_true);
    ae_matrix_init(&cvset, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&testset, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&folds, 0, DT_INT, _state, ae_true);
    _mlpreport_init(&internalrep, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);


    /*
     * Read network geometry, test parameters
     */
    mlpproperties(n, &nin, &nout, &wcount, _state);
    if( mlpissoftmax(n, _state) )
    {
        nclasses = nout;
        rowlen = nin+1;
    }
    else
    {
        nclasses = -nout;
        rowlen = nin+nout;
    }
    if( (npoints<=0||foldscount<2)||foldscount>npoints )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    mlpcopy(n, &network, _state);
    
    /*
     * K-fold cross-validation.
     * First, estimate generalization error
     */
    ae_matrix_set_length(&testset, npoints-1+1, rowlen-1+1, _state);
    ae_matrix_set_length(&cvset, npoints-1+1, rowlen-1+1, _state);
    ae_vector_set_length(&x, nin-1+1, _state);
    ae_vector_set_length(&y, nout-1+1, _state);
    mlptrain_mlpkfoldsplit(xy, npoints, nclasses, foldscount, ae_false, &folds, _state);
    cvrep->relclserror = (double)(0);
    cvrep->avgce = (double)(0);
    cvrep->rmserror = (double)(0);
    cvrep->avgerror = (double)(0);
    cvrep->avgrelerror = (double)(0);
    rep->ngrad = 0;
    rep->nhess = 0;
    rep->ncholesky = 0;
    relcnt = 0;
    for(fold=0; fold<=foldscount-1; fold++)
    {
        
        /*
         * Separate set
         */
        tssize = 0;
        cvssize = 0;
        for(i=0; i<=npoints-1; i++)
        {
            if( folds.ptr.p_int[i]==fold )
            {
                ae_v_move(&testset.ptr.pp_double[tssize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,rowlen-1));
                tssize = tssize+1;
            }
            else
            {
                ae_v_move(&cvset.ptr.pp_double[cvssize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,rowlen-1));
                cvssize = cvssize+1;
            }
        }
        
        /*
         * Train on CV training set
         */
        if( lmalgorithm )
        {
            mlptrainlm(&network, &cvset, cvssize, decay, restarts, info, &internalrep, _state);
        }
        else
        {
            mlptrainlbfgs(&network, &cvset, cvssize, decay, restarts, wstep, maxits, info, &internalrep, _state);
        }
        if( *info<0 )
        {
            cvrep->relclserror = (double)(0);
            cvrep->avgce = (double)(0);
            cvrep->rmserror = (double)(0);
            cvrep->avgerror = (double)(0);
            cvrep->avgrelerror = (double)(0);
            ae_frame_leave(_state);
            return;
        }
        rep->ngrad = rep->ngrad+internalrep.ngrad;
        rep->nhess = rep->nhess+internalrep.nhess;
        rep->ncholesky = rep->ncholesky+internalrep.ncholesky;
        
        /*
         * Estimate error using CV test set
         */
        if( mlpissoftmax(&network, _state) )
        {
            
            /*
             * classification-only code
             */
            cvrep->relclserror = cvrep->relclserror+mlpclserror(&network, &testset, tssize, _state);
            cvrep->avgce = cvrep->avgce+mlperrorn(&network, &testset, tssize, _state);
        }
        for(i=0; i<=tssize-1; i++)
        {
            ae_v_move(&x.ptr.p_double[0], 1, &testset.ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
            mlpprocess(&network, &x, &y, _state);
            if( mlpissoftmax(&network, _state) )
            {
                
                /*
                 * Classification-specific code
                 */
                k = ae_round(testset.ptr.pp_double[i][nin], _state);
                for(j=0; j<=nout-1; j++)
                {
                    if( j==k )
                    {
                        cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j]-1, _state);
                        cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j]-1, _state);
                        cvrep->avgrelerror = cvrep->avgrelerror+ae_fabs(y.ptr.p_double[j]-1, _state);
                        relcnt = relcnt+1;
                    }
                    else
                    {
                        cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j], _state);
                        cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j], _state);
                    }
                }
            }
            else
            {
                
                /*
                 * Regression-specific code
                 */
                for(j=0; j<=nout-1; j++)
                {
                    cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j], _state);
                    cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j], _state);
                    if( ae_fp_neq(testset.ptr.pp_double[i][nin+j],(double)(0)) )
                    {
                        cvrep->avgrelerror = cvrep->avgrelerror+ae_fabs((y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j])/testset.ptr.pp_double[i][nin+j], _state);
                        relcnt = relcnt+1;
                    }
                }
            }
        }
    }
    if( mlpissoftmax(&network, _state) )
    {
        cvrep->relclserror = cvrep->relclserror/npoints;
        cvrep->avgce = cvrep->avgce/(ae_log((double)(2), _state)*npoints);
    }
    cvrep->rmserror = ae_sqrt(cvrep->rmserror/(npoints*nout), _state);
    cvrep->avgerror = cvrep->avgerror/(npoints*nout);
    if( relcnt>0 )
    {
        cvrep->avgrelerror = cvrep->avgrelerror/relcnt;
    }
    *info = 1;
    ae_frame_leave(_state);
}


/*************************************************************************
Subroutine prepares K-fold split of the training set.

NOTES:
    "NClasses>0" means that we have classification task.
    "NClasses<0" means regression task with -NClasses real outputs.
*************************************************************************/
static void mlptrain_mlpkfoldsplit(/* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nclasses,
     ae_int_t foldscount,
     ae_bool stratifiedsplits,
     /* Integer */ ae_vector* folds,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    hqrndstate rs;

    ae_frame_make(_state, &_frame_block);
    memset(&rs, 0, sizeof(rs));
    ae_vector_clear(folds);
    _hqrndstate_init(&rs, _state, ae_true);


    /*
     * test parameters
     */
    ae_assert(npoints>0, "MLPKFoldSplit: wrong NPoints!", _state);
    ae_assert(nclasses>1||nclasses<0, "MLPKFoldSplit: wrong NClasses!", _state);
    ae_assert(foldscount>=2&&foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!", _state);
    ae_assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!", _state);
    
    /*
     * Folds
     */
    hqrndrandomize(&rs, _state);
    ae_vector_set_length(folds, npoints-1+1, _state);
    for(i=0; i<=npoints-1; i++)
    {
        folds->ptr.p_int[i] = i*foldscount/npoints;
    }
    for(i=0; i<=npoints-2; i++)
    {
        j = i+hqrnduniformi(&rs, npoints-i, _state);
        if( j!=i )
        {
            k = folds->ptr.p_int[i];
            folds->ptr.p_int[i] = folds->ptr.p_int[j];
            folds->ptr.p_int[j] = k;
        }
    }
    ae_frame_leave(_state);
}


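/*************************************************************************
Illustration of the fold assignment used by MLPKFoldSplit() above (not
part of the library). For NPoints=10 and FoldsCount=3 the initial
assignment Folds[I]=I*FoldsCount/NPoints produces

    [0,0,0,0,1,1,1,2,2,2]

i.e. fold sizes differ by at most one. The subsequent Fisher-Yates-style
shuffle then randomizes which points carry which fold label without
changing the fold sizes.
*************************************************************************/
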
/*************************************************************************
Internal subroutine for parallelization function MLPFoldCV.


INPUT PARAMETERS:
    S           -   trainer object;
    RowSize     -   row size (either NIn+NOut or NIn+1);
    NRestarts   -   number of restarts (>=0);
    Folds       -   cross-validation set;
    Fold        -   the number of the first cross-validation fold (>=0);
    DFold       -   the number of the second cross-validation fold
                    (>=Fold+1);
    CVY         -   matrix which stores the results returned by networks
                    trained on the cross-validation sets.
                    It has to be preallocated.
    PoolDataCV  -   parameter for parallelization.
    WCount      -   number of weights in network, used to make decisions
                    on parallelization.

NOTE: there are no checks on the correctness of parameters.

  -- ALGLIB --
     Copyright 25.09.2012 by Bochkanov Sergey
*************************************************************************/
static void mlptrain_mthreadcv(mlptrainer* s,
     ae_int_t rowsize,
     ae_int_t nrestarts,
     /* Integer */ ae_vector* folds,
     ae_int_t fold,
     ae_int_t dfold,
     /* Real    */ ae_matrix* cvy,
     ae_shared_pool* pooldatacv,
     ae_int_t wcount,
     ae_state *_state)
{
    ae_frame _frame_block;
    mlpparallelizationcv *datacv;
    ae_smart_ptr _datacv;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&_datacv, 0, sizeof(_datacv));
    ae_smart_ptr_init(&_datacv, (void**)&datacv, _state, ae_true);

    if( fold==dfold-1 )
    {
        
        /*
         * Separate set
         */
        ae_shared_pool_retrieve(pooldatacv, &_datacv, _state);
        datacv->subsetsize = 0;
        for(i=0; i<=s->npoints-1; i++)
        {
            if( folds->ptr.p_int[i]!=fold )
            {
                datacv->subset.ptr.p_int[datacv->subsetsize] = i;
                datacv->subsetsize = datacv->subsetsize+1;
            }
        }
        
        /*
         * Train on CV training set
         */
        mlptrain_mlptrainnetworkx(s, nrestarts, -1, &datacv->subset, datacv->subsetsize, &datacv->subset, 0, &datacv->network, &datacv->rep, ae_true, &datacv->trnpool, _state);
        datacv->ngrad = datacv->ngrad+datacv->rep.ngrad;
        
        /*
         * Estimate error using CV test set
         */
        for(i=0; i<=s->npoints-1; i++)
        {
            if( folds->ptr.p_int[i]==fold )
            {
                if( s->datatype==0 )
                {
                    ae_v_move(&datacv->xyrow.ptr.p_double[0], 1, &s->densexy.ptr.pp_double[i][0], 1, ae_v_len(0,rowsize-1));
                }
                if( s->datatype==1 )
                {
                    sparsegetrow(&s->sparsexy, i, &datacv->xyrow, _state);
                }
                mlpprocess(&datacv->network, &datacv->xyrow, &datacv->y, _state);
                ae_v_move(&cvy->ptr.pp_double[i][0], 1, &datacv->y.ptr.p_double[0], 1, ae_v_len(0,s->nout-1));
            }
        }
        ae_shared_pool_recycle(pooldatacv, &_datacv, _state);
    }
    else
    {
        ae_assert(fold<dfold-1, "MThreadCV: internal error(Fold>DFold-1).", _state);
        
        /*
         * We expect that the minimum number of iterations before convergence
         * is 100. Hence our approach to evaluating task complexity.
         */
        if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
        {
            if( _trypexec_mlptrain_mthreadcv(s,rowsize,nrestarts,folds,fold,dfold,cvy,pooldatacv,wcount, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        
        /*
         * Split task
         */
        mlptrain_mthreadcv(s, rowsize, nrestarts, folds, fold, (fold+dfold)/2, cvy, pooldatacv, wcount, _state);
        mlptrain_mthreadcv(s, rowsize, nrestarts, folds, (fold+dfold)/2, dfold, cvy, pooldatacv, wcount, _state);
    }
    ae_frame_leave(_state);
}


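/*************************************************************************
Note on the parallelization threshold in MThreadCV() above: the estimated
cost of a branch is max(NRestarts,1)*(2*WCount)*NPoints*100, i.e. roughly
"gradient cost (2*WCount per point) times NPoints points times an assumed
minimum of 100 iterations". For example, with NRestarts=2, WCount=100 and
NPoints=1000 the estimate is 2*200*1000*100 = 4.0E+7 units; parallel
execution is attempted only when this value reaches SMPActivationLevel().
*************************************************************************/
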
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_mlptrain_mthreadcv(mlptrainer* s,
     ae_int_t rowsize,
     ae_int_t nrestarts,
     /* Integer */ ae_vector* folds,
     ae_int_t fold,
     ae_int_t dfold,
     /* Real    */ ae_matrix* cvy,
     ae_shared_pool* pooldatacv,
     ae_int_t wcount,
     ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
This function trains the neural network passed to this function, using the
current dataset (one which was passed to MLPSetDataset() or
MLPSetSparseDataset()) and current training settings. Training from
NRestarts random starting positions is performed, and the best network is
chosen.

This function is intended to be used internally. It may be used in several
settings:
* training with ValSubsetSize=0, corresponds to "normal" training with
  termination criteria based on S.MaxIts (steps count) and S.WStep (step
  size). Training sample is given by TrnSubset/TrnSubsetSize.
* training with ValSubsetSize>0, corresponds to early stopping training
  with additional MaxIts/WStep stopping criteria. Training sample is given
  by TrnSubset/TrnSubsetSize, validation sample is given by ValSubset/
  ValSubsetSize.

  -- ALGLIB --
     Copyright 13.08.2012 by Bochkanov Sergey
*************************************************************************/
static void mlptrain_mlptrainnetworkx(mlptrainer* s,
     ae_int_t nrestarts,
     ae_int_t algokind,
     /* Integer */ ae_vector* trnsubset,
     ae_int_t trnsubsetsize,
     /* Integer */ ae_vector* valsubset,
     ae_int_t valsubsetsize,
     multilayerperceptron* network,
     mlpreport* rep,
     ae_bool isrootcall,
     ae_shared_pool* sessions,
     ae_state *_state)
{
    ae_frame _frame_block;
    modelerrors modrep;
    double eval;
    double ebest;
    ae_int_t ngradbatch;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t pcount;
    ae_int_t itbest;
    ae_int_t itcnt;
    ae_int_t ntype;
    ae_int_t ttype;
    ae_bool rndstart;
    ae_int_t i;
    ae_int_t nr0;
    ae_int_t nr1;
    mlpreport rep0;
    mlpreport rep1;
    ae_bool randomizenetwork;
    double bestrmserror;
    smlptrnsession *psession;
    ae_smart_ptr _psession;

    ae_frame_make(_state, &_frame_block);
    memset(&modrep, 0, sizeof(modrep));
    memset(&rep0, 0, sizeof(rep0));
    memset(&rep1, 0, sizeof(rep1));
    memset(&_psession, 0, sizeof(_psession));
    _modelerrors_init(&modrep, _state, ae_true);
    _mlpreport_init(&rep0, _state, ae_true);
    _mlpreport_init(&rep1, _state, ae_true);
    ae_smart_ptr_init(&_psession, (void**)&psession, _state, ae_true);

    mlpproperties(network, &nin, &nout, &wcount, _state);
    
    /*
     * Process root call
     */
    if( isrootcall )
    {
        
        /*
         * Try parallelization.
         * We expect that the minimum number of iterations before convergence
         * is 100. Hence our approach to evaluating task complexity.
         */
        if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
        {
            if( _trypexec_mlptrain_mlptrainnetworkx(s,nrestarts,algokind,trnsubset,trnsubsetsize,valsubset,valsubsetsize,network,rep,isrootcall,sessions, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        
        /*
         * Check correctness of parameters
         */
        ae_assert(algokind==0||algokind==-1, "MLPTrainNetworkX: unexpected AlgoKind", _state);
        ae_assert(s->npoints>=0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
        if( s->rcpar )
        {
            ttype = 0;
        }
        else
        {
            ttype = 1;
        }
        if( !mlpissoftmax(network, _state) )
        {
            ntype = 0;
        }
        else
        {
            ntype = 1;
        }
        ae_assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object", _state);
        ae_assert(s->nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network.", _state);
        ae_assert(s->nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network.", _state);
        ae_assert(nrestarts>=0, "MLPTrainNetworkX: internal error - NRestarts<0.", _state);
        ae_assert(trnsubset->cnt>=trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)", _state);
        for(i=0; i<=trnsubsetsize-1; i++)
        {
            ae_assert(trnsubset->ptr.p_int[i]>=0&&trnsubset->ptr.p_int[i]<=s->npoints-1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)", _state);
        }
        ae_assert(valsubset->cnt>=valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)", _state);
        for(i=0; i<=valsubsetsize-1; i++)
        {
            ae_assert(valsubset->ptr.p_int[i]>=0&&valsubset->ptr.p_int[i]<=s->npoints-1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)", _state);
        }
        
        /*
         * Train
         */
        randomizenetwork = nrestarts>0;
        mlptrain_initmlptrnsessions(network, randomizenetwork, s, sessions, _state);
        mlptrain_mlptrainnetworkx(s, nrestarts, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep, ae_false, sessions, _state);
        
        /*
         * Choose best network
         */
        bestrmserror = ae_maxrealnumber;
        ae_shared_pool_first_recycled(sessions, &_psession, _state);
        while(psession!=NULL)
        {
            if( ae_fp_less(psession->bestrmserror,bestrmserror) )
            {
                mlpimporttunableparameters(network, &psession->bestparameters, _state);
                bestrmserror = psession->bestrmserror;
            }
            ae_shared_pool_next_recycled(sessions, &_psession, _state);
        }
        
        /*
         * Calculate errors
         */
        if( s->datatype==0 )
        {
            mlpallerrorssubset(network, &s->densexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
        }
        if( s->datatype==1 )
        {
            mlpallerrorssparsesubset(network, &s->sparsexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
        }
        rep->relclserror = modrep.relclserror;
        rep->avgce = modrep.avgce;
        rep->rmserror = modrep.rmserror;
        rep->avgerror = modrep.avgerror;
        rep->avgrelerror = modrep.avgrelerror;
        
        /*
         * Done
         */
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Split problem, if we have more than 1 restart
     */
    if( nrestarts>=2 )
    {
        
        /*
         * Divide problem with NRestarts into two: NR0 and NR1.
         */
        nr0 = nrestarts/2;
        nr1 = nrestarts-nr0;
        mlptrain_mlptrainnetworkx(s, nr0, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, &rep0, ae_false, sessions, _state);
        mlptrain_mlptrainnetworkx(s, nr1, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, &rep1, ae_false, sessions, _state);
        
        /*
         * Aggregate results
         */
        rep->ngrad = rep0.ngrad+rep1.ngrad;
        rep->nhess = rep0.nhess+rep1.nhess;
        rep->ncholesky = rep0.ncholesky+rep1.ncholesky;
        
        /*
         * Done :)
         */
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Execution with NRestarts=1 or NRestarts=0:
     * * NRestarts=1 means that network is restarted from random position
     * * NRestarts=0 means that network is not randomized
     */
    ae_assert(nrestarts==0||nrestarts==1, "MLPTrainNetworkX: internal error", _state);
    rep->ngrad = 0;
    rep->nhess = 0;
    rep->ncholesky = 0;
    ae_shared_pool_retrieve(sessions, &_psession, _state);
    if( ((s->datatype==0||s->datatype==1)&&s->npoints>0)&&trnsubsetsize!=0 )
    {
        
        /*
         * Train network using combination of early stopping and step-size
         * and step-count based criteria. Network state with best value of
         * validation set error is stored in WBuf0. When validation set is
         * zero, most recent state of network is stored.
         */
        rndstart = nrestarts!=0;
        ngradbatch = 0;
        eval = (double)(0);
        ebest = (double)(0);
        itbest = 0;
        itcnt = 0;
        mlptrain_mlpstarttrainingx(s, rndstart, algokind, trnsubset, trnsubsetsize, psession, _state);
        if( s->datatype==0 )
        {
            ebest = mlperrorsubset(&psession->network, &s->densexy, s->npoints, valsubset, valsubsetsize, _state);
        }
        if( s->datatype==1 )
        {
            ebest = mlperrorsparsesubset(&psession->network, &s->sparsexy, s->npoints, valsubset, valsubsetsize, _state);
        }
        ae_v_move(&psession->wbuf0.ptr.p_double[0], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        while(mlptrain_mlpcontinuetrainingx(s, trnsubset, trnsubsetsize, &ngradbatch, psession, _state))
        {
            if( s->datatype==0 )
            {
                eval = mlperrorsubset(&psession->network, &s->densexy, s->npoints, valsubset, valsubsetsize, _state);
            }
            if( s->datatype==1 )
            {
                eval = mlperrorsparsesubset(&psession->network, &s->sparsexy, s->npoints, valsubset, valsubsetsize, _state);
            }
            if( ae_fp_less_eq(eval,ebest)||valsubsetsize==0 )
            {
                ae_v_move(&psession->wbuf0.ptr.p_double[0], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
                ebest = eval;
                itbest = itcnt;
            }
            if( itcnt>30&&ae_fp_greater((double)(itcnt),1.5*itbest) )
            {
                break;
            }
            itcnt = itcnt+1;
        }
        ae_v_move(&psession->network.weights.ptr.p_double[0], 1, &psession->wbuf0.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
        rep->ngrad = ngradbatch;
    }
    else
    {
        for(i=0; i<=wcount-1; i++)
        {
            psession->network.weights.ptr.p_double[i] = (double)(0);
        }
    }
    
    /*
     * Evaluate network performance and update PSession.BestParameters/BestRMSError
     * (if needed).
     */
    if( s->datatype==0 )
    {
        mlpallerrorssubset(&psession->network, &s->densexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
    }
    if( s->datatype==1 )
    {
        mlpallerrorssparsesubset(&psession->network, &s->sparsexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
    }
    if( ae_fp_less(modrep.rmserror,psession->bestrmserror) )
    {
        mlpexporttunableparameters(&psession->network, &psession->bestparameters, &pcount, _state);
        psession->bestrmserror = modrep.rmserror;
    }
    
    /*
     * Move session back to pool
     */
    ae_shared_pool_recycle(sessions, &_psession, _state);
    ae_frame_leave(_state);
}


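/*************************************************************************
Note on the early stopping rule in MLPTrainNetworkX() above: the training
loop breaks once ItCnt>30 and ItCnt>1.5*ItBest, i.e. after at least 30
iterations and once the validation error has failed to improve for the
last third of the iterations performed so far. For example, if the best
validation error was observed at iteration ItBest=40, training stops at
iteration 61 (the first ItCnt exceeding 1.5*40=60).
*************************************************************************/
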
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_mlptrain_mlptrainnetworkx(mlptrainer* s,
     ae_int_t nrestarts,
     ae_int_t algokind,
     /* Integer */ ae_vector* trnsubset,
     ae_int_t trnsubsetsize,
     /* Integer */ ae_vector* valsubset,
     ae_int_t valsubsetsize,
     multilayerperceptron* network,
     mlpreport* rep,
     ae_bool isrootcall,
     ae_shared_pool* sessions,
     ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
This function trains the neural network ensemble passed to this function
using the current dataset and early stopping training algorithm. Each
early stopping round performs NRestarts random restarts (thus,
EnsembleSize*NRestarts training rounds are performed in total).


  -- ALGLIB --
     Copyright 22.08.2012 by Bochkanov Sergey
*************************************************************************/
static void mlptrain_mlptrainensemblex(mlptrainer* s,
     mlpensemble* ensemble,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t nrestarts,
     ae_int_t trainingmethod,
     sinteger* ngrad,
     ae_bool isrootcall,
     ae_shared_pool* esessions,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t pcount;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t trnsubsetsize;
    ae_int_t valsubsetsize;
    ae_int_t k0;
    sinteger ngrad0;
    sinteger ngrad1;
    mlpetrnsession *psession;
    ae_smart_ptr _psession;
    hqrndstate rs;

    ae_frame_make(_state, &_frame_block);
    memset(&ngrad0, 0, sizeof(ngrad0));
    memset(&ngrad1, 0, sizeof(ngrad1));
    memset(&_psession, 0, sizeof(_psession));
    memset(&rs, 0, sizeof(rs));
    _sinteger_init(&ngrad0, _state, ae_true);
    _sinteger_init(&ngrad1, _state, ae_true);
    ae_smart_ptr_init(&_psession, (void**)&psession, _state, ae_true);
    _hqrndstate_init(&rs, _state, ae_true);

    nin = mlpgetinputscount(&ensemble->network, _state);
    nout = mlpgetoutputscount(&ensemble->network, _state);
    wcount = mlpgetweightscount(&ensemble->network, _state);
    if( mlpissoftmax(&ensemble->network, _state) )
    {
        pcount = nin;
    }
    else
    {
        pcount = nin+nout;
    }
    if( nrestarts<=0 )
    {
        nrestarts = 1;
    }
    
    /*
     * Handle degenerate case
     */
    if( s->npoints<2 )
    {
        for(i=idx0; i<=idx1-1; i++)
        {
            for(j=0; j<=wcount-1; j++)
            {
                ensemble->weights.ptr.p_double[i*wcount+j] = 0.0;
            }
            for(j=0; j<=pcount-1; j++)
            {
                ensemble->columnmeans.ptr.p_double[i*pcount+j] = 0.0;
                ensemble->columnsigmas.ptr.p_double[i*pcount+j] = 1.0;
            }
        }
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Process root call
     */
    if( isrootcall )
    {
        
        /*
         * Try parallelization.
         * We expect that the minimum number of iterations before convergence
         * is 100. Hence our approach to evaluating task complexity.
         */
        if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*(idx1-idx0)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
        {
            if( _trypexec_mlptrain_mlptrainensemblex(s,ensemble,idx0,idx1,nrestarts,trainingmethod,ngrad,isrootcall,esessions, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        
        /*
         * Prepare:
         * * prepare MLPETrnSessions
         * * fill ensemble by zeros (helps to detect errors)
         */
        mlptrain_initmlpetrnsessions(&ensemble->network, s, esessions, _state);
        for(i=idx0; i<=idx1-1; i++)
        {
            for(j=0; j<=wcount-1; j++)
            {
                ensemble->weights.ptr.p_double[i*wcount+j] = 0.0;
            }
            for(j=0; j<=pcount-1; j++)
            {
                ensemble->columnmeans.ptr.p_double[i*pcount+j] = 0.0;
                ensemble->columnsigmas.ptr.p_double[i*pcount+j] = 0.0;
            }
        }
        
        /*
         * Train in non-root mode and exit
         */
        mlptrain_mlptrainensemblex(s, ensemble, idx0, idx1, nrestarts, trainingmethod, ngrad, ae_false, esessions, _state);
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Split problem
     */
    if( idx1-idx0>=2 )
    {
        k0 = (idx1-idx0)/2;
        ngrad0.val = 0;
        ngrad1.val = 0;
        mlptrain_mlptrainensemblex(s, ensemble, idx0, idx0+k0, nrestarts, trainingmethod, &ngrad0, ae_false, esessions, _state);
        mlptrain_mlptrainensemblex(s, ensemble, idx0+k0, idx1, nrestarts, trainingmethod, &ngrad1, ae_false, esessions, _state);
        ngrad->val = ngrad0.val+ngrad1.val;
        ae_frame_leave(_state);
        return;
    }
    
    /*
     * Retrieve and prepare session
     */
    ae_shared_pool_retrieve(esessions, &_psession, _state);
    
    /*
     * Train
     */
    hqrndrandomize(&rs, _state);
    for(k=idx0; k<=idx1-1; k++)
    {
        
        /*
         * Split set
         */
        trnsubsetsize = 0;
        valsubsetsize = 0;
        if( trainingmethod==0 )
        {
            do
            {
                trnsubsetsize = 0;
                valsubsetsize = 0;
                for(i=0; i<=s->npoints-1; i++)
                {
                    if( ae_fp_less(ae_randomreal(_state),0.66) )
                    {
                        
                        /*
                         * Assign sample to training set
                         */
                        psession->trnsubset.ptr.p_int[trnsubsetsize] = i;
                        trnsubsetsize = trnsubsetsize+1;
                    }
                    else
                    {
                        
                        /*
                         * Assign sample to validation set
                         */
                        psession->valsubset.ptr.p_int[valsubsetsize] = i;
                        valsubsetsize = valsubsetsize+1;
                    }
                }
            }
            while(!(trnsubsetsize!=0&&valsubsetsize!=0));
        }
        if( trainingmethod==1 )
        {
            valsubsetsize = 0;
            trnsubsetsize = s->npoints;
            for(i=0; i<=s->npoints-1; i++)
            {
                psession->trnsubset.ptr.p_int[i] = hqrnduniformi(&rs, s->npoints, _state);
            }
        }
        
        /*
         * Train
         */
        mlptrain_mlptrainnetworkx(s, nrestarts, -1, &psession->trnsubset, trnsubsetsize, &psession->valsubset, valsubsetsize, &psession->network, &psession->mlprep, ae_true, &psession->mlpsessions, _state);
        ngrad->val = ngrad->val+psession->mlprep.ngrad;
        
        /*
         * Save results
         */
        ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
        ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcount], 1, &psession->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
        ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcount], 1, &psession->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
    }
    
    /*
     * Recycle session
     */
    ae_shared_pool_recycle(esessions, &_psession, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_mlptrain_mlptrainensemblex(mlptrainer* s,
     mlpensemble* ensemble,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t nrestarts,
     ae_int_t trainingmethod,
     sinteger* ngrad,
     ae_bool isrootcall,
     ae_shared_pool* esessions,
     ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
|
|
This function performs step-by-step training of the neural network. Here
|
|
"step-by-step" means that training starts with MLPStartTrainingX call,
|
|
and then user subsequently calls MLPContinueTrainingX to perform one more
|
|
iteration of the training.
|
|
|
|
After call to this function trainer object remembers network and is ready
|
|
to train it. However, no training is performed until first call to
|
|
MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
|
|
will advance traing progress one iteration further.
|
|
|
|
|
|
-- ALGLIB --
|
|
Copyright 13.08.2012 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void mlptrain_mlpstarttrainingx(mlptrainer* s,
|
|
ae_bool randomstart,
|
|
ae_int_t algokind,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
smlptrnsession* session,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t ntype;
|
|
ae_int_t ttype;
|
|
ae_int_t i;
|
|
|
|
|
|
|
|
/*
|
|
* Check parameters
|
|
*/
|
|
ae_assert(s->npoints>=0, "MLPStartTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
|
|
ae_assert(algokind==0||algokind==-1, "MLPStartTrainingX: unexpected AlgoKind", _state);
|
|
if( s->rcpar )
|
|
{
|
|
ttype = 0;
|
|
}
|
|
else
|
|
{
|
|
ttype = 1;
|
|
}
|
|
if( !mlpissoftmax(&session->network, _state) )
|
|
{
|
|
ntype = 0;
|
|
}
|
|
else
|
|
{
|
|
ntype = 1;
|
|
}
|
|
ae_assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the resulting network is not similar to network type in trainer object", _state);
|
|
mlpproperties(&session->network, &nin, &nout, &wcount, _state);
|
|
ae_assert(s->nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the network.", _state);
|
|
ae_assert(s->nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the network.", _state);
|
|
ae_assert(subset->cnt>=subsetsize, "MLPStartTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)", _state);
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
ae_assert(subset->ptr.p_int[i]>=0&&subset->ptr.p_int[i]<=s->npoints-1, "MLPStartTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)", _state);
|
|
}
|
|
|
|
/*
|
|
* Prepare session
|
|
*/
|
|
minlbfgssetcond(&session->optimizer, 0.0, 0.0, s->wstep, s->maxits, _state);
|
|
if( s->npoints>0&&subsetsize!=0 )
|
|
{
|
|
if( randomstart )
|
|
{
|
|
mlprandomize(&session->network, _state);
|
|
}
|
|
minlbfgsrestartfrom(&session->optimizer, &session->network.weights, _state);
|
|
}
|
|
else
|
|
{
|
|
for(i=0; i<=wcount-1; i++)
|
|
{
|
|
session->network.weights.ptr.p_double[i] = (double)(0);
|
|
}
|
|
}
|
|
if( algokind==-1 )
|
|
{
|
|
session->algoused = s->algokind;
|
|
if( s->algokind==1 )
|
|
{
|
|
session->minibatchsize = s->minibatchsize;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
session->algoused = 0;
|
|
}
|
|
hqrndrandomize(&session->generator, _state);
|
|
ae_vector_set_length(&session->rstate.ia, 15+1, _state);
|
|
ae_vector_set_length(&session->rstate.ra, 1+1, _state);
|
|
session->rstate.stage = -1;
|
|
}
/*************************************************************************
This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with an MLPStartTrainingX call,
and the user then calls MLPContinueTrainingX repeatedly to perform one
more iteration of the training.

This function performs one more iteration of the training and returns
either True (training continues) or False (training stopped). If True is
returned, the network weights are updated according to the current state
of the optimization progress. If False is returned, no additional updates
are performed (the previous update of the network weights moved us to the
final point, and no further updates are needed).

EXAMPLE:
    >
    > [initialize network and trainer object]
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     [visualize training progress]
    >

  -- ALGLIB --
     Copyright 13.08.2012 by Bochkanov Sergey
*************************************************************************/
static ae_bool mlptrain_mlpcontinuetrainingx(mlptrainer* s,
|
|
/* Integer */ ae_vector* subset,
|
|
ae_int_t subsetsize,
|
|
ae_int_t* ngradbatch,
|
|
smlptrnsession* session,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t twcount;
|
|
ae_int_t ntype;
|
|
ae_int_t ttype;
|
|
double decay;
|
|
double v;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t trnsetsize;
|
|
ae_int_t epoch;
|
|
ae_int_t minibatchcount;
|
|
ae_int_t minibatchidx;
|
|
ae_int_t cursize;
|
|
ae_int_t idx0;
|
|
ae_int_t idx1;
|
|
ae_bool result;
|
|
|
|
|
|
|
|
/*
|
|
* Reverse communication preparations
|
|
* I know it looks ugly, but it works the same way
|
|
* anywhere from C++ to Python.
|
|
*
|
|
* This code initializes locals by:
|
|
* * random values determined during code
|
|
* generation - on first subroutine call
|
|
* * values from previous call - on subsequent calls
|
|
*/
|
|
if( session->rstate.stage>=0 )
|
|
{
|
|
nin = session->rstate.ia.ptr.p_int[0];
|
|
nout = session->rstate.ia.ptr.p_int[1];
|
|
wcount = session->rstate.ia.ptr.p_int[2];
|
|
twcount = session->rstate.ia.ptr.p_int[3];
|
|
ntype = session->rstate.ia.ptr.p_int[4];
|
|
ttype = session->rstate.ia.ptr.p_int[5];
|
|
i = session->rstate.ia.ptr.p_int[6];
|
|
j = session->rstate.ia.ptr.p_int[7];
|
|
k = session->rstate.ia.ptr.p_int[8];
|
|
trnsetsize = session->rstate.ia.ptr.p_int[9];
|
|
epoch = session->rstate.ia.ptr.p_int[10];
|
|
minibatchcount = session->rstate.ia.ptr.p_int[11];
|
|
minibatchidx = session->rstate.ia.ptr.p_int[12];
|
|
cursize = session->rstate.ia.ptr.p_int[13];
|
|
idx0 = session->rstate.ia.ptr.p_int[14];
|
|
idx1 = session->rstate.ia.ptr.p_int[15];
|
|
decay = session->rstate.ra.ptr.p_double[0];
|
|
v = session->rstate.ra.ptr.p_double[1];
|
|
}
|
|
else
|
|
{
|
|
nin = 359;
|
|
nout = -58;
|
|
wcount = -919;
|
|
twcount = -909;
|
|
ntype = 81;
|
|
ttype = 255;
|
|
i = 74;
|
|
j = -788;
|
|
k = 809;
|
|
trnsetsize = 205;
|
|
epoch = -838;
|
|
minibatchcount = 939;
|
|
minibatchidx = -526;
|
|
cursize = 763;
|
|
idx0 = -541;
|
|
idx1 = -698;
|
|
decay = -900;
|
|
v = -318;
|
|
}
|
|
if( session->rstate.stage==0 )
|
|
{
|
|
goto lbl_0;
|
|
}
|
|
|
|
/*
|
|
* Routine body
|
|
*/
|
|
|
|
/*
|
|
* Check correctness of inputs
|
|
*/
|
|
ae_assert(s->npoints>=0, "MLPContinueTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0).", _state);
|
|
if( s->rcpar )
|
|
{
|
|
ttype = 0;
|
|
}
|
|
else
|
|
{
|
|
ttype = 1;
|
|
}
|
|
if( !mlpissoftmax(&session->network, _state) )
|
|
{
|
|
ntype = 0;
|
|
}
|
|
else
|
|
{
|
|
ntype = 1;
|
|
}
|
|
ae_assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the resulting network is not similar to network type in trainer object.", _state);
|
|
mlpproperties(&session->network, &nin, &nout, &wcount, _state);
|
|
ae_assert(s->nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the network.", _state);
|
|
ae_assert(s->nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the network.", _state);
|
|
ae_assert(subset->cnt>=subsetsize, "MLPContinueTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize).", _state);
|
|
for(i=0; i<=subsetsize-1; i++)
|
|
{
|
|
ae_assert(subset->ptr.p_int[i]>=0&&subset->ptr.p_int[i]<=s->npoints-1, "MLPContinueTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1).", _state);
|
|
}
|
|
|
|
/*
|
|
* Quick exit on empty training set
|
|
*/
|
|
if( s->npoints==0||subsetsize==0 )
|
|
{
|
|
result = ae_false;
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Minibatch training
|
|
*/
|
|
if( session->algoused==1 )
|
|
{
|
|
ae_assert(ae_false, "MINIBATCH TRAINING IS NOT IMPLEMENTED YET", _state);
|
|
}
|
|
|
|
/*
|
|
* Last option: full batch training
|
|
*/
|
|
decay = s->decay;
|
|
lbl_1:
|
|
if( !minlbfgsiteration(&session->optimizer, _state) )
|
|
{
|
|
goto lbl_2;
|
|
}
|
|
if( !session->optimizer.xupdated )
|
|
{
|
|
goto lbl_3;
|
|
}
|
|
ae_v_move(&session->network.weights.ptr.p_double[0], 1, &session->optimizer.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
session->rstate.stage = 0;
|
|
goto lbl_rcomm;
|
|
lbl_0:
|
|
lbl_3:
|
|
ae_v_move(&session->network.weights.ptr.p_double[0], 1, &session->optimizer.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
if( s->datatype==0 )
|
|
{
|
|
mlpgradbatchsubset(&session->network, &s->densexy, s->npoints, subset, subsetsize, &session->optimizer.f, &session->optimizer.g, _state);
|
|
}
|
|
if( s->datatype==1 )
|
|
{
|
|
mlpgradbatchsparsesubset(&session->network, &s->sparsexy, s->npoints, subset, subsetsize, &session->optimizer.f, &session->optimizer.g, _state);
|
|
}
|
|
|
|
/*
|
|
* Increment number of operations performed on batch gradient
|
|
*/
|
|
*ngradbatch = *ngradbatch+1;
|
|
v = ae_v_dotproduct(&session->network.weights.ptr.p_double[0], 1, &session->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
|
|
session->optimizer.f = session->optimizer.f+0.5*decay*v;
|
|
ae_v_addd(&session->optimizer.g.ptr.p_double[0], 1, &session->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
|
|
goto lbl_1;
|
|
lbl_2:
|
|
minlbfgsresultsbuf(&session->optimizer, &session->network.weights, &session->optimizerrep, _state);
|
|
result = ae_false;
|
|
return result;
|
|
|
|
/*
|
|
* Saving state
|
|
*/
|
|
lbl_rcomm:
|
|
result = ae_true;
|
|
session->rstate.ia.ptr.p_int[0] = nin;
|
|
session->rstate.ia.ptr.p_int[1] = nout;
|
|
session->rstate.ia.ptr.p_int[2] = wcount;
|
|
session->rstate.ia.ptr.p_int[3] = twcount;
|
|
session->rstate.ia.ptr.p_int[4] = ntype;
|
|
session->rstate.ia.ptr.p_int[5] = ttype;
|
|
session->rstate.ia.ptr.p_int[6] = i;
|
|
session->rstate.ia.ptr.p_int[7] = j;
|
|
session->rstate.ia.ptr.p_int[8] = k;
|
|
session->rstate.ia.ptr.p_int[9] = trnsetsize;
|
|
session->rstate.ia.ptr.p_int[10] = epoch;
|
|
session->rstate.ia.ptr.p_int[11] = minibatchcount;
|
|
session->rstate.ia.ptr.p_int[12] = minibatchidx;
|
|
session->rstate.ia.ptr.p_int[13] = cursize;
|
|
session->rstate.ia.ptr.p_int[14] = idx0;
|
|
session->rstate.ia.ptr.p_int[15] = idx1;
|
|
session->rstate.ra.ptr.p_double[0] = decay;
|
|
session->rstate.ra.ptr.p_double[1] = v;
|
|
return result;
|
|
}
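
/*************************************************************************
Usage sketch for the step-by-step training loop implemented above. This
block is NOT part of the library: it is a minimal, hedged example
assuming the public C++ interface (mlpcreatetrainer, mlpsetdataset,
mlpcreate1, mlpstarttraining, mlpcontinuetraining) declared elsewhere in
this package; the dataset and layer sizes below are illustrative only.

    // assumed: using namespace alglib; dataset with 2 inputs, 1 output
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    mlptrainer trn;
    multilayerperceptron net;
    mlpcreatetrainer(2, 1, trn);          // trainer for 2 inputs, 1 output
    mlpsetdataset(trn, xy, 4);            // 4 training points
    mlpcreate1(2, 5, 1, net);             // one hidden layer, 5 neurons
    mlpstarttraining(trn, net, true);     // randomize and prepare session
    while( mlpcontinuetraining(trn, net) )
    {
        // weights in net are updated after each iteration;
        // training progress may be inspected/visualized here
    }
*************************************************************************/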
/*************************************************************************
Internal bagging subroutine.

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
static void mlptrain_mlpebagginginternal(mlpensemble* ensemble,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
double decay,
|
|
ae_int_t restarts,
|
|
double wstep,
|
|
ae_int_t maxits,
|
|
ae_bool lmalgorithm,
|
|
ae_int_t* info,
|
|
mlpreport* rep,
|
|
mlpcvreport* ooberrors,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_matrix xys;
|
|
ae_vector s;
|
|
ae_matrix oobbuf;
|
|
ae_vector oobcntbuf;
|
|
ae_vector x;
|
|
ae_vector y;
|
|
ae_vector dy;
|
|
ae_vector dsbuf;
|
|
ae_int_t ccnt;
|
|
ae_int_t pcnt;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
double v;
|
|
mlpreport tmprep;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
hqrndstate rs;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&xys, 0, sizeof(xys));
|
|
memset(&s, 0, sizeof(s));
|
|
memset(&oobbuf, 0, sizeof(oobbuf));
|
|
memset(&oobcntbuf, 0, sizeof(oobcntbuf));
|
|
memset(&x, 0, sizeof(x));
|
|
memset(&y, 0, sizeof(y));
|
|
memset(&dy, 0, sizeof(dy));
|
|
memset(&dsbuf, 0, sizeof(dsbuf));
|
|
memset(&tmprep, 0, sizeof(tmprep));
|
|
memset(&rs, 0, sizeof(rs));
|
|
*info = 0;
|
|
_mlpreport_clear(rep);
|
|
_mlpcvreport_clear(ooberrors);
|
|
ae_matrix_init(&xys, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&s, 0, DT_BOOL, _state, ae_true);
|
|
ae_matrix_init(&oobbuf, 0, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&oobcntbuf, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
|
|
ae_vector_init(&dsbuf, 0, DT_REAL, _state, ae_true);
|
|
_mlpreport_init(&tmprep, _state, ae_true);
|
|
_hqrndstate_init(&rs, _state, ae_true);
|
|
|
|
nin = mlpgetinputscount(&ensemble->network, _state);
|
|
nout = mlpgetoutputscount(&ensemble->network, _state);
|
|
wcount = mlpgetweightscount(&ensemble->network, _state);
|
|
|
|
/*
|
|
* Test for inputs
|
|
*/
|
|
if( (!lmalgorithm&&ae_fp_eq(wstep,(double)(0)))&&maxits==0 )
|
|
{
|
|
*info = -8;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
if( ((npoints<=0||restarts<1)||ae_fp_less(wstep,(double)(0)))||maxits<0 )
|
|
{
|
|
*info = -1;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
if( mlpissoftmax(&ensemble->network, _state) )
|
|
{
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
|
|
{
|
|
*info = -2;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* allocate temporaries
|
|
*/
|
|
*info = 2;
|
|
rep->ngrad = 0;
|
|
rep->nhess = 0;
|
|
rep->ncholesky = 0;
|
|
ooberrors->relclserror = (double)(0);
|
|
ooberrors->avgce = (double)(0);
|
|
ooberrors->rmserror = (double)(0);
|
|
ooberrors->avgerror = (double)(0);
|
|
ooberrors->avgrelerror = (double)(0);
|
|
if( mlpissoftmax(&ensemble->network, _state) )
|
|
{
|
|
ccnt = nin+1;
|
|
pcnt = nin;
|
|
}
|
|
else
|
|
{
|
|
ccnt = nin+nout;
|
|
pcnt = nin+nout;
|
|
}
|
|
ae_matrix_set_length(&xys, npoints, ccnt, _state);
|
|
ae_vector_set_length(&s, npoints, _state);
|
|
ae_matrix_set_length(&oobbuf, npoints, nout, _state);
|
|
ae_vector_set_length(&oobcntbuf, npoints, _state);
|
|
ae_vector_set_length(&x, nin, _state);
|
|
ae_vector_set_length(&y, nout, _state);
|
|
if( mlpissoftmax(&ensemble->network, _state) )
|
|
{
|
|
ae_vector_set_length(&dy, 1, _state);
|
|
}
|
|
else
|
|
{
|
|
ae_vector_set_length(&dy, nout, _state);
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
for(j=0; j<=nout-1; j++)
|
|
{
|
|
oobbuf.ptr.pp_double[i][j] = (double)(0);
|
|
}
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
oobcntbuf.ptr.p_int[i] = 0;
|
|
}
|
|
|
|
/*
|
|
* main bagging cycle
|
|
*/
|
|
hqrndrandomize(&rs, _state);
|
|
for(k=0; k<=ensemble->ensemblesize-1; k++)
|
|
{
|
|
|
|
/*
|
|
* prepare dataset
|
|
*/
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
s.ptr.p_bool[i] = ae_false;
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
j = hqrnduniformi(&rs, npoints, _state);
|
|
s.ptr.p_bool[j] = ae_true;
|
|
ae_v_move(&xys.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,ccnt-1));
|
|
}
|
|
|
|
/*
|
|
* train
|
|
*/
|
|
if( lmalgorithm )
|
|
{
|
|
mlptrainlm(&ensemble->network, &xys, npoints, decay, restarts, info, &tmprep, _state);
|
|
}
|
|
else
|
|
{
|
|
mlptrainlbfgs(&ensemble->network, &xys, npoints, decay, restarts, wstep, maxits, info, &tmprep, _state);
|
|
}
|
|
if( *info<0 )
|
|
{
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* save results
|
|
*/
|
|
rep->ngrad = rep->ngrad+tmprep.ngrad;
|
|
rep->nhess = rep->nhess+tmprep.nhess;
|
|
rep->ncholesky = rep->ncholesky+tmprep.ncholesky;
|
|
ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &ensemble->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
|
|
ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcnt], 1, &ensemble->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcnt,(k+1)*pcnt-1));
|
|
ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcnt], 1, &ensemble->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcnt,(k+1)*pcnt-1));
|
|
|
|
/*
|
|
* OOB estimates
|
|
*/
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( !s.ptr.p_bool[i] )
|
|
{
|
|
ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
|
|
mlpprocess(&ensemble->network, &x, &y, _state);
|
|
ae_v_add(&oobbuf.ptr.pp_double[i][0], 1, &y.ptr.p_double[0], 1, ae_v_len(0,nout-1));
|
|
oobcntbuf.ptr.p_int[i] = oobcntbuf.ptr.p_int[i]+1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* OOB estimates
|
|
*/
|
|
if( mlpissoftmax(&ensemble->network, _state) )
|
|
{
|
|
dserrallocate(nout, &dsbuf, _state);
|
|
}
|
|
else
|
|
{
|
|
dserrallocate(-nout, &dsbuf, _state);
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( oobcntbuf.ptr.p_int[i]!=0 )
|
|
{
|
|
v = (double)1/(double)oobcntbuf.ptr.p_int[i];
|
|
ae_v_moved(&y.ptr.p_double[0], 1, &oobbuf.ptr.pp_double[i][0], 1, ae_v_len(0,nout-1), v);
|
|
if( mlpissoftmax(&ensemble->network, _state) )
|
|
{
|
|
dy.ptr.p_double[0] = xy->ptr.pp_double[i][nin];
|
|
}
|
|
else
|
|
{
|
|
ae_v_moved(&dy.ptr.p_double[0], 1, &xy->ptr.pp_double[i][nin], 1, ae_v_len(0,nout-1), v);
|
|
}
|
|
dserraccumulate(&dsbuf, &y, &dy, _state);
|
|
}
|
|
}
|
|
dserrfinish(&dsbuf, _state);
|
|
ooberrors->relclserror = dsbuf.ptr.p_double[0];
|
|
ooberrors->avgce = dsbuf.ptr.p_double[1];
|
|
ooberrors->rmserror = dsbuf.ptr.p_double[2];
|
|
ooberrors->avgerror = dsbuf.ptr.p_double[3];
|
|
ooberrors->avgrelerror = dsbuf.ptr.p_double[4];
|
|
ae_frame_leave(_state);
|
|
}
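
/*************************************************************************
Usage sketch for the bagging routine above. The internal function is
reached through the public ensemble trainers; this is a minimal, hedged
example assuming the public C++ interface (mlpecreate1, mlpebagglbfgs)
declared elsewhere in this package - verify the names against your
version before use.

    // assumed: using namespace alglib; small dataset with 2 inputs
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    mlpensemble ens;
    mlpreport rep;
    mlpcvreport oob;
    ae_int_t info;
    mlpecreate1(2, 5, 1, 10, ens);        // ensemble of 10 networks
    // decay=0.001, 2 restarts, wstep=0.01, unlimited iterations
    mlpebagglbfgs(ens, xy, 4, 0.001, 2, 0.01, 0, info, rep, oob);
    // info>0 on success; oob holds out-of-bag error estimates
*************************************************************************/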
/*************************************************************************
This function initializes temporaries needed for training session.

  -- ALGLIB --
     Copyright 01.07.2013 by Bochkanov Sergey
*************************************************************************/
static void mlptrain_initmlptrnsession(multilayerperceptron* networktrained,
|
|
ae_bool randomizenetwork,
|
|
mlptrainer* trainer,
|
|
smlptrnsession* session,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t nin;
|
|
ae_int_t nout;
|
|
ae_int_t wcount;
|
|
ae_int_t pcount;
|
|
ae_vector dummysubset;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&dummysubset, 0, sizeof(dummysubset));
|
|
ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* Prepare network:
|
|
* * copy input network to Session.Network
|
|
* * re-initialize preprocessor and weights if RandomizeNetwork=True
|
|
*/
|
|
mlpcopy(networktrained, &session->network, _state);
|
|
if( randomizenetwork )
|
|
{
|
|
ae_assert(trainer->datatype==0||trainer->datatype==1, "InitTemporaries: unexpected Trainer.DataType", _state);
|
|
if( trainer->datatype==0 )
|
|
{
|
|
mlpinitpreprocessorsubset(&session->network, &trainer->densexy, trainer->npoints, &dummysubset, -1, _state);
|
|
}
|
|
if( trainer->datatype==1 )
|
|
{
|
|
mlpinitpreprocessorsparsesubset(&session->network, &trainer->sparsexy, trainer->npoints, &dummysubset, -1, _state);
|
|
}
|
|
mlprandomize(&session->network, _state);
|
|
session->randomizenetwork = ae_true;
|
|
}
|
|
else
|
|
{
|
|
session->randomizenetwork = ae_false;
|
|
}
|
|
|
|
/*
|
|
* Determine network geometry and initialize optimizer
|
|
*/
|
|
mlpproperties(&session->network, &nin, &nout, &wcount, _state);
|
|
minlbfgscreate(wcount, ae_minint(wcount, trainer->lbfgsfactor, _state), &session->network.weights, &session->optimizer, _state);
|
|
minlbfgssetxrep(&session->optimizer, ae_true, _state);
|
|
|
|
/*
|
|
* Create buffers
|
|
*/
|
|
ae_vector_set_length(&session->wbuf0, wcount, _state);
|
|
ae_vector_set_length(&session->wbuf1, wcount, _state);
|
|
|
|
/*
|
|
* Initialize session result
|
|
*/
|
|
mlpexporttunableparameters(&session->network, &session->bestparameters, &pcount, _state);
|
|
session->bestrmserror = ae_maxrealnumber;
|
|
ae_frame_leave(_state);
|
|
}
/*************************************************************************
This function initializes temporaries needed for training session.
*************************************************************************/
static void mlptrain_initmlptrnsessions(multilayerperceptron* networktrained,
|
|
ae_bool randomizenetwork,
|
|
mlptrainer* trainer,
|
|
ae_shared_pool* sessions,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_vector dummysubset;
|
|
smlptrnsession t;
|
|
smlptrnsession *p;
|
|
ae_smart_ptr _p;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&dummysubset, 0, sizeof(dummysubset));
|
|
memset(&t, 0, sizeof(t));
|
|
memset(&_p, 0, sizeof(_p));
|
|
ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
|
|
_smlptrnsession_init(&t, _state, ae_true);
|
|
ae_smart_ptr_init(&_p, (void**)&p, _state, ae_true);
|
|
|
|
if( ae_shared_pool_is_initialized(sessions) )
|
|
{
|
|
|
|
/*
|
|
* Pool was already initialized.
|
|
* Clear sessions stored in the pool.
|
|
*/
|
|
ae_shared_pool_first_recycled(sessions, &_p, _state);
|
|
while(p!=NULL)
|
|
{
|
|
ae_assert(mlpsamearchitecture(&p->network, networktrained, _state), "InitMLPTrnSessions: internal consistency error", _state);
|
|
p->bestrmserror = ae_maxrealnumber;
|
|
ae_shared_pool_next_recycled(sessions, &_p, _state);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Prepare session and seed pool
|
|
*/
|
|
mlptrain_initmlptrnsession(networktrained, randomizenetwork, trainer, &t, _state);
|
|
ae_shared_pool_set_seed(sessions, &t, sizeof(t), _smlptrnsession_init, _smlptrnsession_init_copy, _smlptrnsession_destroy, _state);
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
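
/*************************************************************************
Note on the shared pool pattern used above (a clarifying remark, not
original library text): the pool is seeded once with a fully constructed
session, after which each worker retrieves a private copy and recycles it
when done. A minimal, hedged sketch using the internal API as it appears
in this file (error handling omitted):

    ae_smart_ptr _p;
    smlptrnsession *p;
    ae_smart_ptr_init(&_p, (void**)&p, _state, ae_true);
    ae_shared_pool_retrieve(sessions, &_p, _state);   // get private copy
    // ... use p->network and p->optimizer for one unit of work ...
    ae_shared_pool_recycle(sessions, &_p, _state);    // return to pool
*************************************************************************/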
/*************************************************************************
This function initializes temporaries needed for ensemble training.
*************************************************************************/
static void mlptrain_initmlpetrnsession(multilayerperceptron* individualnetwork,
|
|
mlptrainer* trainer,
|
|
mlpetrnsession* session,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_vector dummysubset;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&dummysubset, 0, sizeof(dummysubset));
|
|
ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* Prepare network:
|
|
* * copy input network to Session.Network
|
|
* * re-initialize preprocessor and weights if RandomizeNetwork=True
|
|
*/
|
|
mlpcopy(individualnetwork, &session->network, _state);
|
|
mlptrain_initmlptrnsessions(individualnetwork, ae_true, trainer, &session->mlpsessions, _state);
|
|
ivectorsetlengthatleast(&session->trnsubset, trainer->npoints, _state);
|
|
ivectorsetlengthatleast(&session->valsubset, trainer->npoints, _state);
|
|
ae_frame_leave(_state);
|
|
}
/*************************************************************************
This function initializes a pool of ensemble training sessions.
*************************************************************************/
static void mlptrain_initmlpetrnsessions(multilayerperceptron* individualnetwork,
|
|
mlptrainer* trainer,
|
|
ae_shared_pool* sessions,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
mlpetrnsession t;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&t, 0, sizeof(t));
|
|
_mlpetrnsession_init(&t, _state, ae_true);
|
|
|
|
if( !ae_shared_pool_is_initialized(sessions) )
|
|
{
|
|
mlptrain_initmlpetrnsession(individualnetwork, trainer, &t, _state);
|
|
ae_shared_pool_set_seed(sessions, &t, sizeof(t), _mlpetrnsession_init, _mlpetrnsession_init_copy, _mlpetrnsession_destroy, _state);
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
void _mlpreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpreport *p = (mlpreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _mlpreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpreport *dst = (mlpreport*)_dst;
|
|
mlpreport *src = (mlpreport*)_src;
|
|
dst->relclserror = src->relclserror;
|
|
dst->avgce = src->avgce;
|
|
dst->rmserror = src->rmserror;
|
|
dst->avgerror = src->avgerror;
|
|
dst->avgrelerror = src->avgrelerror;
|
|
dst->ngrad = src->ngrad;
|
|
dst->nhess = src->nhess;
|
|
dst->ncholesky = src->ncholesky;
|
|
}
|
|
|
|
|
|
void _mlpreport_clear(void* _p)
|
|
{
|
|
mlpreport *p = (mlpreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _mlpreport_destroy(void* _p)
|
|
{
|
|
mlpreport *p = (mlpreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _mlpcvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpcvreport *p = (mlpcvreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _mlpcvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpcvreport *dst = (mlpcvreport*)_dst;
|
|
mlpcvreport *src = (mlpcvreport*)_src;
|
|
dst->relclserror = src->relclserror;
|
|
dst->avgce = src->avgce;
|
|
dst->rmserror = src->rmserror;
|
|
dst->avgerror = src->avgerror;
|
|
dst->avgrelerror = src->avgrelerror;
|
|
}
|
|
|
|
|
|
void _mlpcvreport_clear(void* _p)
|
|
{
|
|
mlpcvreport *p = (mlpcvreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _mlpcvreport_destroy(void* _p)
|
|
{
|
|
mlpcvreport *p = (mlpcvreport*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
}
|
|
|
|
|
|
void _smlptrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
smlptrnsession *p = (smlptrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_init(&p->bestparameters, 0, DT_REAL, _state, make_automatic);
|
|
_multilayerperceptron_init(&p->network, _state, make_automatic);
|
|
_minlbfgsstate_init(&p->optimizer, _state, make_automatic);
|
|
_minlbfgsreport_init(&p->optimizerrep, _state, make_automatic);
|
|
ae_vector_init(&p->wbuf0, 0, DT_REAL, _state, make_automatic);
|
|
ae_vector_init(&p->wbuf1, 0, DT_REAL, _state, make_automatic);
|
|
ae_vector_init(&p->allminibatches, 0, DT_INT, _state, make_automatic);
|
|
ae_vector_init(&p->currentminibatch, 0, DT_INT, _state, make_automatic);
|
|
_rcommstate_init(&p->rstate, _state, make_automatic);
|
|
_hqrndstate_init(&p->generator, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _smlptrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
smlptrnsession *dst = (smlptrnsession*)_dst;
|
|
smlptrnsession *src = (smlptrnsession*)_src;
|
|
ae_vector_init_copy(&dst->bestparameters, &src->bestparameters, _state, make_automatic);
|
|
dst->bestrmserror = src->bestrmserror;
|
|
dst->randomizenetwork = src->randomizenetwork;
|
|
_multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
|
|
_minlbfgsstate_init_copy(&dst->optimizer, &src->optimizer, _state, make_automatic);
|
|
_minlbfgsreport_init_copy(&dst->optimizerrep, &src->optimizerrep, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->wbuf0, &src->wbuf0, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->wbuf1, &src->wbuf1, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->allminibatches, &src->allminibatches, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->currentminibatch, &src->currentminibatch, _state, make_automatic);
|
|
_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
|
|
dst->algoused = src->algoused;
|
|
dst->minibatchsize = src->minibatchsize;
|
|
_hqrndstate_init_copy(&dst->generator, &src->generator, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _smlptrnsession_clear(void* _p)
|
|
{
|
|
smlptrnsession *p = (smlptrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_clear(&p->bestparameters);
|
|
_multilayerperceptron_clear(&p->network);
|
|
_minlbfgsstate_clear(&p->optimizer);
|
|
_minlbfgsreport_clear(&p->optimizerrep);
|
|
ae_vector_clear(&p->wbuf0);
|
|
ae_vector_clear(&p->wbuf1);
|
|
ae_vector_clear(&p->allminibatches);
|
|
ae_vector_clear(&p->currentminibatch);
|
|
_rcommstate_clear(&p->rstate);
|
|
_hqrndstate_clear(&p->generator);
|
|
}
|
|
|
|
|
|
void _smlptrnsession_destroy(void* _p)
|
|
{
|
|
smlptrnsession *p = (smlptrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_destroy(&p->bestparameters);
|
|
_multilayerperceptron_destroy(&p->network);
|
|
_minlbfgsstate_destroy(&p->optimizer);
|
|
_minlbfgsreport_destroy(&p->optimizerrep);
|
|
ae_vector_destroy(&p->wbuf0);
|
|
ae_vector_destroy(&p->wbuf1);
|
|
ae_vector_destroy(&p->allminibatches);
|
|
ae_vector_destroy(&p->currentminibatch);
|
|
_rcommstate_destroy(&p->rstate);
|
|
_hqrndstate_destroy(&p->generator);
|
|
}
|
|
|
|
|
|
void _mlpetrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpetrnsession *p = (mlpetrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_init(&p->trnsubset, 0, DT_INT, _state, make_automatic);
|
|
ae_vector_init(&p->valsubset, 0, DT_INT, _state, make_automatic);
|
|
ae_shared_pool_init(&p->mlpsessions, _state, make_automatic);
|
|
_mlpreport_init(&p->mlprep, _state, make_automatic);
|
|
_multilayerperceptron_init(&p->network, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpetrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpetrnsession *dst = (mlpetrnsession*)_dst;
|
|
mlpetrnsession *src = (mlpetrnsession*)_src;
|
|
ae_vector_init_copy(&dst->trnsubset, &src->trnsubset, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->valsubset, &src->valsubset, _state, make_automatic);
|
|
ae_shared_pool_init_copy(&dst->mlpsessions, &src->mlpsessions, _state, make_automatic);
|
|
_mlpreport_init_copy(&dst->mlprep, &src->mlprep, _state, make_automatic);
|
|
_multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpetrnsession_clear(void* _p)
|
|
{
|
|
mlpetrnsession *p = (mlpetrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_clear(&p->trnsubset);
|
|
ae_vector_clear(&p->valsubset);
|
|
ae_shared_pool_clear(&p->mlpsessions);
|
|
_mlpreport_clear(&p->mlprep);
|
|
_multilayerperceptron_clear(&p->network);
|
|
}
|
|
|
|
|
|
void _mlpetrnsession_destroy(void* _p)
|
|
{
|
|
mlpetrnsession *p = (mlpetrnsession*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_vector_destroy(&p->trnsubset);
|
|
ae_vector_destroy(&p->valsubset);
|
|
ae_shared_pool_destroy(&p->mlpsessions);
|
|
_mlpreport_destroy(&p->mlprep);
|
|
_multilayerperceptron_destroy(&p->network);
|
|
}
|
|
|
|
|
|
void _mlptrainer_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlptrainer *p = (mlptrainer*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_matrix_init(&p->densexy, 0, 0, DT_REAL, _state, make_automatic);
|
|
_sparsematrix_init(&p->sparsexy, _state, make_automatic);
|
|
_smlptrnsession_init(&p->session, _state, make_automatic);
|
|
ae_vector_init(&p->subset, 0, DT_INT, _state, make_automatic);
|
|
ae_vector_init(&p->valsubset, 0, DT_INT, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlptrainer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlptrainer *dst = (mlptrainer*)_dst;
|
|
mlptrainer *src = (mlptrainer*)_src;
|
|
dst->nin = src->nin;
|
|
dst->nout = src->nout;
|
|
dst->rcpar = src->rcpar;
|
|
dst->lbfgsfactor = src->lbfgsfactor;
|
|
dst->decay = src->decay;
|
|
dst->wstep = src->wstep;
|
|
dst->maxits = src->maxits;
|
|
dst->datatype = src->datatype;
|
|
dst->npoints = src->npoints;
|
|
ae_matrix_init_copy(&dst->densexy, &src->densexy, _state, make_automatic);
|
|
_sparsematrix_init_copy(&dst->sparsexy, &src->sparsexy, _state, make_automatic);
|
|
_smlptrnsession_init_copy(&dst->session, &src->session, _state, make_automatic);
|
|
dst->ngradbatch = src->ngradbatch;
|
|
ae_vector_init_copy(&dst->subset, &src->subset, _state, make_automatic);
|
|
dst->subsetsize = src->subsetsize;
|
|
ae_vector_init_copy(&dst->valsubset, &src->valsubset, _state, make_automatic);
|
|
dst->valsubsetsize = src->valsubsetsize;
|
|
dst->algokind = src->algokind;
|
|
dst->minibatchsize = src->minibatchsize;
|
|
}
|
|
|
|
|
|
void _mlptrainer_clear(void* _p)
|
|
{
|
|
mlptrainer *p = (mlptrainer*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_matrix_clear(&p->densexy);
|
|
_sparsematrix_clear(&p->sparsexy);
|
|
_smlptrnsession_clear(&p->session);
|
|
ae_vector_clear(&p->subset);
|
|
ae_vector_clear(&p->valsubset);
|
|
}
|
|
|
|
|
|
void _mlptrainer_destroy(void* _p)
|
|
{
|
|
mlptrainer *p = (mlptrainer*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
ae_matrix_destroy(&p->densexy);
|
|
_sparsematrix_destroy(&p->sparsexy);
|
|
_smlptrnsession_destroy(&p->session);
|
|
ae_vector_destroy(&p->subset);
|
|
ae_vector_destroy(&p->valsubset);
|
|
}
|
|
|
|
|
|
void _mlpparallelizationcv_init(void* _p, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
_multilayerperceptron_init(&p->network, _state, make_automatic);
|
|
_mlpreport_init(&p->rep, _state, make_automatic);
|
|
ae_vector_init(&p->subset, 0, DT_INT, _state, make_automatic);
|
|
ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
|
|
ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
|
|
ae_shared_pool_init(&p->trnpool, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpparallelizationcv_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
|
|
{
|
|
mlpparallelizationcv *dst = (mlpparallelizationcv*)_dst;
|
|
mlpparallelizationcv *src = (mlpparallelizationcv*)_src;
|
|
_multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
|
|
_mlpreport_init_copy(&dst->rep, &src->rep, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->subset, &src->subset, _state, make_automatic);
|
|
dst->subsetsize = src->subsetsize;
|
|
ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
|
|
ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
|
|
dst->ngrad = src->ngrad;
|
|
ae_shared_pool_init_copy(&dst->trnpool, &src->trnpool, _state, make_automatic);
|
|
}
|
|
|
|
|
|
void _mlpparallelizationcv_clear(void* _p)
|
|
{
|
|
mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
_multilayerperceptron_clear(&p->network);
|
|
_mlpreport_clear(&p->rep);
|
|
ae_vector_clear(&p->subset);
|
|
ae_vector_clear(&p->xyrow);
|
|
ae_vector_clear(&p->y);
|
|
ae_shared_pool_clear(&p->trnpool);
|
|
}
|
|
|
|
|
|
void _mlpparallelizationcv_destroy(void* _p)
|
|
{
|
|
mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
|
|
ae_touch_ptr((void*)p);
|
|
_multilayerperceptron_destroy(&p->network);
|
|
_mlpreport_destroy(&p->rep);
|
|
ae_vector_destroy(&p->subset);
|
|
ae_vector_destroy(&p->xyrow);
|
|
ae_vector_destroy(&p->y);
|
|
ae_shared_pool_destroy(&p->trnpool);
|
|
}
|
|
|
|
|
|
#endif
|
|
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function initializes a clusterizer object. A newly initialized
object is empty, i.e. it does not contain a dataset. You should use it as
follows:
1. creation
2. dataset is added with ClusterizerSetPoints()
3. additional parameters are set
4. clusterization is performed with one of the clustering functions

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizercreate(clusterizerstate* s, ae_state *_state)
|
|
{
|
|
|
|
_clusterizerstate_clear(s);
|
|
|
|
s->npoints = 0;
|
|
s->nfeatures = 0;
|
|
s->disttype = 2;
|
|
s->ahcalgo = 0;
|
|
s->kmeansrestarts = 1;
|
|
s->kmeansmaxits = 0;
|
|
s->kmeansinitalgo = 0;
|
|
s->kmeansdbgnoits = ae_false;
|
|
s->seed = 1;
|
|
kmeansinitbuf(&s->kmeanstmp, _state);
|
|
}
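
/*************************************************************************
Usage sketch for the creation/configuration/run workflow described above.
A minimal, hedged example assuming the public C++ interface
(clusterizercreate, clusterizersetpoints, clusterizersetahcalgo,
clusterizerrunahc) declared elsewhere in this package.

    // assumed: using namespace alglib; five 1-dimensional points
    real_2d_array xy = "[[1],[1.1],[4],[4.1],[9]]";
    clusterizerstate s;
    ahcreport rep;
    clusterizercreate(s);                 // step 1: creation
    clusterizersetpoints(s, xy, 2);       // step 2: dataset, Euclidean
    clusterizersetahcalgo(s, 0);          // step 3: complete linkage
    clusterizerrunahc(s, rep);            // step 4: clustering
    // rep.z[i][0] and rep.z[i][1] are the clusters merged at step i
*************************************************************************/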
/*************************************************************************
This function adds dataset to the clusterizer structure.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    XY      -   array[NPoints,NFeatures], dataset
    NPoints -   number of points, >=0
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
                *  2    Euclidean distance  (L2 norm), non-squared
                * 10    Pearson correlation:
                        dist(a,b) = 1-corr(a,b)
                * 11    Absolute Pearson correlation:
                        dist(a,b) = 1-|corr(a,b)|
                * 12    Uncentered Pearson correlation (cosine of the
                        angle):
                        dist(a,b) = a'*b/(|a|*|b|)
                * 13    Absolute uncentered Pearson correlation
                        dist(a,b) = |a'*b|/(|a|*|b|)
                * 20    Spearman rank correlation:
                        dist(a,b) = 1-rankcorr(a,b)
                * 21    Absolute Spearman rank correlation
                        dist(a,b) = 1-|rankcorr(a,b)|

NOTE 1: different distance functions have different performance
        penalties:
        * Euclidean or Pearson correlation distances are the fastest ones
        * Spearman correlation distance function is a bit slower
        * city block and Chebyshev distances are an order of magnitude
          slower

        The reason behind the difference in performance is that
        correlation-based distance functions are computed using optimized
        linear algebra kernels, while Chebyshev and city block distance
        functions are computed using simple nested loops with two branches
        at each iteration.

NOTE 2: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric
        * k-means++ clustering algorithm may be used only with the
          Euclidean distance function
        Thus, the list of specific clustering algorithms you may use
        depends on the distance function you specify when you set your
        dataset.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetpoints(clusterizerstate* s,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nfeatures,
|
|
ae_int_t disttype,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
|
|
|
|
ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerSetPoints: incorrect DistType", _state);
|
|
ae_assert(npoints>=0, "ClusterizerSetPoints: NPoints<0", _state);
|
|
ae_assert(nfeatures>=1, "ClusterizerSetPoints: NFeatures<1", _state);
|
|
ae_assert(xy->rows>=npoints, "ClusterizerSetPoints: Rows(XY)<NPoints", _state);
|
|
ae_assert(xy->cols>=nfeatures, "ClusterizerSetPoints: Cols(XY)<NFeatures", _state);
|
|
ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerSetPoints: XY contains NAN/INF", _state);
|
|
s->npoints = npoints;
|
|
s->nfeatures = nfeatures;
|
|
s->disttype = disttype;
|
|
rmatrixsetlengthatleast(&s->xy, npoints, nfeatures, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
ae_v_move(&s->xy.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1));
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function adds a dataset given by its distance matrix to the
clusterizer structure. Note that the dataset itself is not given
explicitly - only the distance matrix is given.

This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    D       -   array[NPoints,NPoints], distance matrix given by its upper
                or lower triangle (the main diagonal is ignored because
                its entries are expected to be zero).
    NPoints -   number of points
    IsUpper -   whether the upper or lower triangle of D is given.

NOTE 1: different clustering algorithms have different limitations:
        * agglomerative hierarchical clustering algorithms may be used
          with any kind of distance metric, including one given by a
          distance matrix
        * k-means++ clustering algorithm may be used only with the
          Euclidean distance function and explicitly given points - it
          cannot be used with a dataset given by a distance matrix
        Thus, if you call this function, you will be unable to use the
        k-means clustering algorithm to process your problem.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetdistances(clusterizerstate* s,
|
|
/* Real */ ae_matrix* d,
|
|
ae_int_t npoints,
|
|
ae_bool isupper,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t j0;
|
|
ae_int_t j1;
|
|
|
|
|
|
ae_assert(npoints>=0, "ClusterizerSetDistances: NPoints<0", _state);
|
|
ae_assert(d->rows>=npoints, "ClusterizerSetDistances: Rows(D)<NPoints", _state);
|
|
ae_assert(d->cols>=npoints, "ClusterizerSetDistances: Cols(D)<NPoints", _state);
|
|
s->npoints = npoints;
|
|
s->nfeatures = 0;
|
|
s->disttype = -1;
|
|
rmatrixsetlengthatleast(&s->d, npoints, npoints, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( isupper )
|
|
{
|
|
j0 = i+1;
|
|
j1 = npoints-1;
|
|
}
|
|
else
|
|
{
|
|
j0 = 0;
|
|
j1 = i-1;
|
|
}
|
|
for(j=j0; j<=j1; j++)
|
|
{
|
|
ae_assert(ae_isfinite(d->ptr.pp_double[i][j], _state)&&ae_fp_greater_eq(d->ptr.pp_double[i][j],(double)(0)), "ClusterizerSetDistances: D contains infinite, NAN or negative elements", _state);
|
|
s->d.ptr.pp_double[i][j] = d->ptr.pp_double[i][j];
|
|
s->d.ptr.pp_double[j][i] = d->ptr.pp_double[i][j];
|
|
}
|
|
s->d.ptr.pp_double[i][i] = (double)(0);
|
|
}
|
|
}
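
/*************************************************************************
Usage sketch for supplying a precomputed distance matrix. A minimal,
hedged example assuming the public C++ interface (clusterizercreate,
clusterizersetdistances, clusterizerrunahc); only the upper triangle of D
is read here, so the lower one may be left as zeros.

    // assumed: using namespace alglib; 3 points, pairwise distances
    real_2d_array d = "[[0,1,9],[0,0,8],[0,0,0]]";
    clusterizerstate s;
    ahcreport rep;
    clusterizercreate(s);
    clusterizersetdistances(s, d, true);  // true = upper triangle given
    clusterizerrunahc(s, rep);            // k-means is now unavailable
*************************************************************************/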
/*************************************************************************
This function sets agglomerative hierarchical clustering algorithm

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Algo    -   algorithm type:
                * 0     complete linkage (default algorithm)
                * 1     single linkage
                * 2     unweighted average linkage
                * 3     weighted average linkage
                * 4     Ward's method

NOTE: Ward's method works correctly only with Euclidean distance, which
      is why the algorithm returns a negative termination code (failure)
      for any other distance type.

      It is possible, however, to use this method with a user-supplied
      distance matrix. It is your responsibility to pass one which was
      calculated with the Euclidean distance function.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetahcalgo(clusterizerstate* s,
|
|
ae_int_t algo,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert((((algo==0||algo==1)||algo==2)||algo==3)||algo==4, "ClusterizerSetAHCAlgo: incorrect algorithm type", _state);
|
|
s->ahcalgo = algo;
|
|
}
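
/*************************************************************************
A short, hedged example of selecting Ward's method, continuing the
configuration sketch shown after clusterizercreate() above (assuming the
public C++ interface). Note that the dataset must use Euclidean distance
(DistType=2), otherwise ClusterizerRunAHC() reports failure (-5).

    clusterizersetahcalgo(s, 4);          // 4 = Ward's method
    clusterizerrunahc(s, rep);
    // rep.terminationtype<0 here signals a distance/algorithm mismatch
*************************************************************************/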
/*************************************************************************
This function sets k-means properties: number of restarts and maximum
number of iterations per one run.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Restarts-   restarts count, >=1.
                k-means++ algorithm performs several restarts and chooses
                the best set of centers (one with minimum squared
                distance).
    MaxIts  -   maximum number of k-means iterations performed during one
                run. >=0, zero value means that the algorithm performs an
                unlimited number of iterations.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeanslimits(clusterizerstate* s,
|
|
ae_int_t restarts,
|
|
ae_int_t maxits,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(restarts>=1, "ClusterizerSetKMeansLimits: Restarts<=0", _state);
|
|
ae_assert(maxits>=0, "ClusterizerSetKMeansLimits: MaxIts<0", _state);
|
|
s->kmeansrestarts = restarts;
|
|
s->kmeansmaxits = maxits;
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function sets k-means initialization algorithm. Several different
algorithms can be chosen, including k-means++.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    InitAlgo-   initialization algorithm:
                * 0     automatic selection (different versions of ALGLIB
                        may select different algorithms)
                * 1     random initialization
                * 2     k-means++ initialization (best quality of initial
                        centers, but long non-parallelizable
                        initialization phase with bad cache locality)
                * 3     "fast-greedy" algorithm with efficient, easy to
                        parallelize initialization. Quality of initial
                        centers is somewhat worse than that of k-means++.
                        This algorithm is the default one in the current
                        version of ALGLIB.
                *-1     "debug" algorithm which always selects the first
                        K rows of the dataset; this algorithm is used for
                        debug purposes only. Do not use it in industrial
                        code!

  -- ALGLIB --
     Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeansinit(clusterizerstate* s,
|
|
ae_int_t initalgo,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
ae_assert(initalgo>=-1&&initalgo<=3, "ClusterizerSetKMeansInit: InitAlgo is incorrect", _state);
|
|
s->kmeansinitalgo = initalgo;
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function sets the seed which is used to initialize the internal RNG.
By default, a deterministic seed is used - the same for each run of the
clusterizer. If you specify a non-deterministic seed value, then some
algorithms which depend on random initialization (in the current version:
k-means) may return slightly different results after each run.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    Seed    -   seed:
                * positive values = use deterministic seed for each run of
                  algorithms which depend on random initialization
                * zero or negative values = use non-deterministic seed

  -- ALGLIB --
     Copyright 08.06.2017 by Bochkanov Sergey
*************************************************************************/
void clusterizersetseed(clusterizerstate* s,
|
|
ae_int_t seed,
|
|
ae_state *_state)
|
|
{
|
|
|
|
|
|
s->seed = seed;
|
|
}
|
|
|
|
|
|
/*************************************************************************
This function performs agglomerative hierarchical clustering

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

NOTE: The agglomerative hierarchical clustering algorithm has two phases:
      distance matrix calculation and clustering itself. Only the first
      phase (distance matrix calculation) is accelerated by Intel MKL and
      multithreading. Thus, acceleration is significant only for medium
      or high-dimensional problems.

      Although activating multithreading gives some speedup over single-
      threaded execution, you should not expect nearly-linear scaling
      with respect to core count.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()

OUTPUT PARAMETERS:
    Rep     -   clustering results; see description of the AHCReport
                structure for more information.

NOTE 1: hierarchical clustering algorithms require large amounts of
        memory. In particular, this implementation needs
        sizeof(double)*NPoints^2 bytes, which are used to store the
        distance matrix. In case we work with a user-supplied matrix,
        this amount is multiplied by 2 (we have to store the original
        matrix and work with its copy).

        For example, a problem with 10000 points would require 800M of
        RAM, even when working in a 1-dimensional space.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunahc(clusterizerstate* s,
|
|
ahcreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t npoints;
|
|
ae_int_t nfeatures;
|
|
|
|
_ahcreport_clear(rep);
|
|
|
|
npoints = s->npoints;
|
|
nfeatures = s->nfeatures;
|
|
|
|
/*
|
|
* Fill Rep.NPoints, quick exit when NPoints<=1
|
|
*/
|
|
rep->npoints = npoints;
|
|
if( npoints==0 )
|
|
{
|
|
ae_vector_set_length(&rep->p, 0, _state);
|
|
ae_matrix_set_length(&rep->z, 0, 0, _state);
|
|
ae_matrix_set_length(&rep->pz, 0, 0, _state);
|
|
ae_matrix_set_length(&rep->pm, 0, 0, _state);
|
|
ae_vector_set_length(&rep->mergedist, 0, _state);
|
|
rep->terminationtype = 1;
|
|
return;
|
|
}
|
|
if( npoints==1 )
|
|
{
|
|
ae_vector_set_length(&rep->p, 1, _state);
|
|
ae_matrix_set_length(&rep->z, 0, 0, _state);
|
|
ae_matrix_set_length(&rep->pz, 0, 0, _state);
|
|
ae_matrix_set_length(&rep->pm, 0, 0, _state);
|
|
ae_vector_set_length(&rep->mergedist, 0, _state);
|
|
rep->p.ptr.p_int[0] = 0;
|
|
rep->terminationtype = 1;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* More than one point
|
|
*/
|
|
if( s->disttype==-1 )
|
|
{
|
|
|
|
/*
|
|
* Run clusterizer with user-supplied distance matrix
|
|
*/
|
|
clustering_clusterizerrunahcinternal(s, &s->d, rep, _state);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Check combination of AHC algo and distance type
|
|
*/
|
|
if( s->ahcalgo==4&&s->disttype!=2 )
|
|
{
|
|
rep->terminationtype = -5;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Build distance matrix D.
|
|
*/
|
|
clusterizergetdistancesbuf(&s->distbuf, &s->xy, npoints, nfeatures, s->disttype, &s->tmpd, _state);
|
|
|
|
/*
|
|
* Run clusterizer
|
|
*/
|
|
clustering_clusterizerrunahcinternal(s, &s->tmpd, rep, _state);
|
|
return;
|
|
}
|
|
}
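
/*************************************************************************
The AHC report stores the full merge tree; to obtain a flat partition
into K clusters one can post-process it. A minimal, hedged example
assuming the public helper ClusterizerGetKClusters() from this package's
C++ interface (verify the name against your version), continuing the
sketch shown after clusterizercreate() above:

    integer_1d_array cidx, cz;
    clusterizergetkclusters(rep, 2, cidx, cz);  // cut the tree at K=2
    // cidx[i] is the cluster index (0..K-1) assigned to point i
*************************************************************************/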
/*************************************************************************
This function performs clustering by the k-means++ algorithm.

You may change algorithm properties by calling:
* ClusterizerSetKMeansLimits() to change number of restarts or iterations
* ClusterizerSetKMeansInit() to change initialization algorithm

By default, one restart and an unlimited number of iterations are used.
The initialization algorithm is chosen automatically.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

NOTE: The k-means clustering algorithm has two phases: selection of
      initial centers and clustering itself. ALGLIB parallelizes both
      phases. The parallel version is optimized for the following
      scenario: medium or high-dimensional problem (8 or more dimensions)
      with a large number of points and clusters. However, some speed-up
      can be obtained even when the assumptions above are violated.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    K       -   number of clusters, K>=0.
                K can be zero only when the algorithm is called for an
                empty dataset, in this case the completion code is set to
                success (+1).
                If K=0 and the dataset size is non-zero, we can not
                meaningfully assign points to some center (there are no
                centers because K=0) and return -3 as completion code
                (failure).

OUTPUT PARAMETERS:
    Rep     -   clustering results; see description of the KMeansReport
                structure for more information.

NOTE 1: k-means clustering can be performed only for datasets with the
        Euclidean distance function. The algorithm will return a negative
        completion code in Rep.TerminationType in case the dataset was
        added to the clusterizer with DistType other than Euclidean (or
        the dataset was specified by a distance matrix instead of
        explicitly given points).

NOTE 2: by default, k-means uses a deterministic seed to initialize the
        RNG which is used to select initial centers, so repeated runs
        return the same values. If you need non-deterministic behavior,
        use the ClusterizerSetSeed() function.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunkmeans(clusterizerstate* s,
|
|
ae_int_t k,
|
|
kmeansreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_matrix dummy;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&dummy, 0, sizeof(dummy));
|
|
_kmeansreport_clear(rep);
|
|
ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
|
|
|
|
ae_assert(k>=0, "ClusterizerRunKMeans: K<0", _state);
|
|
|
|
/*
|
|
* Incorrect distance type
|
|
*/
|
|
if( s->disttype!=2 )
|
|
{
|
|
rep->npoints = s->npoints;
|
|
rep->terminationtype = -5;
|
|
rep->k = k;
|
|
rep->iterationscount = 0;
|
|
rep->energy = 0.0;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* K>NPoints or (K=0 and NPoints>0)
|
|
*/
|
|
if( k>s->npoints||(k==0&&s->npoints>0) )
|
|
{
|
|
rep->npoints = s->npoints;
|
|
rep->terminationtype = -3;
|
|
rep->k = k;
|
|
rep->iterationscount = 0;
|
|
rep->energy = 0.0;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* No points
|
|
*/
|
|
if( s->npoints==0 )
|
|
{
|
|
rep->npoints = 0;
|
|
rep->terminationtype = 1;
|
|
rep->k = k;
|
|
rep->iterationscount = 0;
|
|
rep->energy = 0.0;
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Normal case:
|
|
* 1<=K<=NPoints, Euclidean distance
|
|
*/
|
|
rep->npoints = s->npoints;
|
|
rep->nfeatures = s->nfeatures;
|
|
rep->k = k;
|
|
|
|
kmeansgenerateinternal(&s->xy, s->npoints, s->nfeatures, k, s->kmeansinitalgo, s->seed, s->kmeansmaxits, s->kmeansrestarts, s->kmeansdbgnoits, &rep->terminationtype, &rep->iterationscount, &dummy, ae_false, &rep->c, ae_true, &rep->cidx, &rep->energy, &s->kmeanstmp, _state);
|
|
ae_frame_leave(_state);
|
|
}
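
/*************************************************************************
Usage sketch for k-means clustering with explicitly configured restarts,
initialization and seed. A minimal, hedged example assuming the public
C++ interface (names as declared elsewhere in this package).

    // assumed: using namespace alglib; five 2-dimensional points
    real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
    clusterizerstate s;
    kmeansreport rep;
    clusterizercreate(s);
    clusterizersetpoints(s, xy, 2);       // Euclidean distance required
    clusterizersetkmeanslimits(s, 5, 0);  // 5 restarts, unlimited its
    clusterizersetkmeansinit(s, 2);       // k-means++ initialization
    clusterizersetseed(s, 1);             // deterministic runs
    clusterizerrunkmeans(s, 2, rep);      // K=2
    // rep.c is array[K,NFeatures] of centers; rep.cidx assigns points
*************************************************************************/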
/*************************************************************************
This function returns the distance matrix for a dataset

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    XY      -   array[NPoints,NFeatures], dataset
    NPoints -   number of points, >=0
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
                *  2    Euclidean distance  (L2 norm, non-squared)
                * 10    Pearson correlation:
                        dist(a,b) = 1-corr(a,b)
                * 11    Absolute Pearson correlation:
                        dist(a,b) = 1-|corr(a,b)|
                * 12    Uncentered Pearson correlation (cosine of the
                        angle):
                        dist(a,b) = a'*b/(|a|*|b|)
                * 13    Absolute uncentered Pearson correlation
                        dist(a,b) = |a'*b|/(|a|*|b|)
                * 20    Spearman rank correlation:
                        dist(a,b) = 1-rankcorr(a,b)
                * 21    Absolute Spearman rank correlation
                        dist(a,b) = 1-|rankcorr(a,b)|

OUTPUT PARAMETERS:
    D       -   array[NPoints,NPoints], distance matrix
                (full matrix is returned, with lower and upper triangles)

NOTE: different distance functions have different performance penalties:
      * Euclidean or Pearson correlation distances are the fastest ones
      * Spearman correlation distance function is a bit slower
      * city block and Chebyshev distances are an order of magnitude
        slower

      The reason behind the difference in performance is that
      correlation-based distance functions are computed using optimized
      linear algebra kernels, while Chebyshev and city block distance
      functions are computed using simple nested loops with two branches
      at each iteration.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizergetdistances(/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nfeatures,
|
|
ae_int_t disttype,
|
|
/* Real */ ae_matrix* d,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
apbuffers buf;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&buf, 0, sizeof(buf));
|
|
ae_matrix_clear(d);
|
|
_apbuffers_init(&buf, _state, ae_true);
|
|
|
|
ae_assert(nfeatures>=1, "ClusterizerGetDistances: NFeatures<1", _state);
|
|
ae_assert(npoints>=0, "ClusterizerGetDistances: NPoints<1", _state);
|
|
ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerGetDistances: incorrect DistType", _state);
|
|
ae_assert(xy->rows>=npoints, "ClusterizerGetDistances: Rows(XY)<NPoints", _state);
|
|
ae_assert(xy->cols>=nfeatures, "ClusterizerGetDistances: Cols(XY)<NFeatures", _state);
|
|
ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerGetDistances: XY contains NAN/INF", _state);
|
|
clusterizergetdistancesbuf(&buf, xy, npoints, nfeatures, disttype, d, _state);
|
|
ae_frame_leave(_state);
|
|
}
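
/*************************************************************************
Usage sketch for the standalone distance-matrix helper above. A minimal,
hedged example assuming the public C++ interface.

    // assumed: using namespace alglib; 3 points, 2 features
    real_2d_array xy = "[[0,0],[3,4],[6,8]]";
    real_2d_array d;
    clusterizergetdistances(xy, 3, 2, 2, d);  // DistType=2 (Euclidean)
    // d[0][1]==5, d[0][2]==10; the full symmetric matrix is returned
*************************************************************************/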
/*************************************************************************
Buffered version of ClusterizerGetDistances() which reuses previously
allocated space.

  -- ALGLIB --
     Copyright 29.05.2015 by Bochkanov Sergey
*************************************************************************/
void clusterizergetdistancesbuf(apbuffers* buf,
|
|
/* Real */ ae_matrix* xy,
|
|
ae_int_t npoints,
|
|
ae_int_t nfeatures,
|
|
ae_int_t disttype,
|
|
/* Real */ ae_matrix* d,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
double v;
|
|
double vv;
|
|
double vr;
|
|
|
|
|
|
ae_assert(nfeatures>=1, "ClusterizerGetDistancesBuf: NFeatures<1", _state);
|
|
ae_assert(npoints>=0, "ClusterizerGetDistancesBuf: NPoints<1", _state);
|
|
ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerGetDistancesBuf: incorrect DistType", _state);
|
|
ae_assert(xy->rows>=npoints, "ClusterizerGetDistancesBuf: Rows(XY)<NPoints", _state);
|
|
ae_assert(xy->cols>=nfeatures, "ClusterizerGetDistancesBuf: Cols(XY)<NFeatures", _state);
|
|
ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerGetDistancesBuf: XY contains NAN/INF", _state);
|
|
|
|
/*
|
|
* Quick exit
|
|
*/
|
|
if( npoints==0 )
|
|
{
|
|
return;
|
|
}
|
|
if( npoints==1 )
|
|
{
|
|
rmatrixsetlengthatleast(d, 1, 1, _state);
|
|
d->ptr.pp_double[0][0] = (double)(0);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Build distance matrix D.
|
|
*/
|
|
if( disttype==0||disttype==1 )
|
|
{
|
|
|
|
/*
|
|
* Chebyshev or city-block distances:
|
|
* * recursively calculate upper triangle (with main diagonal)
|
|
* * copy it to the bottom part of the matrix
|
|
*/
|
|
rmatrixsetlengthatleast(d, npoints, npoints, _state);
|
|
clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, 0, npoints, 0, npoints, _state);
|
|
rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
|
|
return;
|
|
}
|
|
if( disttype==2 )
|
|
{
|
|
|
|
/*
|
|
* Euclidean distance
|
|
*
|
|
* NOTE: parallelization is done within RMatrixSYRK
|
|
*/
|
|
rmatrixsetlengthatleast(d, npoints, npoints, _state);
|
|
rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
|
|
rvectorsetlengthatleast(&buf->ra1, nfeatures, _state);
|
|
rvectorsetlengthatleast(&buf->ra0, npoints, _state);
|
|
for(j=0; j<=nfeatures-1; j++)
|
|
{
|
|
buf->ra1.ptr.p_double[j] = 0.0;
|
|
}
|
|
v = (double)1/(double)npoints;
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
ae_v_addd(&buf->ra1.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1), v);
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
ae_v_move(&buf->rm0.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1));
|
|
ae_v_sub(&buf->rm0.ptr.pp_double[i][0], 1, &buf->ra1.ptr.p_double[0], 1, ae_v_len(0,nfeatures-1));
|
|
}
|
|
rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
d->ptr.pp_double[i][i] = 0.0;
|
|
for(j=i+1; j<=npoints-1; j++)
|
|
{
|
|
v = ae_sqrt(ae_maxreal(buf->ra0.ptr.p_double[i]+buf->ra0.ptr.p_double[j]-2*d->ptr.pp_double[i][j], 0.0, _state), _state);
|
|
d->ptr.pp_double[i][j] = v;
|
|
}
|
|
}
|
|
rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
|
|
return;
|
|
}
|
|
if( disttype==10||disttype==11 )
|
|
{
|
|
|
|
/*
|
|
* Absolute/nonabsolute Pearson correlation distance
|
|
*
|
|
* NOTE: parallelization is done within PearsonCorrM, which calls RMatrixSYRK internally
|
|
*/
|
|
rmatrixsetlengthatleast(d, npoints, npoints, _state);
|
|
rvectorsetlengthatleast(&buf->ra0, npoints, _state);
|
|
rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
v = 0.0;
|
|
for(j=0; j<=nfeatures-1; j++)
|
|
{
|
|
v = v+xy->ptr.pp_double[i][j];
|
|
}
|
|
v = v/nfeatures;
|
|
for(j=0; j<=nfeatures-1; j++)
|
|
{
|
|
buf->rm0.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j]-v;
|
|
}
|
|
}
|
|
rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
d->ptr.pp_double[i][i] = 0.0;
|
|
for(j=i+1; j<=npoints-1; j++)
|
|
{
|
|
v = d->ptr.pp_double[i][j]/ae_sqrt(buf->ra0.ptr.p_double[i]*buf->ra0.ptr.p_double[j], _state);
|
|
if( disttype==10 )
|
|
{
|
|
v = 1-v;
|
|
}
|
|
else
|
|
{
|
|
v = 1-ae_fabs(v, _state);
|
|
}
|
|
v = ae_maxreal(v, 0.0, _state);
|
|
d->ptr.pp_double[i][j] = v;
|
|
}
|
|
}
|
|
rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
|
|
return;
|
|
}
|
|
if( disttype==12||disttype==13 )
|
|
{
|
|
|
|
/*
|
|
* Absolute/nonabsolute uncentered Pearson correlation distance
|
|
*
|
|
* NOTE: parallelization is done within RMatrixSYRK
|
|
*/
|
|
rmatrixsetlengthatleast(d, npoints, npoints, _state);
|
|
rvectorsetlengthatleast(&buf->ra0, npoints, _state);
|
|
rmatrixsyrk(npoints, nfeatures, 1.0, xy, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
d->ptr.pp_double[i][i] = 0.0;
|
|
for(j=i+1; j<=npoints-1; j++)
|
|
{
|
|
v = d->ptr.pp_double[i][j]/ae_sqrt(buf->ra0.ptr.p_double[i]*buf->ra0.ptr.p_double[j], _state);
|
|
if( disttype==13 )
|
|
{
|
|
v = ae_fabs(v, _state);
|
|
}
|
|
v = ae_minreal(v, 1.0, _state);
|
|
d->ptr.pp_double[i][j] = 1-v;
|
|
}
|
|
}
|
|
rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
|
|
return;
|
|
}
|
|
if( disttype==20||disttype==21 )
|
|
{
|
|
|
|
/*
|
|
* Spearman rank correlation
|
|
*
|
|
* NOTE: parallelization of correlation matrix is done within
|
|
* PearsonCorrM, which calls RMatrixSYRK internally
|
|
*/
|
|
rmatrixsetlengthatleast(d, npoints, npoints, _state);
|
|
rvectorsetlengthatleast(&buf->ra0, npoints, _state);
|
|
rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
|
|
rmatrixcopy(npoints, nfeatures, xy, 0, 0, &buf->rm0, 0, 0, _state);
|
|
rankdatacentered(&buf->rm0, npoints, nfeatures, _state);
|
|
rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( ae_fp_greater(d->ptr.pp_double[i][i],(double)(0)) )
|
|
{
|
|
buf->ra0.ptr.p_double[i] = 1/ae_sqrt(d->ptr.pp_double[i][i], _state);
|
|
}
|
|
else
|
|
{
|
|
buf->ra0.ptr.p_double[i] = 0.0;
|
|
}
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
v = buf->ra0.ptr.p_double[i];
|
|
d->ptr.pp_double[i][i] = 0.0;
|
|
for(j=i+1; j<=npoints-1; j++)
|
|
{
|
|
vv = d->ptr.pp_double[i][j]*v*buf->ra0.ptr.p_double[j];
|
|
if( disttype==20 )
|
|
{
|
|
vr = 1-vv;
|
|
}
|
|
else
|
|
{
|
|
vr = 1-ae_fabs(vv, _state);
|
|
}
|
|
if( ae_fp_less(vr,(double)(0)) )
|
|
{
|
|
vr = 0.0;
|
|
}
|
|
d->ptr.pp_double[i][j] = vr;
|
|
}
|
|
}
|
|
rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
|
|
return;
|
|
}
|
|
ae_assert(ae_false, "Assertion failed", _state);
|
|
}
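
/*
 * Implementation note (added commentary, not original ALGLIB documentation):
 * the Euclidean branch above avoids an O(NPoints^2*NFeatures) scalar loop by
 * computing the Gram matrix G = C*C^T of the centered dataset C with
 * RMatrixSYRK and then applying the identity
 *
 *     |x_i - x_j|^2 = G[i][i] + G[j][j] - 2*G[i][j]
 *
 * The ae_maxreal(.,0) guard before ae_sqrt() protects against tiny negative
 * values produced by rounding. The correlation-based branches use the same
 * SYRK trick on centered (or rank-transformed) rows, which is why they are
 * much faster than the nested city-block/Chebyshev loops, as noted in the
 * performance remarks above.
 */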


/*************************************************************************
This function takes as input clusterization report Rep, desired clusters
count K, and builds top K clusters from hierarchical clusterization tree.
It returns assignment of points to clusters (array of cluster indexes).

INPUT PARAMETERS:
    Rep     -   report from ClusterizerRunAHC() performed on XY
    K       -   desired number of clusters, 1<=K<=NPoints.
                K can be zero only when NPoints=0.

OUTPUT PARAMETERS:
    CIdx    -   array[NPoints], I-th element contains cluster index (from
                0 to K-1) for I-th point of the dataset.
    CZ      -   array[K]. This array allows you to convert cluster indexes
                returned by this function to indexes used by Rep.Z. J-th
                cluster returned by this function corresponds to CZ[J]-th
                cluster stored in Rep.Z/PZ/PM.
                It is guaranteed that CZ[I]<CZ[I+1].

NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
      Although they were obtained by manipulation with top K nodes of
      dendrogram (i.e. hierarchical decomposition of dataset), this
      function does not return information about hierarchy. Each of the
      clusters stands on its own.

NOTE: Cluster indexes returned by this function do not correspond to
      indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
      representation of the dataset (dendrogram), or you work with "flat"
      representation returned by this function. Each of the representations
      has its own cluster indexing system (the former uses [0..2*NPoints-2],
      while the latter uses [0..K-1]), although it is possible to perform
      conversion from one system to another by means of the CZ array returned
      by this function, which allows you to convert indexes stored in CIdx
      to the numeration system used by Rep.Z.

NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
      it will perform many times faster than for K=100. Its worst-case
      performance is O(N*K), although in the average case it performs better
      (up to O(N*log(K))).

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizergetkclusters(ahcreport* rep,
     ae_int_t k,
     /* Integer */ ae_vector* cidx,
     /* Integer */ ae_vector* cz,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t mergeidx;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t t;
    ae_vector presentclusters;
    ae_vector clusterindexes;
    ae_vector clustersizes;
    ae_vector tmpidx;
    ae_int_t npoints;

    ae_frame_make(_state, &_frame_block);
    memset(&presentclusters, 0, sizeof(presentclusters));
    memset(&clusterindexes, 0, sizeof(clusterindexes));
    memset(&clustersizes, 0, sizeof(clustersizes));
    memset(&tmpidx, 0, sizeof(tmpidx));
    ae_vector_clear(cidx);
    ae_vector_clear(cz);
    ae_vector_init(&presentclusters, 0, DT_BOOL, _state, ae_true);
    ae_vector_init(&clusterindexes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&clustersizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tmpidx, 0, DT_INT, _state, ae_true);

    npoints = rep->npoints;
    ae_assert(npoints>=0, "ClusterizerGetKClusters: internal error in Rep integrity", _state);
    ae_assert(k>=0, "ClusterizerGetKClusters: K<0", _state);
    ae_assert(k<=npoints, "ClusterizerGetKClusters: K>NPoints", _state);
    ae_assert(k>0||npoints==0, "ClusterizerGetKClusters: K<=0", _state);
    ae_assert(npoints==rep->npoints, "ClusterizerGetKClusters: NPoints<>Rep.NPoints", _state);

    /*
     * Quick exit
     */
    if( npoints==0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( npoints==1 )
    {
        ae_vector_set_length(cz, 1, _state);
        ae_vector_set_length(cidx, 1, _state);
        cz->ptr.p_int[0] = 0;
        cidx->ptr.p_int[0] = 0;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Replay merges, from top to bottom,
     * keep track of clusters being present at the moment
     */
    ae_vector_set_length(&presentclusters, 2*npoints-1, _state);
    ae_vector_set_length(&tmpidx, npoints, _state);
    for(i=0; i<=2*npoints-3; i++)
    {
        presentclusters.ptr.p_bool[i] = ae_false;
    }
    presentclusters.ptr.p_bool[2*npoints-2] = ae_true;
    for(i=0; i<=npoints-1; i++)
    {
        tmpidx.ptr.p_int[i] = 2*npoints-2;
    }
    for(mergeidx=npoints-2; mergeidx>=npoints-k; mergeidx--)
    {

        /*
         * Update information about clusters being present at the moment
         */
        presentclusters.ptr.p_bool[npoints+mergeidx] = ae_false;
        presentclusters.ptr.p_bool[rep->z.ptr.pp_int[mergeidx][0]] = ae_true;
        presentclusters.ptr.p_bool[rep->z.ptr.pp_int[mergeidx][1]] = ae_true;

        /*
         * Update TmpIdx according to the current state of the dataset
         *
         * NOTE: TmpIdx contains cluster indexes from [0..2*NPoints-2];
         *       we will convert them to [0..K-1] later.
         */
        i0 = rep->pm.ptr.pp_int[mergeidx][0];
        i1 = rep->pm.ptr.pp_int[mergeidx][1];
        t = rep->z.ptr.pp_int[mergeidx][0];
        for(i=i0; i<=i1; i++)
        {
            tmpidx.ptr.p_int[i] = t;
        }
        i0 = rep->pm.ptr.pp_int[mergeidx][2];
        i1 = rep->pm.ptr.pp_int[mergeidx][3];
        t = rep->z.ptr.pp_int[mergeidx][1];
        for(i=i0; i<=i1; i++)
        {
            tmpidx.ptr.p_int[i] = t;
        }
    }

    /*
     * Fill CZ - array which allows us to convert cluster indexes
     * from one system to another.
     */
    ae_vector_set_length(cz, k, _state);
    ae_vector_set_length(&clusterindexes, 2*npoints-1, _state);
    t = 0;
    for(i=0; i<=2*npoints-2; i++)
    {
        if( presentclusters.ptr.p_bool[i] )
        {
            cz->ptr.p_int[t] = i;
            clusterindexes.ptr.p_int[i] = t;
            t = t+1;
        }
    }
    ae_assert(t==k, "ClusterizerGetKClusters: internal error", _state);

    /*
     * Convert indexes stored in CIdx
     */
    ae_vector_set_length(cidx, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {
        cidx->ptr.p_int[i] = clusterindexes.ptr.p_int[tmpidx.ptr.p_int[rep->p.ptr.p_int[i]]];
    }
    ae_frame_leave(_state);
}
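
/*
 * Usage sketch (added commentary, not part of the original ALGLIB sources):
 * how CIdx/CZ returned above might be used, assuming an AHC report `rep`
 * was already produced for a dataset of `n` points and `st` is a live
 * ae_state. All variable names are illustrative.
 *
 *     ae_vector cidx, cz;
 *     ae_vector_init(&cidx, 0, DT_INT, &st, ae_true);
 *     ae_vector_init(&cz, 0, DT_INT, &st, ae_true);
 *     clusterizergetkclusters(&rep, 3, &cidx, &cz, &st);
 *     // point i belongs to flat cluster cidx.ptr.p_int[i] in [0..2];
 *     // flat cluster j corresponds to dendrogram node cz.ptr.p_int[j]
 *     // in the [0..2*n-2] numeration used by rep.z/pz/pm.
 */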


/*************************************************************************
This function accepts AHC report Rep, desired minimum intercluster
distance and returns top clusters from hierarchical clusterization tree
which are separated by distance R or HIGHER.

It returns assignment of points to clusters (array of cluster indexes).

There is one more function with similar name - ClusterizerSeparatedByCorr,
which returns clusters with intercluster correlation equal to R or LOWER
(note: higher for distance, lower for correlation).

INPUT PARAMETERS:
    Rep     -   report from ClusterizerRunAHC() performed on XY
    R       -   desired minimum intercluster distance, R>=0

OUTPUT PARAMETERS:
    K       -   number of clusters, 1<=K<=NPoints
    CIdx    -   array[NPoints], I-th element contains cluster index (from
                0 to K-1) for I-th point of the dataset.
    CZ      -   array[K]. This array allows you to convert cluster indexes
                returned by this function to indexes used by Rep.Z. J-th
                cluster returned by this function corresponds to CZ[J]-th
                cluster stored in Rep.Z/PZ/PM.
                It is guaranteed that CZ[I]<CZ[I+1].

NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
      Although they were obtained by manipulation with top K nodes of
      dendrogram (i.e. hierarchical decomposition of dataset), this
      function does not return information about hierarchy. Each of the
      clusters stands on its own.

NOTE: Cluster indexes returned by this function do not correspond to
      indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
      representation of the dataset (dendrogram), or you work with "flat"
      representation returned by this function. Each of the representations
      has its own cluster indexing system (the former uses [0..2*NPoints-2],
      while the latter uses [0..K-1]), although it is possible to perform
      conversion from one system to another by means of the CZ array returned
      by this function, which allows you to convert indexes stored in CIdx
      to the numeration system used by Rep.Z.

NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
      it will perform many times faster than for K=100. Its worst-case
      performance is O(N*K), although in the average case it performs better
      (up to O(N*log(K))).

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbydist(ahcreport* rep,
     double r,
     ae_int_t* k,
     /* Integer */ ae_vector* cidx,
     /* Integer */ ae_vector* cz,
     ae_state *_state)
{

    *k = 0;
    ae_vector_clear(cidx);
    ae_vector_clear(cz);

    ae_assert(ae_isfinite(r, _state)&&ae_fp_greater_eq(r,(double)(0)), "ClusterizerSeparatedByDist: R is infinite or less than 0", _state);
    *k = 1;
    while(*k<rep->npoints&&ae_fp_greater_eq(rep->mergedist.ptr.p_double[rep->npoints-1-(*k)],r))
    {
        *k = *k+1;
    }
    clusterizergetkclusters(rep, *k, cidx, cz, _state);
}
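
/*
 * Added note (commentary, not original ALGLIB documentation): for the
 * linkage types implemented in this unit, MergeDist is nondecreasing
 * (merge NPoints-2 is the last, largest-distance merge), so the loop
 * above simply counts how many of the topmost merges have distance >= R;
 * undoing those merges leaves K clusters:
 *
 *     K = 1 + (number of topmost merges with MergeDist >= R,
 *              counting down from merge NPoints-2 and stopping
 *              at the first merge with distance < R)
 */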


/*************************************************************************
This function accepts AHC report Rep, desired maximum intercluster
correlation and returns top clusters from hierarchical clusterization tree
which are separated by correlation R or LOWER.

It returns assignment of points to clusters (array of cluster indexes).

There is one more function with similar name - ClusterizerSeparatedByDist,
which returns clusters with intercluster distance equal to R or HIGHER
(note: higher for distance, lower for correlation).

INPUT PARAMETERS:
    Rep     -   report from ClusterizerRunAHC() performed on XY
    R       -   desired maximum intercluster correlation, -1<=R<=+1

OUTPUT PARAMETERS:
    K       -   number of clusters, 1<=K<=NPoints
    CIdx    -   array[NPoints], I-th element contains cluster index (from
                0 to K-1) for I-th point of the dataset.
    CZ      -   array[K]. This array allows you to convert cluster indexes
                returned by this function to indexes used by Rep.Z. J-th
                cluster returned by this function corresponds to CZ[J]-th
                cluster stored in Rep.Z/PZ/PM.
                It is guaranteed that CZ[I]<CZ[I+1].

NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
      Although they were obtained by manipulation with top K nodes of
      dendrogram (i.e. hierarchical decomposition of dataset), this
      function does not return information about hierarchy. Each of the
      clusters stands on its own.

NOTE: Cluster indexes returned by this function do not correspond to
      indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
      representation of the dataset (dendrogram), or you work with "flat"
      representation returned by this function. Each of the representations
      has its own cluster indexing system (the former uses [0..2*NPoints-2],
      while the latter uses [0..K-1]), although it is possible to perform
      conversion from one system to another by means of the CZ array returned
      by this function, which allows you to convert indexes stored in CIdx
      to the numeration system used by Rep.Z.

NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
      it will perform many times faster than for K=100. Its worst-case
      performance is O(N*K), although in the average case it performs better
      (up to O(N*log(K))).

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbycorr(ahcreport* rep,
     double r,
     ae_int_t* k,
     /* Integer */ ae_vector* cidx,
     /* Integer */ ae_vector* cz,
     ae_state *_state)
{

    *k = 0;
    ae_vector_clear(cidx);
    ae_vector_clear(cz);

    ae_assert((ae_isfinite(r, _state)&&ae_fp_greater_eq(r,(double)(-1)))&&ae_fp_less_eq(r,(double)(1)), "ClusterizerSeparatedByCorr: R is infinite or outside of [-1,+1]", _state);
    *k = 1;
    while(*k<rep->npoints&&ae_fp_greater_eq(rep->mergedist.ptr.p_double[rep->npoints-1-(*k)],1-r))
    {
        *k = *k+1;
    }
    clusterizergetkclusters(rep, *k, cidx, cz, _state);
}
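
/*
 * Added note: correlation-type distances in this unit are defined as
 * d = 1 - corr(x,y) (or 1 - |corr(x,y)| for the "absolute" variants),
 * so "intercluster correlation <= R" is equivalent to "intercluster
 * distance >= 1-R", which is why the loop above compares MergeDist
 * against 1-r instead of r.
 */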


/*************************************************************************
K-means++ initialization

INPUT PARAMETERS:
    Buf         -   special reusable structure which stores previously allocated
                    memory, intended to avoid memory fragmentation when solving
                    multiple subsequent problems. Must be initialized prior to
                    usage.

OUTPUT PARAMETERS:
    Buf         -   initialized structure

  -- ALGLIB --
     Copyright 24.07.2015 by Bochkanov Sergey
*************************************************************************/
void kmeansinitbuf(kmeansbuffers* buf, ae_state *_state)
{
    ae_frame _frame_block;
    apbuffers updateseed;

    ae_frame_make(_state, &_frame_block);
    memset(&updateseed, 0, sizeof(updateseed));
    _apbuffers_init(&updateseed, _state, ae_true);

    ae_shared_pool_set_seed(&buf->updatepool, &updateseed, sizeof(updateseed), _apbuffers_init, _apbuffers_init_copy, _apbuffers_destroy, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
K-means++ clusterization

INPUT PARAMETERS:
    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
    NPoints     -   dataset size, NPoints>=K
    NVars       -   number of variables, NVars>=1
    K           -   desired number of clusters, K>=1
    InitAlgo    -   initialization algorithm:
                    * 0 - automatic selection of best algorithm
                    * 1 - random selection of centers
                    * 2 - k-means++
                    * 3 - fast-greedy init
                    *-1 - first K rows of dataset are used
                          (special debug algorithm)
    Seed        -   seed value for internal RNG:
                    * positive value is used to initialize RNG in order to
                      induce deterministic behavior of algorithm
                    * zero or negative value means that random seed is
                      generated
    MaxIts      -   iterations limit or zero for no limit
    Restarts    -   number of restarts, Restarts>=1
    KMeansDbgNoIts- debug flag; if set, Lloyd's iteration is not performed,
                    only initialization phase.
    Buf         -   special reusable structure which stores previously allocated
                    memory, intended to avoid memory fragmentation when solving
                    multiple subsequent problems:
                    * MUST BE INITIALIZED WITH KMeansInitBuf() CALL BEFORE
                      FIRST PASS TO THIS FUNCTION!
                    * subsequent passes must be made without re-initialization

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -3, if task is degenerate (number of distinct points is
                          less than K)
                    * -1, if incorrect NPoints/NVars/K/Restarts was passed
                    *  1, if subroutine finished successfully
    IterationsCount- actual number of iterations performed by clusterizer
    CCol        -   array[0..NVars-1,0..K-1], matrix whose columns store
                    cluster centers
    NeedCCol    -   True in case caller requires to store result in CCol
    CRow        -   array[0..K-1,0..NVars-1], same as CCol, but centers are
                    stored in rows
    NeedCRow    -   True in case caller requires to store result in CRow
    XYC         -   array[NPoints], which contains cluster indexes
    Energy      -   merit function of clusterization

  -- ALGLIB --
     Copyright 21.03.2009 by Bochkanov Sergey
*************************************************************************/
void kmeansgenerateinternal(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t k,
     ae_int_t initalgo,
     ae_int_t seed,
     ae_int_t maxits,
     ae_int_t restarts,
     ae_bool kmeansdbgnoits,
     ae_int_t* info,
     ae_int_t* iterationscount,
     /* Real */ ae_matrix* ccol,
     ae_bool needccol,
     /* Real */ ae_matrix* crow,
     ae_bool needcrow,
     /* Integer */ ae_vector* xyc,
     double* energy,
     kmeansbuffers* buf,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t i1;
    double e;
    double eprev;
    double v;
    double vv;
    ae_bool waschanges;
    ae_bool zerosizeclusters;
    ae_int_t pass;
    ae_int_t itcnt;
    hqrndstate rs;

    ae_frame_make(_state, &_frame_block);
    memset(&rs, 0, sizeof(rs));
    *info = 0;
    *iterationscount = 0;
    ae_matrix_clear(ccol);
    ae_matrix_clear(crow);
    ae_vector_clear(xyc);
    *energy = 0;
    _hqrndstate_init(&rs, _state, ae_true);


    /*
     * Test parameters
     */
    if( ((npoints<k||nvars<1)||k<1)||restarts<1 )
    {
        *info = -1;
        *iterationscount = 0;
        ae_frame_leave(_state);
        return;
    }

    /*
     * TODO: special case K=1
     * TODO: special case K=NPoints
     */
    *info = 1;
    *iterationscount = 0;

    /*
     * Multiple passes of k-means++ algorithm
     */
    if( seed<=0 )
    {
        hqrndrandomize(&rs, _state);
    }
    else
    {
        hqrndseed(325355, seed, &rs, _state);
    }
    ae_vector_set_length(xyc, npoints, _state);
    rmatrixsetlengthatleast(&buf->ct, k, nvars, _state);
    rmatrixsetlengthatleast(&buf->ctbest, k, nvars, _state);
    ivectorsetlengthatleast(&buf->xycprev, npoints, _state);
    ivectorsetlengthatleast(&buf->xycbest, npoints, _state);
    rvectorsetlengthatleast(&buf->d2, npoints, _state);
    ivectorsetlengthatleast(&buf->csizes, k, _state);
    *energy = ae_maxrealnumber;
    for(pass=1; pass<=restarts; pass++)
    {

        /*
         * Select initial centers.
         *
         * Note that for performance reasons centers are stored in ROWS of CT, not
         * in columns. We'll transpose CT in the end and store it in C.
         *
         * Also note that SelectInitialCenters() may return a degenerate set of centers
         * (some of them have no corresponding points in dataset, some are non-distinct).
         * Algorithm below is robust enough to deal with such a set.
         */
        clustering_selectinitialcenters(xy, npoints, nvars, initalgo, &rs, k, &buf->ct, &buf->initbuf, &buf->updatepool, _state);

        /*
         * Lloyd's iteration
         */
        if( !kmeansdbgnoits )
        {

            /*
             * Perform iteration as usual, in normal mode
             */
            for(i=0; i<=npoints-1; i++)
            {
                xyc->ptr.p_int[i] = -1;
            }
            eprev = ae_maxrealnumber;
            e = ae_maxrealnumber;
            itcnt = 0;
            while(maxits==0||itcnt<maxits)
            {

                /*
                 * Update iteration counter
                 */
                itcnt = itcnt+1;
                inc(iterationscount, _state);

                /*
                 * Call KMeansUpdateDistances(), fill XYC with center numbers,
                 * D2 with center distances.
                 */
                for(i=0; i<=npoints-1; i++)
                {
                    buf->xycprev.ptr.p_int[i] = xyc->ptr.p_int[i];
                }
                kmeansupdatedistances(xy, 0, npoints, nvars, &buf->ct, 0, k, xyc, &buf->d2, &buf->updatepool, _state);
                waschanges = ae_false;
                for(i=0; i<=npoints-1; i++)
                {
                    waschanges = waschanges||xyc->ptr.p_int[i]!=buf->xycprev.ptr.p_int[i];
                }

                /*
                 * Update centers
                 */
                for(j=0; j<=k-1; j++)
                {
                    buf->csizes.ptr.p_int[j] = 0;
                }
                for(i=0; i<=k-1; i++)
                {
                    for(j=0; j<=nvars-1; j++)
                    {
                        buf->ct.ptr.pp_double[i][j] = (double)(0);
                    }
                }
                for(i=0; i<=npoints-1; i++)
                {
                    buf->csizes.ptr.p_int[xyc->ptr.p_int[i]] = buf->csizes.ptr.p_int[xyc->ptr.p_int[i]]+1;
                    ae_v_add(&buf->ct.ptr.pp_double[xyc->ptr.p_int[i]][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
                }
                zerosizeclusters = ae_false;
                for(j=0; j<=k-1; j++)
                {
                    if( buf->csizes.ptr.p_int[j]!=0 )
                    {
                        v = (double)1/(double)buf->csizes.ptr.p_int[j];
                        ae_v_muld(&buf->ct.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1), v);
                    }
                    zerosizeclusters = zerosizeclusters||buf->csizes.ptr.p_int[j]==0;
                }
                if( zerosizeclusters )
                {

                    /*
                     * Some clusters have zero size - rare, but possible.
                     * We'll choose new centers for such clusters using k-means++ rule
                     * and restart algorithm, decrementing iteration counter
                     * in order to allow one more iteration (this one was useless
                     * and should not be counted).
                     */
                    if( !clustering_fixcenters(xy, npoints, nvars, &buf->ct, k, &buf->initbuf, &buf->updatepool, _state) )
                    {
                        *info = -3;
                        ae_frame_leave(_state);
                        return;
                    }
                    itcnt = itcnt-1;
                    continue;
                }

                /*
                 * Stop if one of two conditions is met:
                 * 1. nothing has changed during iteration
                 * 2. energy function increased after recalculation on new centers
                 */
                e = (double)(0);
                for(i=0; i<=npoints-1; i++)
                {
                    v = 0.0;
                    i1 = xyc->ptr.p_int[i];
                    for(j=0; j<=nvars-1; j++)
                    {
                        vv = xy->ptr.pp_double[i][j]-buf->ct.ptr.pp_double[i1][j];
                        v = v+vv*vv;
                    }
                    e = e+v;
                }
                if( !waschanges||ae_fp_greater_eq(e,eprev) )
                {
                    break;
                }

                /*
                 * Update EPrev
                 */
                eprev = e;
            }
        }
        else
        {

            /*
             * Debug mode: no Lloyd's iteration.
             * We just calculate potential E.
             */
            kmeansupdatedistances(xy, 0, npoints, nvars, &buf->ct, 0, k, xyc, &buf->d2, &buf->updatepool, _state);
            e = (double)(0);
            for(i=0; i<=npoints-1; i++)
            {
                e = e+buf->d2.ptr.p_double[i];
            }
        }

        /*
         * Compare E with best centers found so far
         */
        if( ae_fp_less(e,*energy) )
        {

            /*
             * Store partition.
             */
            *energy = e;
            copymatrix(&buf->ct, 0, k-1, 0, nvars-1, &buf->ctbest, 0, k-1, 0, nvars-1, _state);
            for(i=0; i<=npoints-1; i++)
            {
                buf->xycbest.ptr.p_int[i] = xyc->ptr.p_int[i];
            }
        }
    }

    /*
     * Copy and transpose
     */
    if( needccol )
    {
        ae_matrix_set_length(ccol, nvars, k, _state);
        copyandtranspose(&buf->ctbest, 0, k-1, 0, nvars-1, ccol, 0, nvars-1, 0, k-1, _state);
    }
    if( needcrow )
    {
        ae_matrix_set_length(crow, k, nvars, _state);
        rmatrixcopy(k, nvars, &buf->ctbest, 0, 0, crow, 0, 0, _state);
    }
    for(i=0; i<=npoints-1; i++)
    {
        xyc->ptr.p_int[i] = buf->xycbest.ptr.p_int[i];
    }
    ae_frame_leave(_state);
}
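
/*
 * Usage sketch (added commentary, not part of the original ALGLIB sources):
 * one plausible way of driving the internal k-means routine above. All
 * variable names are illustrative assumptions; user code would normally
 * go through the public clusterizer interface instead of calling
 * kmeansgenerateinternal() directly.
 *
 *     kmeansbuffers kbuf;
 *     ae_matrix ccol, crow;
 *     ae_vector xyc;
 *     ae_int_t info, itscnt;
 *     double energy;
 *     _kmeansbuffers_init(&kbuf, &st, ae_true);
 *     kmeansinitbuf(&kbuf, &st);                 // MUST precede the first call
 *     kmeansgenerateinternal(&xy, npoints, nvars, 3, 0, 0, 0, 5, ae_false,
 *         &info, &itscnt, &ccol, ae_false, &crow, ae_true, &xyc, &energy,
 *         &kbuf, &st);
 *     // info=1 on success; crow holds 3 centers (one per row), xyc holds
 *     // per-point cluster indexes, energy holds the k-means potential
 */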


/*************************************************************************
This procedure recalculates distances from points to centers and assigns
each point to closest center.

INPUT PARAMETERS:
    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
    Idx0,Idx1   -   define range of dataset [Idx0,Idx1) to process;
                    right boundary is not included.
    NVars       -   number of variables, NVars>=1
    CT          -   matrix of centers, centers are stored in rows
    CIdx0,CIdx1 -   define range of centers [CIdx0,CIdx1) to process;
                    right boundary is not included.
    XYC         -   preallocated output buffer
    XYDist2     -   preallocated output buffer
    Tmp         -   temporary buffer, automatically reallocated if needed
    BufferPool  -   shared pool seeded with instance of APBuffers structure
                    (seed instance can be uninitialized). It is recommended
                    to use this pool only with KMeansUpdateDistances()
                    function.

OUTPUT PARAMETERS:
    XYC         -   new assignments of points to centers are stored
                    in [Idx0,Idx1)
    XYDist2     -   squared distances from points to their centers are
                    stored in [Idx0,Idx1)

  -- ALGLIB --
     Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
void kmeansupdatedistances(/* Real */ ae_matrix* xy,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t nvars,
     /* Real */ ae_matrix* ct,
     ae_int_t cidx0,
     ae_int_t cidx1,
     /* Integer */ ae_vector* xyc,
     /* Real */ ae_vector* xydist2,
     ae_shared_pool* bufferpool,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t j;
    ae_int_t cclosest;
    double dclosest;
    double vv;
    apbuffers *buf;
    ae_smart_ptr _buf;
    double rcomplexity;
    ae_int_t task0;
    ae_int_t task1;
    ae_int_t pblkcnt;
    ae_int_t cblkcnt;
    ae_int_t vblkcnt;
    ae_int_t pblk;
    ae_int_t cblk;
    ae_int_t vblk;
    ae_int_t p0;
    ae_int_t p1;
    ae_int_t c0;
    ae_int_t c1;
    ae_int_t v0;
    ae_int_t v1;
    double v00;
    double v01;
    double v10;
    double v11;
    double vp0;
    double vp1;
    double vc0;
    double vc1;
    ae_int_t pcnt;
    ae_int_t pcntpadded;
    ae_int_t ccnt;
    ae_int_t ccntpadded;
    ae_int_t offs0;
    ae_int_t offs00;
    ae_int_t offs01;
    ae_int_t offs10;
    ae_int_t offs11;
    ae_int_t vcnt;
    ae_int_t stride;

    ae_frame_make(_state, &_frame_block);
    memset(&_buf, 0, sizeof(_buf));
    ae_smart_ptr_init(&_buf, (void**)&buf, _state, ae_true);


    /*
     * Quick exit for special cases
     */
    if( idx1<=idx0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( cidx1<=cidx0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( nvars<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Try to recursively divide/process dataset
     *
     * NOTE: real arithmetics is used to avoid integer overflow on large problem sizes
     */
    rcomplexity = 2*rmul3((double)(idx1-idx0), (double)(cidx1-cidx0), (double)(nvars), _state);
    if( ae_fp_greater_eq(rcomplexity,smpactivationlevel(_state))&&idx1-idx0>=2*clustering_kmeansblocksize )
    {
        if( _trypexec_kmeansupdatedistances(xy,idx0,idx1,nvars,ct,cidx0,cidx1,xyc,xydist2,bufferpool, _state) )
        {
            ae_frame_leave(_state);
            return;
        }
    }
    if( ((ae_fp_greater_eq(rcomplexity,spawnlevel(_state))&&idx1-idx0>=2*clustering_kmeansblocksize)&&nvars>=clustering_kmeansparalleldim)&&cidx1-cidx0>=clustering_kmeansparallelk )
    {
        splitlength(idx1-idx0, clustering_kmeansblocksize, &task0, &task1, _state);
        kmeansupdatedistances(xy, idx0, idx0+task0, nvars, ct, cidx0, cidx1, xyc, xydist2, bufferpool, _state);
        kmeansupdatedistances(xy, idx0+task0, idx1, nvars, ct, cidx0, cidx1, xyc, xydist2, bufferpool, _state);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Dataset chunk is selected.
     *
     * Process it with blocked algorithm:
     * * iterate over points, process them in KMeansBlockSize-ed chunks
     * * for each chunk of dataset, iterate over centers, process them in KMeansBlockSize-ed chunks
     * * for each chunk of dataset/centerset, iterate over variables, process them in KMeansBlockSize-ed chunks
     */
    ae_assert(clustering_kmeansblocksize%2==0, "KMeansUpdateDistances: internal error", _state);
    ae_shared_pool_retrieve(bufferpool, &_buf, _state);
    rvectorsetlengthatleast(&buf->ra0, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
    rvectorsetlengthatleast(&buf->ra1, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
    rvectorsetlengthatleast(&buf->ra2, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
    rvectorsetlengthatleast(&buf->ra3, clustering_kmeansblocksize, _state);
    ivectorsetlengthatleast(&buf->ia3, clustering_kmeansblocksize, _state);
    pblkcnt = chunkscount(idx1-idx0, clustering_kmeansblocksize, _state);
    cblkcnt = chunkscount(cidx1-cidx0, clustering_kmeansblocksize, _state);
    vblkcnt = chunkscount(nvars, clustering_kmeansblocksize, _state);
    for(pblk=0; pblk<=pblkcnt-1; pblk++)
    {

        /*
         * Process PBlk-th chunk of dataset.
         */
        p0 = idx0+pblk*clustering_kmeansblocksize;
        p1 = ae_minint(p0+clustering_kmeansblocksize, idx1, _state);

        /*
         * Prepare RA3[]/IA3[] for storage of best distances and best cluster numbers.
         */
        for(i=0; i<=clustering_kmeansblocksize-1; i++)
        {
            buf->ra3.ptr.p_double[i] = ae_maxrealnumber;
            buf->ia3.ptr.p_int[i] = -1;
        }

        /*
         * Iterate over chunks of centerset.
         */
        for(cblk=0; cblk<=cblkcnt-1; cblk++)
        {

            /*
             * Process CBlk-th chunk of centerset
             */
            c0 = cidx0+cblk*clustering_kmeansblocksize;
            c1 = ae_minint(c0+clustering_kmeansblocksize, cidx1, _state);

            /*
             * At this point we have to calculate a set of pairwise distances
             * between points [P0,P1) and centers [C0,C1) and select best center
             * for each point. It can also be done with blocked algorithm
             * (blocking for variables).
             *
             * Following arrays are used:
             * * RA0[] - matrix of distances, padded by zeros for even size,
             *           rows are stored with stride KMeansBlockSize.
             * * RA1[] - matrix of points (variables corresponding to current
             *           block are extracted), padded by zeros for even size,
             *           rows are stored with stride KMeansBlockSize.
             * * RA2[] - matrix of centers (variables corresponding to current
             *           block are extracted), padded by zeros for even size,
             *           rows are stored with stride KMeansBlockSize.
             */
            pcnt = p1-p0;
            pcntpadded = pcnt+pcnt%2;
            ccnt = c1-c0;
            ccntpadded = ccnt+ccnt%2;
            stride = clustering_kmeansblocksize;
            ae_assert(pcntpadded<=clustering_kmeansblocksize, "KMeansUpdateDistances: integrity error", _state);
            ae_assert(ccntpadded<=clustering_kmeansblocksize, "KMeansUpdateDistances: integrity error", _state);
            for(i=0; i<=pcntpadded-1; i++)
            {
                for(j=0; j<=ccntpadded-1; j++)
                {
                    buf->ra0.ptr.p_double[i*stride+j] = 0.0;
                }
            }
            for(vblk=0; vblk<=vblkcnt-1; vblk++)
            {

                /*
                 * Fetch VBlk-th block of variables to arrays RA1 (points) and RA2 (centers).
                 * Pad points and centers with zeros.
                 */
                v0 = vblk*clustering_kmeansblocksize;
                v1 = ae_minint(v0+clustering_kmeansblocksize, nvars, _state);
                vcnt = v1-v0;
                for(i=0; i<=pcnt-1; i++)
                {
                    for(j=0; j<=vcnt-1; j++)
                    {
                        buf->ra1.ptr.p_double[i*stride+j] = xy->ptr.pp_double[p0+i][v0+j];
                    }
                }
                for(i=pcnt; i<=pcntpadded-1; i++)
                {
                    for(j=0; j<=vcnt-1; j++)
                    {
                        buf->ra1.ptr.p_double[i*stride+j] = 0.0;
                    }
                }
                for(i=0; i<=ccnt-1; i++)
                {
                    for(j=0; j<=vcnt-1; j++)
                    {
                        buf->ra2.ptr.p_double[i*stride+j] = ct->ptr.pp_double[c0+i][v0+j];
                    }
                }
                for(i=ccnt; i<=ccntpadded-1; i++)
                {
                    for(j=0; j<=vcnt-1; j++)
                    {
                        buf->ra2.ptr.p_double[i*stride+j] = 0.0;
                    }
                }

                /*
                 * Update distance matrix with sums-of-squared-differences of RA1 and RA2
                 */
                i0 = 0;
                while(i0<pcntpadded)
                {
                    i1 = 0;
                    while(i1<ccntpadded)
                    {
                        offs0 = i0*stride+i1;
                        v00 = buf->ra0.ptr.p_double[offs0];
                        v01 = buf->ra0.ptr.p_double[offs0+1];
                        v10 = buf->ra0.ptr.p_double[offs0+stride];
                        v11 = buf->ra0.ptr.p_double[offs0+stride+1];
                        offs00 = i0*stride;
                        offs01 = offs00+stride;
                        offs10 = i1*stride;
                        offs11 = offs10+stride;
                        for(j=0; j<=vcnt-1; j++)
                        {
                            vp0 = buf->ra1.ptr.p_double[offs00+j];
                            vp1 = buf->ra1.ptr.p_double[offs01+j];
                            vc0 = buf->ra2.ptr.p_double[offs10+j];
                            vc1 = buf->ra2.ptr.p_double[offs11+j];
                            vv = vp0-vc0;
                            v00 = v00+vv*vv;
                            vv = vp0-vc1;
                            v01 = v01+vv*vv;
                            vv = vp1-vc0;
                            v10 = v10+vv*vv;
                            vv = vp1-vc1;
                            v11 = v11+vv*vv;
                        }
                        offs0 = i0*stride+i1;
                        buf->ra0.ptr.p_double[offs0] = v00;
                        buf->ra0.ptr.p_double[offs0+1] = v01;
                        buf->ra0.ptr.p_double[offs0+stride] = v10;
                        buf->ra0.ptr.p_double[offs0+stride+1] = v11;
                        i1 = i1+2;
                    }
                    i0 = i0+2;
                }
            }
            for(i=0; i<=pcnt-1; i++)
            {
                cclosest = buf->ia3.ptr.p_int[i];
                dclosest = buf->ra3.ptr.p_double[i];
                for(j=0; j<=ccnt-1; j++)
                {
                    if( ae_fp_less(buf->ra0.ptr.p_double[i*stride+j],dclosest) )
                    {
                        dclosest = buf->ra0.ptr.p_double[i*stride+j];
                        cclosest = c0+j;
                    }
                }
                buf->ia3.ptr.p_int[i] = cclosest;
                buf->ra3.ptr.p_double[i] = dclosest;
            }
        }

        /*
         * Store best centers to XYC[]
         */
        for(i=p0; i<=p1-1; i++)
        {
            xyc->ptr.p_int[i] = buf->ia3.ptr.p_int[i-p0];
            xydist2->ptr.p_double[i] = buf->ra3.ptr.p_double[i-p0];
        }
    }
    ae_shared_pool_recycle(bufferpool, &_buf, _state);
    ae_frame_leave(_state);
}
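
/*
 * Added note (commentary, not original ALGLIB documentation): the blocked
 * code above is an optimization of the following straightforward reference
 * loop (a sketch, not part of the library); both assign each point in
 * [idx0,idx1) to its nearest center in [cidx0,cidx1):
 *
 *     for(i=idx0; i<idx1; i++)
 *     {
 *         cclosest = -1;
 *         dclosest = ae_maxrealnumber;
 *         for(c=cidx0; c<cidx1; c++)
 *         {
 *             v = 0.0;
 *             for(j=0; j<nvars; j++)
 *             {
 *                 vv = xy->ptr.pp_double[i][j]-ct->ptr.pp_double[c][j];
 *                 v = v+vv*vv;
 *             }
 *             if( v<dclosest ) { dclosest = v; cclosest = c; }
 *         }
 *         xyc->ptr.p_int[i] = cclosest;
 *         xydist2->ptr.p_double[i] = dclosest;
 *     }
 *
 * Blocking by KMeansBlockSize keeps the working set in cache, and the 2x2
 * inner microkernel lets partial sums stay in registers.
 */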


/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_kmeansupdatedistances(/* Real */ ae_matrix* xy,
    ae_int_t idx0,
    ae_int_t idx1,
    ae_int_t nvars,
    /* Real */ ae_matrix* ct,
    ae_int_t cidx0,
    ae_int_t cidx1,
    /* Integer */ ae_vector* xyc,
    /* Real */ ae_vector* xydist2,
    ae_shared_pool* bufferpool,
    ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
This function selects initial centers according to specified initialization
algorithm.

IMPORTANT: this function provides no guarantees regarding selection of
           DIFFERENT centers. Centers returned by this function may
           include duplicates (say, when random sampling is used). It is
           also possible that some centers are empty.
           Algorithm which uses this function must be able to deal with it.
           Say, you may want to use FixCenters() in order to fix empty centers.

INPUT PARAMETERS:
    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
    NPoints     -   points count
    NVars       -   number of variables, NVars>=1
    InitAlgo    -   initialization algorithm:
                    * 0 - automatic selection of best algorithm
                    * 1 - random selection
                    * 2 - k-means++
                    * 3 - fast-greedy init
                    *-1 - first K rows of dataset are used (debug algorithm)
    RS          -   RNG used to select centers
    K           -   number of centers, K>=1
    CT          -   possibly preallocated output buffer, resized if needed
    InitBuf     -   internal buffer, possibly uninitialized instance of
                    APBuffers. It is recommended to use this instance only
                    with SelectInitialCenters() and FixCenters() functions,
                    because these functions may allocate really large storage.
    UpdatePool  -   shared pool seeded with instance of APBuffers structure
                    (seed instance can be uninitialized). Used internally with
                    KMeansUpdateDistances() function. It is recommended
                    to use this pool ONLY with KMeansUpdateDistances()
                    function.

OUTPUT PARAMETERS:
    CT          -   set of K clusters, one per row

  -- ALGLIB --
     Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
static void clustering_selectinitialcenters(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t initalgo,
     hqrndstate* rs,
     ae_int_t k,
     /* Real */ ae_matrix* ct,
     apbuffers* initbuf,
     ae_shared_pool* updatepool,
     ae_state *_state)
{
    ae_int_t cidx;
    ae_int_t i;
    ae_int_t j;
    double v;
    double vv;
    double s;
    ae_int_t lastnz;
    ae_int_t ptidx;
    ae_int_t samplesize;
    ae_int_t samplescntnew;
    ae_int_t samplescntall;
    double samplescale;


    /*
     * Check parameters
     */
    ae_assert(npoints>0, "SelectInitialCenters: internal error", _state);
    ae_assert(nvars>0, "SelectInitialCenters: internal error", _state);
    ae_assert(k>0, "SelectInitialCenters: internal error", _state);
    if( initalgo==0 )
    {
        initalgo = 3;
    }
    rmatrixsetlengthatleast(ct, k, nvars, _state);

    /*
     * Debug initialization: use first K rows of the dataset
     */
    if( initalgo==-1 )
    {
        for(i=0; i<=k-1; i++)
        {
            ae_v_move(&ct->ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i%npoints][0], 1, ae_v_len(0,nvars-1));
        }
        return;
    }

    /*
     * Random initialization
     */
    if( initalgo==1 )
    {
        for(i=0; i<=k-1; i++)
        {
            j = hqrnduniformi(rs, npoints, _state);
            ae_v_move(&ct->ptr.pp_double[i][0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
        }
        return;
    }

    /*
     * k-means++ initialization
     */
    if( initalgo==2 )
    {

        /*
         * Prepare distances array.
         * Select initial center at random.
         */
        rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
        for(i=0; i<=npoints-1; i++)
        {
            initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
        }
        ptidx = hqrnduniformi(rs, npoints, _state);
        ae_v_move(&ct->ptr.pp_double[0][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));

        /*
         * For each newly added center repeat:
         * * reevaluate distances from points to best centers
         * * sample points with probability dependent on distance
         * * add new center
         */
        for(cidx=0; cidx<=k-2; cidx++)
        {

            /*
             * Reevaluate distances
             */
            s = 0.0;
            for(i=0; i<=npoints-1; i++)
            {
                v = 0.0;
                for(j=0; j<=nvars-1; j++)
                {
                    vv = xy->ptr.pp_double[i][j]-ct->ptr.pp_double[cidx][j];
                    v = v+vv*vv;
                }
                if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
                {
                    initbuf->ra0.ptr.p_double[i] = v;
                }
                s = s+initbuf->ra0.ptr.p_double[i];
            }

            /*
             * If all distances are zero, it means that we can not find enough
             * distinct points. In this case we just select a non-distinct center
             * at random and continue iterations. This issue will be handled
             * later in the FixCenters() function.
             */
            if( ae_fp_eq(s,0.0) )
            {
                ptidx = hqrnduniformi(rs, npoints, _state);
                ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
                continue;
            }

            /*
             * Select point as center using its distance.
             * We also handle situation when because of rounding errors
             * no point was selected - in this case, last non-zero one
             * will be used.
             */
            v = hqrnduniformr(rs, _state);
            vv = 0.0;
            lastnz = -1;
            ptidx = -1;
            for(i=0; i<=npoints-1; i++)
            {
                if( ae_fp_eq(initbuf->ra0.ptr.p_double[i],0.0) )
                {
                    continue;
                }
                lastnz = i;
                vv = vv+initbuf->ra0.ptr.p_double[i];
                if( ae_fp_less_eq(v,vv/s) )
                {
                    ptidx = i;
                    break;
                }
            }
            ae_assert(lastnz>=0, "SelectInitialCenters: integrity error", _state);
            if( ptidx<0 )
            {
                ptidx = lastnz;
            }
            ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
        }
        return;
    }

    /*
     * "Fast-greedy" algorithm based on "Scalable k-means++".
     *
     * We perform several rounds, within each round we sample about 0.5*K points
     * (not exactly 0.5*K) until we have 2*K points sampled. Before each round
     * we calculate distances from dataset points to closest points sampled so far.
     * We sample dataset points independently, using (distance times 0.5*K divided
     * by total distance) as the sampling probability (similar to k-means++, but
     * each point is sampled independently; after each round we have roughly
     * 0.5*K points added to the sample).
     *
     * After sampling is done, we run the "greedy" version of k-means++ on this
     * subsample, which selects the most distant point on every round.
     */
    if( initalgo==3 )
    {

        /*
         * Prepare arrays.
         * Select initial center at random, add it to "new" part of sample,
         * which is stored at the beginning of the array
         */
        samplesize = 2*k;
        samplescale = 0.5*k;
        rmatrixsetlengthatleast(&initbuf->rm0, samplesize, nvars, _state);
        ptidx = hqrnduniformi(rs, npoints, _state);
        ae_v_move(&initbuf->rm0.ptr.pp_double[0][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
        samplescntnew = 1;
        samplescntall = 1;
        rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
        rvectorsetlengthatleast(&initbuf->ra1, npoints, _state);
        ivectorsetlengthatleast(&initbuf->ia1, npoints, _state);
        for(i=0; i<=npoints-1; i++)
        {
            initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
        }

        /*
         * Repeat until samples count is 2*K
         */
        while(samplescntall<samplesize)
        {

            /*
             * Evaluate distances from points to NEW centers, store to RA1.
             * Reset counter of "new" centers.
             */
            kmeansupdatedistances(xy, 0, npoints, nvars, &initbuf->rm0, samplescntall-samplescntnew, samplescntall, &initbuf->ia1, &initbuf->ra1, updatepool, _state);
            samplescntnew = 0;

            /*
             * Merge new distances with old ones.
             * Calculate sum of distances, if sum is exactly zero - fill sample
             * by randomly selected points and terminate.
             */
            s = 0.0;
            for(i=0; i<=npoints-1; i++)
            {
                initbuf->ra0.ptr.p_double[i] = ae_minreal(initbuf->ra0.ptr.p_double[i], initbuf->ra1.ptr.p_double[i], _state);
                s = s+initbuf->ra0.ptr.p_double[i];
            }
            if( ae_fp_eq(s,0.0) )
            {
                while(samplescntall<samplesize)
                {
                    ptidx = hqrnduniformi(rs, npoints, _state);
                    ae_v_move(&initbuf->rm0.ptr.pp_double[samplescntall][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
                    inc(&samplescntall, _state);
                    inc(&samplescntnew, _state);
                }
                break;
            }

            /*
             * Sample points independently.
             */
            for(i=0; i<=npoints-1; i++)
            {
                if( samplescntall==samplesize )
                {
                    break;
                }
                if( ae_fp_eq(initbuf->ra0.ptr.p_double[i],0.0) )
                {
                    continue;
                }
                if( ae_fp_less_eq(hqrnduniformr(rs, _state),samplescale*initbuf->ra0.ptr.p_double[i]/s) )
                {
                    ae_v_move(&initbuf->rm0.ptr.pp_double[samplescntall][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
                    inc(&samplescntall, _state);
                    inc(&samplescntnew, _state);
                }
            }
        }

        /*
         * Run greedy version of k-means on sampled points
         */
        rvectorsetlengthatleast(&initbuf->ra0, samplescntall, _state);
        for(i=0; i<=samplescntall-1; i++)
        {
            initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
        }
        ptidx = hqrnduniformi(rs, samplescntall, _state);
        ae_v_move(&ct->ptr.pp_double[0][0], 1, &initbuf->rm0.ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
        for(cidx=0; cidx<=k-2; cidx++)
        {

            /*
             * Reevaluate distances
             */
            for(i=0; i<=samplescntall-1; i++)
            {
                v = 0.0;
                for(j=0; j<=nvars-1; j++)
                {
                    vv = initbuf->rm0.ptr.pp_double[i][j]-ct->ptr.pp_double[cidx][j];
                    v = v+vv*vv;
                }
                if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
                {
                    initbuf->ra0.ptr.p_double[i] = v;
                }
            }

            /*
             * Select point as center in greedy manner - the most distant
             * point is selected.
             */
            ptidx = 0;
            for(i=0; i<=samplescntall-1; i++)
            {
                if( ae_fp_greater(initbuf->ra0.ptr.p_double[i],initbuf->ra0.ptr.p_double[ptidx]) )
                {
                    ptidx = i;
                }
            }
            ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &initbuf->rm0.ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
        }
        return;
    }

    /*
     * Internal error
     */
    ae_assert(ae_false, "SelectInitialCenters: internal error", _state);
}
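
/*
 * Added note (commentary, not original ALGLIB documentation): in the
 * classic k-means++ branch above (InitAlgo=2), after each center is
 * added, point i is chosen as the next center with probability
 * proportional to its squared distance to the nearest center selected
 * so far:
 *
 *     P(i) = D(i)^2 / sum_j D(j)^2,   where D(i)^2 is stored in RA0[i]
 *
 * The code implements this by drawing v uniformly from [0,1) and walking
 * the prefix sums of RA0[] until the cumulative mass vv/s exceeds v;
 * LastNZ guards against rounding errors leaving no point selected.
 */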


/*************************************************************************
This function "fixes" centers, i.e. replaces ones which have no neighbor
points by new centers which have at least one neighbor. If it is impossible
to fix centers (not enough distinct points in the dataset), this function
returns False.

INPUT PARAMETERS:
    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
    NPoints     -   points count, >=1
    NVars       -   number of variables, NVars>=1
    CT          -   centers
    K           -   number of centers, K>=1
    InitBuf     -   internal buffer, possibly uninitialized instance of
                    APBuffers. It is recommended to use this instance only
                    with SelectInitialCenters() and FixCenters() functions,
                    because these functions may allocate really large storage.
    UpdatePool  -   shared pool seeded with instance of APBuffers structure
                    (seed instance can be uninitialized). Used internally with
                    KMeansUpdateDistances() function. It is recommended
                    to use this pool ONLY with KMeansUpdateDistances()
                    function.

OUTPUT PARAMETERS:
    CT          -   set of K centers, one per row

RESULT:
    True on success, False on failure (impossible to create K independent clusters)

  -- ALGLIB --
     Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
static ae_bool clustering_fixcenters(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     /* Real */ ae_matrix* ct,
     ae_int_t k,
     apbuffers* initbuf,
     ae_shared_pool* updatepool,
     ae_state *_state)
{
    ae_int_t fixiteration;
    ae_int_t centertofix;
    ae_int_t i;
    ae_int_t j;
    ae_int_t pdistant;
    double ddistant;
    double v;
    ae_bool result;


    ae_assert(npoints>=1, "FixCenters: internal error", _state);
    ae_assert(nvars>=1, "FixCenters: internal error", _state);
    ae_assert(k>=1, "FixCenters: internal error", _state);

    /*
     * Calculate distances from points to best centers (RA0)
     * and best center indexes (IA0)
     */
    ivectorsetlengthatleast(&initbuf->ia0, npoints, _state);
    rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
    kmeansupdatedistances(xy, 0, npoints, nvars, ct, 0, k, &initbuf->ia0, &initbuf->ra0, updatepool, _state);

    /*
     * Repeat loop:
     * * find first center which has no corresponding point
     * * set it to the most distant (from the rest of the centerset) point
     * * recalculate distances, update IA0/RA0
     * * repeat
     *
     * Loop is repeated for at most 2*K iterations. It is stopped once we have
     * no "empty" clusters.
     */
    bvectorsetlengthatleast(&initbuf->ba0, k, _state);
    for(fixiteration=0; fixiteration<=2*k; fixiteration++)
    {

        /*
         * Select center to fix (one which is not mentioned in IA0),
         * terminate if there is no such center.
         * BA0[] stores True for centers which have at least one point.
         */
        for(i=0; i<=k-1; i++)
        {
            initbuf->ba0.ptr.p_bool[i] = ae_false;
        }
        for(i=0; i<=npoints-1; i++)
        {
            initbuf->ba0.ptr.p_bool[initbuf->ia0.ptr.p_int[i]] = ae_true;
        }
        centertofix = -1;
        for(i=0; i<=k-1; i++)
        {
            if( !initbuf->ba0.ptr.p_bool[i] )
            {
                centertofix = i;
                break;
            }
        }
        if( centertofix<0 )
        {
            result = ae_true;
            return result;
        }

        /*
         * Replace center to fix by the most distant point.
         * Update IA0/RA0
         */
        pdistant = 0;
        ddistant = initbuf->ra0.ptr.p_double[pdistant];
        for(i=0; i<=npoints-1; i++)
        {
            if( ae_fp_greater(initbuf->ra0.ptr.p_double[i],ddistant) )
            {
                ddistant = initbuf->ra0.ptr.p_double[i];
                pdistant = i;
            }
        }
        if( ae_fp_eq(ddistant,0.0) )
        {
            break;
        }
        ae_v_move(&ct->ptr.pp_double[centertofix][0], 1, &xy->ptr.pp_double[pdistant][0], 1, ae_v_len(0,nvars-1));
        for(i=0; i<=npoints-1; i++)
        {
            v = 0.0;
            for(j=0; j<=nvars-1; j++)
            {
                v = v+ae_sqr(xy->ptr.pp_double[i][j]-ct->ptr.pp_double[centertofix][j], _state);
            }
            if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
            {
                initbuf->ra0.ptr.p_double[i] = v;
                initbuf->ia0.ptr.p_int[i] = centertofix;
            }
        }
    }
    result = ae_false;
    return result;
}
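
/*
 * Added note: FixCenters() is the recovery path used by the Lloyd loop
 * in KMeansGenerateInternal() when a cluster loses all of its points.
 * Each "empty" center is moved onto the point that is currently farthest
 * from its own nearest center (the largest entry of RA0), which gives
 * that center at least one point; failure is reported only when every
 * remaining point coincides with some center (DDistant=0), i.e. the
 * dataset contains fewer than K distinct points.
 */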


/*************************************************************************
This function performs agglomerative hierarchical clustering using
precomputed distance matrix. Internal function, should not be called
directly.

INPUT PARAMETERS:
    S       -   clusterizer state, initialized by ClusterizerCreate()
    D       -   distance matrix, array[S.NPoints,S.NPoints]
                Contents of the matrix are destroyed during
                algorithm operation.

OUTPUT PARAMETERS:
    Rep     -   clustering results; see description of AHCReport
                structure for more information.

  -- ALGLIB --
     Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
static void clustering_clusterizerrunahcinternal(clusterizerstate* s,
     /* Real */ ae_matrix* d,
     ahcreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    ae_int_t mergeidx;
    ae_int_t c0;
    ae_int_t c1;
    ae_int_t s0;
    ae_int_t s1;
    ae_int_t ar;
    ae_int_t br;
    ae_int_t npoints;
    ae_vector cidx;
    ae_vector csizes;
    ae_vector nnidx;
    ae_matrix cinfo;
    ae_int_t n0;
    ae_int_t n1;
    ae_int_t ni;
    double d01;

    ae_frame_make(_state, &_frame_block);
    memset(&cidx, 0, sizeof(cidx));
    memset(&csizes, 0, sizeof(csizes));
    memset(&nnidx, 0, sizeof(nnidx));
    memset(&cinfo, 0, sizeof(cinfo));
    ae_vector_init(&cidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&csizes, 0, DT_INT, _state, ae_true);
    ae_vector_init(&nnidx, 0, DT_INT, _state, ae_true);
    ae_matrix_init(&cinfo, 0, 0, DT_INT, _state, ae_true);

    npoints = s->npoints;

    /*
     * Fill Rep.NPoints, quick exit when NPoints<=1
     */
    rep->npoints = npoints;
    if( npoints==0 )
    {
        ae_vector_set_length(&rep->p, 0, _state);
        ae_matrix_set_length(&rep->z, 0, 0, _state);
        ae_matrix_set_length(&rep->pz, 0, 0, _state);
        ae_matrix_set_length(&rep->pm, 0, 0, _state);
        ae_vector_set_length(&rep->mergedist, 0, _state);
        rep->terminationtype = 1;
        ae_frame_leave(_state);
        return;
    }
    if( npoints==1 )
    {
        ae_vector_set_length(&rep->p, 1, _state);
        ae_matrix_set_length(&rep->z, 0, 0, _state);
        ae_matrix_set_length(&rep->pz, 0, 0, _state);
        ae_matrix_set_length(&rep->pm, 0, 0, _state);
        ae_vector_set_length(&rep->mergedist, 0, _state);
        rep->p.ptr.p_int[0] = 0;
        rep->terminationtype = 1;
        ae_frame_leave(_state);
        return;
    }
    ae_matrix_set_length(&rep->z, npoints-1, 2, _state);
    ae_vector_set_length(&rep->mergedist, npoints-1, _state);
    rep->terminationtype = 1;

    /*
     * Build list of nearest neighbors
     */
    ae_vector_set_length(&nnidx, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {

        /*
         * Calculate index of the nearest neighbor
         */
        k = -1;
        v = ae_maxrealnumber;
        for(j=0; j<=npoints-1; j++)
        {
            if( j!=i&&ae_fp_less(d->ptr.pp_double[i][j],v) )
            {
                k = j;
                v = d->ptr.pp_double[i][j];
            }
        }
        ae_assert(ae_fp_less(v,ae_maxrealnumber), "ClusterizerRunAHC: internal error", _state);
        nnidx.ptr.p_int[i] = k;
    }

    /*
     * For AHCAlgo=4 (Ward's method) replace distances by their squares times 0.5
     */
    if( s->ahcalgo==4 )
    {
        for(i=0; i<=npoints-1; i++)
        {
            for(j=0; j<=npoints-1; j++)
            {
                d->ptr.pp_double[i][j] = 0.5*d->ptr.pp_double[i][j]*d->ptr.pp_double[i][j];
            }
        }
    }

    /*
     * Distance matrix is built, perform merges.
     *
     * NOTE 1: CIdx is array[NPoints] which maps rows/columns of the
     *         distance matrix D to indexes of clusters. Values of CIdx
     *         from [0,NPoints) denote single-point clusters, and values
     *         from [NPoints,2*NPoints-1) denote ones obtained by merging
     *         smaller clusters. Negative values correspond to absent clusters.
     *
     *         Initially it contains [0...NPoints-1], after each merge
     *         one element of CIdx (one with index C0) is replaced by
     *         NPoints+MergeIdx, and another one with index C1 is
     *         rewritten by -1.
     *
     * NOTE 2: CSizes is array[NPoints] which stores sizes of clusters.
     */
    ae_vector_set_length(&cidx, npoints, _state);
    ae_vector_set_length(&csizes, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {
        cidx.ptr.p_int[i] = i;
        csizes.ptr.p_int[i] = 1;
    }
    for(mergeidx=0; mergeidx<=npoints-2; mergeidx++)
    {

        /*
         * Select pair of clusters (C0,C1) with CIdx[C0]<CIdx[C1] to merge.
         */
        c0 = -1;
        c1 = -1;
        d01 = ae_maxrealnumber;
        for(i=0; i<=npoints-1; i++)
        {
            if( cidx.ptr.p_int[i]>=0 )
            {
                if( ae_fp_less(d->ptr.pp_double[i][nnidx.ptr.p_int[i]],d01) )
                {
                    c0 = i;
                    c1 = nnidx.ptr.p_int[i];
                    d01 = d->ptr.pp_double[i][nnidx.ptr.p_int[i]];
                }
            }
        }
        ae_assert(ae_fp_less(d01,ae_maxrealnumber), "ClusterizerRunAHC: internal error", _state);
        if( cidx.ptr.p_int[c0]>cidx.ptr.p_int[c1] )
        {
            i = c1;
            c1 = c0;
            c0 = i;
        }

        /*
         * Fill one row of Rep.Z and one element of Rep.MergeDist
         */
        rep->z.ptr.pp_int[mergeidx][0] = cidx.ptr.p_int[c0];
        rep->z.ptr.pp_int[mergeidx][1] = cidx.ptr.p_int[c1];
        rep->mergedist.ptr.p_double[mergeidx] = d01;

        /*
         * Update distance matrix:
         * * row/column C0 are updated by distances to the new cluster
         * * row/column C1 are considered empty (we can fill them by zeros,
         *   but do not want to spend time - we just ignore them)
         *
         * NOTE: it is important to update distance matrix BEFORE CIdx/CSizes
         *       are updated.
         */
        ae_assert((((s->ahcalgo==0||s->ahcalgo==1)||s->ahcalgo==2)||s->ahcalgo==3)||s->ahcalgo==4, "ClusterizerRunAHC: internal error", _state);
        for(i=0; i<=npoints-1; i++)
        {
            if( i!=c0&&i!=c1 )
            {
                n0 = csizes.ptr.p_int[c0];
                n1 = csizes.ptr.p_int[c1];
                ni = csizes.ptr.p_int[i];
                if( s->ahcalgo==0 )
                {
                    d->ptr.pp_double[i][c0] = ae_maxreal(d->ptr.pp_double[i][c0], d->ptr.pp_double[i][c1], _state);
                }
                if( s->ahcalgo==1 )
                {
                    d->ptr.pp_double[i][c0] = ae_minreal(d->ptr.pp_double[i][c0], d->ptr.pp_double[i][c1], _state);
                }
                if( s->ahcalgo==2 )
                {
                    d->ptr.pp_double[i][c0] = (csizes.ptr.p_int[c0]*d->ptr.pp_double[i][c0]+csizes.ptr.p_int[c1]*d->ptr.pp_double[i][c1])/(csizes.ptr.p_int[c0]+csizes.ptr.p_int[c1]);
                }
                if( s->ahcalgo==3 )
                {
                    d->ptr.pp_double[i][c0] = (d->ptr.pp_double[i][c0]+d->ptr.pp_double[i][c1])/2;
                }
                if( s->ahcalgo==4 )
                {
                    d->ptr.pp_double[i][c0] = ((n0+ni)*d->ptr.pp_double[i][c0]+(n1+ni)*d->ptr.pp_double[i][c1]-ni*d01)/(n0+n1+ni);
                }
                d->ptr.pp_double[c0][i] = d->ptr.pp_double[i][c0];
            }
        }
|
|
|
|
/*
|
|
* Update CIdx and CSizes
|
|
*/
|
|
cidx.ptr.p_int[c0] = npoints+mergeidx;
|
|
cidx.ptr.p_int[c1] = -1;
|
|
csizes.ptr.p_int[c0] = csizes.ptr.p_int[c0]+csizes.ptr.p_int[c1];
|
|
csizes.ptr.p_int[c1] = 0;
|
|
|
|
/*
|
|
* Update nearest neighbors array:
|
|
* * update nearest neighbors of everything except for C0/C1
|
|
* * update neighbors of C0/C1
|
|
*/
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( (cidx.ptr.p_int[i]>=0&&i!=c0)&&(nnidx.ptr.p_int[i]==c0||nnidx.ptr.p_int[i]==c1) )
|
|
{
|
|
|
|
/*
|
|
* I-th cluster which is distinct from C0/C1 has former C0/C1 cluster as its nearest
|
|
* neighbor. We handle this issue depending on specific AHC algorithm being used.
|
|
*/
|
|
if( s->ahcalgo==1 )
|
|
{
|
|
|
|
/*
|
|
* Single linkage. Merging of two clusters together
|
|
* does NOT change distances between new cluster and
|
|
* other clusters.
|
|
*
|
|
* The only thing we have to do is to update nearest neighbor index
|
|
*/
|
|
nnidx.ptr.p_int[i] = c0;
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Something other than single linkage. We have to re-examine
|
|
* all the row to find nearest neighbor.
|
|
*/
|
|
k = -1;
|
|
v = ae_maxrealnumber;
|
|
for(j=0; j<=npoints-1; j++)
|
|
{
|
|
if( (cidx.ptr.p_int[j]>=0&&j!=i)&&ae_fp_less(d->ptr.pp_double[i][j],v) )
|
|
{
|
|
k = j;
|
|
v = d->ptr.pp_double[i][j];
|
|
}
|
|
}
|
|
ae_assert(ae_fp_less(v,ae_maxrealnumber)||mergeidx==npoints-2, "ClusterizerRunAHC: internal error", _state);
|
|
nnidx.ptr.p_int[i] = k;
|
|
}
|
|
}
|
|
}
|
|
k = -1;
|
|
v = ae_maxrealnumber;
|
|
for(j=0; j<=npoints-1; j++)
|
|
{
|
|
if( (cidx.ptr.p_int[j]>=0&&j!=c0)&&ae_fp_less(d->ptr.pp_double[c0][j],v) )
|
|
{
|
|
k = j;
|
|
v = d->ptr.pp_double[c0][j];
|
|
}
|
|
}
|
|
ae_assert(ae_fp_less(v,ae_maxrealnumber)||mergeidx==npoints-2, "ClusterizerRunAHC: internal error", _state);
|
|
nnidx.ptr.p_int[c0] = k;
|
|
}
|
|
|
|
/*
|
|
* Calculate Rep.P and Rep.PM.
|
|
*
|
|
* In order to do that, we fill CInfo matrix - (2*NPoints-1)*3 matrix,
|
|
* with I-th row containing:
|
|
* * CInfo[I,0] - size of I-th cluster
|
|
* * CInfo[I,1] - beginning of I-th cluster
|
|
* * CInfo[I,2] - end of I-th cluster
|
|
* * CInfo[I,3] - height of I-th cluster
|
|
*
|
|
* We perform it as follows:
|
|
* * first NPoints clusters have unit size (CInfo[I,0]=1) and zero
|
|
* height (CInfo[I,3]=0)
|
|
* * we replay NPoints-1 merges from first to last and fill sizes of
|
|
* corresponding clusters (new size is a sum of sizes of clusters
|
|
* being merged) and height (new height is max(heights)+1).
|
|
* * now we ready to determine locations of clusters. Last cluster
|
|
* spans entire dataset, we know it. We replay merges from last to
|
|
* first, during each merge we already know location of the merge
|
|
* result, and we can position first cluster to the left part of
|
|
* the result, and second cluster to the right part.
|
|
*/
|
|
ae_vector_set_length(&rep->p, npoints, _state);
|
|
ae_matrix_set_length(&rep->pm, npoints-1, 6, _state);
|
|
ae_matrix_set_length(&cinfo, 2*npoints-1, 4, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
cinfo.ptr.pp_int[i][0] = 1;
|
|
cinfo.ptr.pp_int[i][3] = 0;
|
|
}
|
|
for(i=0; i<=npoints-2; i++)
|
|
{
|
|
cinfo.ptr.pp_int[npoints+i][0] = cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][0]][0]+cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][1]][0];
|
|
cinfo.ptr.pp_int[npoints+i][3] = ae_maxint(cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][0]][3], cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][1]][3], _state)+1;
|
|
}
|
|
cinfo.ptr.pp_int[2*npoints-2][1] = 0;
|
|
cinfo.ptr.pp_int[2*npoints-2][2] = npoints-1;
|
|
for(i=npoints-2; i>=0; i--)
|
|
{
|
|
|
|
/*
|
|
* We merge C0 which spans [A0,B0] and C1 (spans [A1,B1]),
|
|
* with unknown A0, B0, A1, B1. However, we know that result
|
|
* is CR, which spans [AR,BR] with known AR/BR, and we know
|
|
* sizes of C0, C1, CR (denotes as S0, S1, SR).
|
|
*/
|
|
c0 = rep->z.ptr.pp_int[i][0];
|
|
c1 = rep->z.ptr.pp_int[i][1];
|
|
s0 = cinfo.ptr.pp_int[c0][0];
|
|
s1 = cinfo.ptr.pp_int[c1][0];
|
|
ar = cinfo.ptr.pp_int[npoints+i][1];
|
|
br = cinfo.ptr.pp_int[npoints+i][2];
|
|
cinfo.ptr.pp_int[c0][1] = ar;
|
|
cinfo.ptr.pp_int[c0][2] = ar+s0-1;
|
|
cinfo.ptr.pp_int[c1][1] = br-(s1-1);
|
|
cinfo.ptr.pp_int[c1][2] = br;
|
|
rep->pm.ptr.pp_int[i][0] = cinfo.ptr.pp_int[c0][1];
|
|
rep->pm.ptr.pp_int[i][1] = cinfo.ptr.pp_int[c0][2];
|
|
rep->pm.ptr.pp_int[i][2] = cinfo.ptr.pp_int[c1][1];
|
|
rep->pm.ptr.pp_int[i][3] = cinfo.ptr.pp_int[c1][2];
|
|
rep->pm.ptr.pp_int[i][4] = cinfo.ptr.pp_int[c0][3];
|
|
rep->pm.ptr.pp_int[i][5] = cinfo.ptr.pp_int[c1][3];
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
ae_assert(cinfo.ptr.pp_int[i][1]==cinfo.ptr.pp_int[i][2], "Assertion failed", _state);
|
|
rep->p.ptr.p_int[i] = cinfo.ptr.pp_int[i][1];
|
|
}
|
|
|
|
/*
|
|
* Calculate Rep.PZ
|
|
*/
|
|
ae_matrix_set_length(&rep->pz, npoints-1, 2, _state);
|
|
for(i=0; i<=npoints-2; i++)
|
|
{
|
|
rep->pz.ptr.pp_int[i][0] = rep->z.ptr.pp_int[i][0];
|
|
rep->pz.ptr.pp_int[i][1] = rep->z.ptr.pp_int[i][1];
|
|
if( rep->pz.ptr.pp_int[i][0]<npoints )
|
|
{
|
|
rep->pz.ptr.pp_int[i][0] = rep->p.ptr.p_int[rep->pz.ptr.pp_int[i][0]];
|
|
}
|
|
if( rep->pz.ptr.pp_int[i][1]<npoints )
|
|
{
|
|
rep->pz.ptr.pp_int[i][1] = rep->p.ptr.p_int[rep->pz.ptr.pp_int[i][1]];
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
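/*************************************************************************
Worked example of the dendrogram encoding produced above. Values in Rep.Z
below NPoints denote single-point clusters, values in
[NPoints,2*NPoints-1) denote earlier merge results; pairs are ordered so
that the smaller index comes first. The dataset and distances below are
hypothetical, chosen only to illustrate the encoding.

Say NPoints=3 and single linkage merges point 0 with point 1 first, then
merges the result with point 2:
    Rep.Z         = [[0,1],[2,3]]        row 0 creates cluster #3=NPoints+0,
                                         row 1 merges point 2 with cluster #3
    Rep.MergeDist = [d(0,1), d({0,1},2)]
Rep.P, Rep.PZ and Rep.PM recode the same merges in "dendrogram plot"
coordinates, as described in the comments above.
*************************************************************************/

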
/*************************************************************************
This function recursively evaluates distance matrix for SOME (not all!)
distance types.

INPUT PARAMETERS:
    XY      -   array[?,NFeatures], dataset
    NFeatures-  number of features, >=1
    DistType-   distance function:
                *  0    Chebyshev distance  (L-inf norm)
                *  1    city block distance (L1 norm)
    D       -   preallocated output matrix
    I0,I1   -   half interval of rows to calculate: [I0,I1) is processed
    J0,J1   -   half interval of cols to calculate: [J0,J1) is processed

OUTPUT PARAMETERS:
    D       -   array[NPoints,NPoints], distance matrix
                upper triangle and main diagonal are initialized with
                data.

NOTE: intersection of [I0,I1) and [J0,J1) may completely lie in upper
      triangle, only partially intersect with it, or have zero
      intersection. In any case, only intersection of submatrix given by
      [I0,I1)*[J0,J1) with upper triangle of the matrix is evaluated.

      Say, for 4x4 distance matrix A:
      * [0,2)*[0,2) will result in evaluation of A00, A01, A11
      * [2,4)*[2,4) will result in evaluation of A22, A23, A33
      * [2,4)*[0,2) will result in evaluation of empty set of elements

  -- ALGLIB --
     Copyright 07.04.2013 by Bochkanov Sergey
*************************************************************************/
static void clustering_evaluatedistancematrixrec(/* Real    */ ae_matrix* xy,
     ae_int_t nfeatures,
     ae_int_t disttype,
     /* Real    */ ae_matrix* d,
     ae_int_t i0,
     ae_int_t i1,
     ae_int_t j0,
     ae_int_t j1,
     ae_state *_state)
{
    double rcomplexity;
    ae_int_t len0;
    ae_int_t len1;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    double vv;


    ae_assert(disttype==0||disttype==1, "EvaluateDistanceMatrixRec: incorrect DistType", _state);
    
    /*
     * Normalize J0/J1:
     * * J0:=max(J0,I0) - we ignore lower triangle
     * * J1:=max(J1,J0) - normalize J1
     */
    j0 = ae_maxint(j0, i0, _state);
    j1 = ae_maxint(j1, j0, _state);
    if( j1<=j0||i1<=i0 )
    {
        return;
    }
    rcomplexity = clustering_complexitymultiplier*rmul3((double)(i1-i0), (double)(j1-j0), (double)(nfeatures), _state);
    if( (i1-i0>2||j1-j0>2)&&ae_fp_greater_eq(rcomplexity,smpactivationlevel(_state)) )
    {
        if( _trypexec_clustering_evaluatedistancematrixrec(xy,nfeatures,disttype,d,i0,i1,j0,j1, _state) )
        {
            return;
        }
    }
    
    /*
     * Try to process in parallel. Two conditions must hold in order to
     * activate parallel processing:
     * 1. I1-I0>2 or J1-J0>2
     * 2. (I1-I0)*(J1-J0)*NFeatures>=ParallelComplexity
     *
     * NOTE: all quantities are converted to reals in order to avoid
     *       integer overflow during multiplication
     *
     * NOTE: strict inequality in (1) is necessary to reduce task to 2x2
     *       basecases. In future versions we will be able to handle such
     *       basecases more efficiently than 1x1 cases.
     */
    if( ae_fp_greater_eq(rcomplexity,spawnlevel(_state))&&(i1-i0>2||j1-j0>2) )
    {
        
        /*
         * Recursive division along largest of dimensions
         */
        if( i1-i0>j1-j0 )
        {
            splitlengtheven(i1-i0, &len0, &len1, _state);
            clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i0+len0, j0, j1, _state);
            clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0+len0, i1, j0, j1, _state);
        }
        else
        {
            splitlengtheven(j1-j0, &len0, &len1, _state);
            clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i1, j0, j0+len0, _state);
            clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i1, j0+len0, j1, _state);
        }
        return;
    }
    
    /*
     * Sequential processing
     */
    for(i=i0; i<=i1-1; i++)
    {
        for(j=j0; j<=j1-1; j++)
        {
            if( j>=i )
            {
                v = 0.0;
                if( disttype==0 )
                {
                    for(k=0; k<=nfeatures-1; k++)
                    {
                        vv = xy->ptr.pp_double[i][k]-xy->ptr.pp_double[j][k];
                        if( ae_fp_less(vv,(double)(0)) )
                        {
                            vv = -vv;
                        }
                        if( ae_fp_greater(vv,v) )
                        {
                            v = vv;
                        }
                    }
                }
                if( disttype==1 )
                {
                    for(k=0; k<=nfeatures-1; k++)
                    {
                        vv = xy->ptr.pp_double[i][k]-xy->ptr.pp_double[j][k];
                        if( ae_fp_less(vv,(double)(0)) )
                        {
                            vv = -vv;
                        }
                        v = v+vv;
                    }
                }
                d->ptr.pp_double[i][j] = v;
            }
        }
    }
}


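/*************************************************************************
Usage sketch: a minimal standalone version of the two distance functions
handled by the sequential base case above (DistType=0 - Chebyshev,
DistType=1 - city block). It mirrors the inner loop without ALGLIB
machinery; the function name is illustrative only.

    #include <cmath>

    // L-inf norm (disttype==0): max of |x[k]-y[k]|
    // L1 norm   (disttype==1): sum of |x[k]-y[k]|
    static double rowdist(const double *x, const double *y,
                          int nfeatures, int disttype)
    {
        double v = 0.0;
        for(int k=0; k<nfeatures; k++)
        {
            double vv = std::fabs(x[k]-y[k]);
            v = (disttype==0) ? (vv>v ? vv : v) : (v+vv);
        }
        return v;
    }
*************************************************************************/

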
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_clustering_evaluatedistancematrixrec(/* Real    */ ae_matrix* xy,
    ae_int_t nfeatures,
    ae_int_t disttype,
    /* Real    */ ae_matrix* d,
    ae_int_t i0,
    ae_int_t i1,
    ae_int_t j0,
    ae_int_t j1,
    ae_state *_state)
{
    return ae_false;
}


void _kmeansbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    kmeansbuffers *p = (kmeansbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_init(&p->ct, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->ctbest, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->xycbest, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->xycprev, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->d2, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->csizes, 0, DT_INT, _state, make_automatic);
    _apbuffers_init(&p->initbuf, _state, make_automatic);
    ae_shared_pool_init(&p->updatepool, _state, make_automatic);
}


void _kmeansbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    kmeansbuffers *dst = (kmeansbuffers*)_dst;
    kmeansbuffers *src = (kmeansbuffers*)_src;
    ae_matrix_init_copy(&dst->ct, &src->ct, _state, make_automatic);
    ae_matrix_init_copy(&dst->ctbest, &src->ctbest, _state, make_automatic);
    ae_vector_init_copy(&dst->xycbest, &src->xycbest, _state, make_automatic);
    ae_vector_init_copy(&dst->xycprev, &src->xycprev, _state, make_automatic);
    ae_vector_init_copy(&dst->d2, &src->d2, _state, make_automatic);
    ae_vector_init_copy(&dst->csizes, &src->csizes, _state, make_automatic);
    _apbuffers_init_copy(&dst->initbuf, &src->initbuf, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->updatepool, &src->updatepool, _state, make_automatic);
}


void _kmeansbuffers_clear(void* _p)
{
    kmeansbuffers *p = (kmeansbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_clear(&p->ct);
    ae_matrix_clear(&p->ctbest);
    ae_vector_clear(&p->xycbest);
    ae_vector_clear(&p->xycprev);
    ae_vector_clear(&p->d2);
    ae_vector_clear(&p->csizes);
    _apbuffers_clear(&p->initbuf);
    ae_shared_pool_clear(&p->updatepool);
}


void _kmeansbuffers_destroy(void* _p)
{
    kmeansbuffers *p = (kmeansbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_destroy(&p->ct);
    ae_matrix_destroy(&p->ctbest);
    ae_vector_destroy(&p->xycbest);
    ae_vector_destroy(&p->xycprev);
    ae_vector_destroy(&p->d2);
    ae_vector_destroy(&p->csizes);
    _apbuffers_destroy(&p->initbuf);
    ae_shared_pool_destroy(&p->updatepool);
}


void _clusterizerstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    clusterizerstate *p = (clusterizerstate*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->d, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&p->tmpd, 0, 0, DT_REAL, _state, make_automatic);
    _apbuffers_init(&p->distbuf, _state, make_automatic);
    _kmeansbuffers_init(&p->kmeanstmp, _state, make_automatic);
}


void _clusterizerstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    clusterizerstate *dst = (clusterizerstate*)_dst;
    clusterizerstate *src = (clusterizerstate*)_src;
    dst->npoints = src->npoints;
    dst->nfeatures = src->nfeatures;
    dst->disttype = src->disttype;
    ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
    ae_matrix_init_copy(&dst->d, &src->d, _state, make_automatic);
    dst->ahcalgo = src->ahcalgo;
    dst->kmeansrestarts = src->kmeansrestarts;
    dst->kmeansmaxits = src->kmeansmaxits;
    dst->kmeansinitalgo = src->kmeansinitalgo;
    dst->kmeansdbgnoits = src->kmeansdbgnoits;
    dst->seed = src->seed;
    ae_matrix_init_copy(&dst->tmpd, &src->tmpd, _state, make_automatic);
    _apbuffers_init_copy(&dst->distbuf, &src->distbuf, _state, make_automatic);
    _kmeansbuffers_init_copy(&dst->kmeanstmp, &src->kmeanstmp, _state, make_automatic);
}


void _clusterizerstate_clear(void* _p)
{
    clusterizerstate *p = (clusterizerstate*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_clear(&p->xy);
    ae_matrix_clear(&p->d);
    ae_matrix_clear(&p->tmpd);
    _apbuffers_clear(&p->distbuf);
    _kmeansbuffers_clear(&p->kmeanstmp);
}


void _clusterizerstate_destroy(void* _p)
{
    clusterizerstate *p = (clusterizerstate*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_destroy(&p->xy);
    ae_matrix_destroy(&p->d);
    ae_matrix_destroy(&p->tmpd);
    _apbuffers_destroy(&p->distbuf);
    _kmeansbuffers_destroy(&p->kmeanstmp);
}


void _ahcreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    ahcreport *p = (ahcreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->p, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&p->z, 0, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&p->pz, 0, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&p->pm, 0, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->mergedist, 0, DT_REAL, _state, make_automatic);
}


void _ahcreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    ahcreport *dst = (ahcreport*)_dst;
    ahcreport *src = (ahcreport*)_src;
    dst->terminationtype = src->terminationtype;
    dst->npoints = src->npoints;
    ae_vector_init_copy(&dst->p, &src->p, _state, make_automatic);
    ae_matrix_init_copy(&dst->z, &src->z, _state, make_automatic);
    ae_matrix_init_copy(&dst->pz, &src->pz, _state, make_automatic);
    ae_matrix_init_copy(&dst->pm, &src->pm, _state, make_automatic);
    ae_vector_init_copy(&dst->mergedist, &src->mergedist, _state, make_automatic);
}


void _ahcreport_clear(void* _p)
{
    ahcreport *p = (ahcreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->p);
    ae_matrix_clear(&p->z);
    ae_matrix_clear(&p->pz);
    ae_matrix_clear(&p->pm);
    ae_vector_clear(&p->mergedist);
}


void _ahcreport_destroy(void* _p)
{
    ahcreport *p = (ahcreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->p);
    ae_matrix_destroy(&p->z);
    ae_matrix_destroy(&p->pz);
    ae_matrix_destroy(&p->pm);
    ae_vector_destroy(&p->mergedist);
}


void _kmeansreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    kmeansreport *p = (kmeansreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->cidx, 0, DT_INT, _state, make_automatic);
}


void _kmeansreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    kmeansreport *dst = (kmeansreport*)_dst;
    kmeansreport *src = (kmeansreport*)_src;
    dst->npoints = src->npoints;
    dst->nfeatures = src->nfeatures;
    dst->terminationtype = src->terminationtype;
    dst->iterationscount = src->iterationscount;
    dst->energy = src->energy;
    dst->k = src->k;
    ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
    ae_vector_init_copy(&dst->cidx, &src->cidx, _state, make_automatic);
}


void _kmeansreport_clear(void* _p)
{
    kmeansreport *p = (kmeansreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_clear(&p->c);
    ae_vector_clear(&p->cidx);
}


void _kmeansreport_destroy(void* _p)
{
    kmeansreport *p = (kmeansreport*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_destroy(&p->c);
    ae_vector_destroy(&p->cidx);
}


#endif

#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This function creates buffer structure which can be used to perform
parallel inference requests.

DF subpackage provides two sets of computing functions - ones which use
internal buffer of DF model (these functions are single-threaded because
they use the same buffer, which cannot be shared between threads), and
ones which use external buffer.

This function is used to initialize external buffer.

INPUT PARAMETERS
    Model       -   DF model which is associated with newly created buffer

OUTPUT PARAMETERS
    Buf         -   external buffer.


IMPORTANT: buffer object should be used only with model which was used to
           initialize buffer. Any attempt to use buffer with different
           object is dangerous - you may get integrity check failure
           (exception) because sizes of internal arrays do not fit to
           dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void dfcreatebuffer(decisionforest* model,
     decisionforestbuffer* buf,
     ae_state *_state)
{

    _decisionforestbuffer_clear(buf);

    ae_vector_set_length(&buf->x, model->nvars, _state);
    ae_vector_set_length(&buf->y, model->nclasses, _state);
}


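/*************************************************************************
Usage sketch: the intended pattern is one buffer per worker thread, all
attached to the same trained model, with the thread-safe dftsprocess()
(mentioned in the dfprocess() comments below) used instead of dfprocess().
Written against the C++ interface in the alglib namespace; treat the exact
wrapper signatures as an assumption to be checked against the reference
manual.

    // per-thread buffer attached to a shared, already-trained model
    alglib::decisionforestbuffer buf;
    alglib::dfcreatebuffer(model, buf);      // model: alglib::decisionforest
    alglib::dftsprocess(model, buf, x, y);   // x, y: alglib::real_1d_array
*************************************************************************/

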
/*************************************************************************
This subroutine creates DecisionForestBuilder object which is used to
train decision forests.

By default, new builder stores empty dataset and some reasonable default
settings. At the very least, you should specify dataset prior to building
decision forest. You can also tweak settings of the forest construction
algorithm (recommended, although default settings should work well).

Following actions are mandatory:
* calling dfbuildersetdataset() to specify dataset
* calling dfbuilderbuildrandomforest() to build decision forest using
  current dataset and default settings

Additionally, you may call:
* dfbuildersetrndvars() or dfbuildersetrndvarsratio() to specify number of
  variables randomly chosen for each split
* dfbuildersetsubsampleratio() to specify fraction of the dataset randomly
  subsampled to build each tree
* dfbuildersetseed() to control random seed chosen for tree construction

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildercreate(decisionforestbuilder* s, ae_state *_state)
{

    _decisionforestbuilder_clear(s);

    
    /*
     * Empty dataset
     */
    s->dstype = -1;
    s->npoints = 0;
    s->nvars = 0;
    s->nclasses = 1;
    
    /*
     * Default training settings
     */
    s->rdfalgo = 0;
    s->rdfratio = 0.5;
    s->rdfvars = 0.0;
    s->rdfglobalseed = 0;
    s->rdfsplitstrength = 2;
    s->rdfimportance = 0;
    
    /*
     * Other fields
     */
    s->rdfprogress = 0;
    s->rdftotal = 1;
}


/*************************************************************************
This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the forest construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   decision forest builder object
    XY          -   array[NPoints,NVars+1] (minimum size; actual size can
                    be larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * last column stores class number (in 0...NClasses-1)
                      or real value of the dependent variable
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   indicates type of the problem being solved:
                    * NClasses>=2 means that classification problem is
                      solved (last column of the dataset stores class
                      number)
                    * NClasses=1 means that regression problem is solved
                      (last column of the dataset stores variable value)

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetdataset(decisionforestbuilder* s,
     /* Real    */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;


    
    /*
     * Check parameters
     */
    ae_assert(npoints>=1, "dfbuildersetdataset: npoints<1", _state);
    ae_assert(nvars>=1, "dfbuildersetdataset: nvars<1", _state);
    ae_assert(nclasses>=1, "dfbuildersetdataset: nclasses<1", _state);
    ae_assert(xy->rows>=npoints, "dfbuildersetdataset: rows(xy)<npoints", _state);
    ae_assert(xy->cols>=nvars+1, "dfbuildersetdataset: cols(xy)<nvars+1", _state);
    ae_assert(apservisfinitematrix(xy, npoints, nvars+1, _state), "dfbuildersetdataset: xy parameter contains INFs or NANs", _state);
    if( nclasses>1 )
    {
        for(i=0; i<=npoints-1; i++)
        {
            j = ae_round(xy->ptr.pp_double[i][nvars], _state);
            ae_assert(j>=0&&j<nclasses, "dfbuildersetdataset: last column of xy contains invalid class number", _state);
        }
    }
    
    /*
     * Set dataset
     */
    s->dstype = 0;
    s->npoints = npoints;
    s->nvars = nvars;
    s->nclasses = nclasses;
    rvectorsetlengthatleast(&s->dsdata, npoints*nvars, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            s->dsdata.ptr.p_double[j*npoints+i] = xy->ptr.pp_double[i][j];
        }
    }
    if( nclasses>1 )
    {
        ivectorsetlengthatleast(&s->dsival, npoints, _state);
        for(i=0; i<=npoints-1; i++)
        {
            s->dsival.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
        }
    }
    else
    {
        rvectorsetlengthatleast(&s->dsrval, npoints, _state);
        for(i=0; i<=npoints-1; i++)
        {
            s->dsrval.ptr.p_double[i] = xy->ptr.pp_double[i][nvars];
        }
    }
}


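/*************************************************************************
Usage sketch: minimal dense dataset specification through the C++
interface (alglib namespace). The 2-feature/2-class data below are
hypothetical; because NClasses=2, the last column stores class numbers.

    alglib::decisionforestbuilder builder;
    alglib::real_2d_array xy = "[[0.0,0.1,0],[0.9,1.0,1],[1.1,0.9,1],[0.1,0.0,0]]";
    alglib::dfbuildercreate(builder);
    alglib::dfbuildersetdataset(builder, xy, 4, 2, 2); // npoints=4, nvars=2, nclasses=2
*************************************************************************/

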
/*************************************************************************
This function sets number of variables (in [1,NVars] range) used by
decision forest construction algorithm.

The default option is to use roughly sqrt(NVars) variables.

INPUT PARAMETERS:
    S           -   decision forest builder object
    RndVars     -   number of randomly selected variables; values outside
                    of [1,NVars] range are silently clipped.

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvars(decisionforestbuilder* s,
     ae_int_t rndvars,
     ae_state *_state)
{

    s->rdfvars = (double)(ae_maxint(rndvars, 1, _state));
}


/*************************************************************************
This function sets number of variables used by decision forest
construction algorithm as a fraction of total variable count, in (0,1)
range.

The default option is to use roughly sqrt(NVars) variables.

INPUT PARAMETERS:
    S           -   decision forest builder object
    F           -   round(NVars*F) variables are selected

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsratio(decisionforestbuilder* s,
     double f,
     ae_state *_state)
{

    ae_assert(ae_isfinite(f, _state), "dfbuildersetrndvarsratio: F is INF or NAN", _state);
    s->rdfvars = -ae_maxreal(f, ae_machineepsilon, _state);
}


/*************************************************************************
This function tells decision forest builder to automatically choose number
of variables used by decision forest construction algorithm. Roughly
sqrt(NVars) variables will be used.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsauto(decisionforestbuilder* s, ae_state *_state)
{

    s->rdfvars = (double)(0);
}


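/*************************************************************************
Usage sketch: the three setters above share one internal field, S.RDFVars,
with the encoding visible in their bodies - a positive value is an
explicit variable count, a negative value is minus the fraction of NVars,
and zero requests the default sqrt(NVars) heuristic. C++ interface calls
(builder assumed to be created already):

    alglib::dfbuildersetrndvars(builder, 3);         // exactly 3 variables per split
    alglib::dfbuildersetrndvarsratio(builder, 0.25); // round(0.25*NVars) variables
    alglib::dfbuildersetrndvarsauto(builder);        // back to sqrt(NVars) default
*************************************************************************/

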
/*************************************************************************
This function sets size of dataset subsample generated by the decision
forest construction algorithm. Size is specified as a fraction of total
dataset size.

The default option is to use 50% of the dataset for training, 50% for the
OOB estimates. You can decrease fraction F down to 10%, 1% or even below
in order to reduce overfitting.

INPUT PARAMETERS:
    S           -   decision forest builder object
    F           -   fraction of the dataset to use, in (0,1] range. Values
                    outside of this range will be silently clipped. At
                    least one element is always selected for the training
                    set.

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetsubsampleratio(decisionforestbuilder* s,
     double f,
     ae_state *_state)
{

    ae_assert(ae_isfinite(f, _state), "dfbuildersetsubsampleratio: F is INF or NAN", _state);
    s->rdfratio = ae_maxreal(f, ae_machineepsilon, _state);
}


/*************************************************************************
This function sets seed used by internal RNG for random subsampling and
random selection of variable subsets.

By default random seed is used, i.e. every time you build decision forest,
we seed generator with new value obtained from system-wide RNG. Thus,
decision forest builder returns non-deterministic results. You can change
such behavior by specifying fixed positive seed value.

INPUT PARAMETERS:
    S           -   decision forest builder object
    SeedVal     -   seed value:
                    * positive values are used for seeding RNG with fixed
                      seed, i.e. subsequent runs on same data will return
                      same decision forests
                    * non-positive seed means that random seed is used
                      for every run of builder, i.e. subsequent runs on
                      same datasets will return slightly different
                      decision forests

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetseed(decisionforestbuilder* s,
     ae_int_t seedval,
     ae_state *_state)
{

    s->rdfglobalseed = seedval;
}


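/*************************************************************************
Usage sketch of the reproducibility contract described above - a fixed
positive seed makes repeated builds on the same data deterministic, while
a non-positive seed re-randomizes every run (C++ interface):

    alglib::dfbuildersetseed(builder, 117); // same data -> same forest
    alglib::dfbuildersetseed(builder, 0);   // new random seed on every build
*************************************************************************/

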
/*************************************************************************
This function sets random decision forest construction algorithm.

As for now, only one decision forest construction algorithm is supported -
a dense "baseline" RDF algorithm.

INPUT PARAMETERS:
    S           -   decision forest builder object
    AlgoType    -   algorithm type:
                    * 0 = baseline dense RDF

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfalgo(decisionforestbuilder* s,
     ae_int_t algotype,
     ae_state *_state)
{

    ae_assert(algotype==0, "dfbuildersetrdfalgo: unexpected algotype", _state);
    s->rdfalgo = algotype;
}


/*************************************************************************
This function sets split selection algorithm used by decision forest
classifier. You may choose several algorithms, with different speed and
quality of the results.

INPUT PARAMETERS:
    S           -   decision forest builder object
    SplitStrength-  split type:
                    * 0 = split at the random position, fastest one
                    * 1 = split at the middle of the range
                    * 2 = strong split at the best point of the range (default)

OUTPUT PARAMETERS:
    S           -   decision forest builder

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfsplitstrength(decisionforestbuilder* s,
     ae_int_t splitstrength,
     ae_state *_state)
{

    ae_assert((splitstrength==0||splitstrength==1)||splitstrength==2, "dfbuildersetrdfsplitstrength: unexpected split type", _state);
    s->rdfsplitstrength = splitstrength;
}


/*************************************************************************
This function tells decision forest construction algorithm to use
Gini impurity based variable importance estimation (also known as MDI).

This version of importance estimation algorithm analyzes mean decrease in
impurity (MDI) on training sample during splits. The result is divided
by impurity at the root node in order to produce estimate in [0,1] range.

Such estimates are fast to calculate and beautifully normalized (sum to
one) but have the following downsides:
* They ALWAYS sum to 1.0, even if output is completely unpredictable. I.e.
  MDI allows us to order variables by importance, but does not tell us
  about "absolute" importances of variables
* there exists some bias towards continuous and high-cardinality
  categorical variables

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have the same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly the same picture will be produced for the "all
      variables provide information, no one is critical" situation and for
      the "all variables are critical, drop any one, everything is ruined"
      situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return the same results. It generally
      happens on problems with low test set error (a few percent at most)
      and large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancetrngini(decisionforestbuilder* s,
     ae_state *_state)
{

    s->rdfimportance = dforest_needtrngini;
}


/*************************************************************************
This function tells decision forest construction algorithm to use
out-of-bag version of Gini variable importance estimation (also known as
OOB-MDI).

This version of importance estimation algorithm analyzes mean decrease in
impurity (MDI) on out-of-bag sample during splits. The result is divided
by impurity at the root node in order to produce estimate in [0,1] range.

Such estimates are fast to calculate and resistant to overfitting issues
(thanks to the out-of-bag estimates used). However, OOB Gini rating has
the following downsides:
* there exists some bias towards continuous and high-cardinality
  categorical variables
* Gini rating allows us to order variables by importance, but it is hard
  to define importance of the variable by itself.

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have the same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly the same picture will be produced for the "all
      variables provide information, no one is critical" situation and for
      the "all variables are critical, drop any one, everything is ruined"
      situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return the same results. It generally
      happens on problems with low test set error (a few percent at most)
      and large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportanceoobgini(decisionforestbuilder* s,
     ae_state *_state)
{

    s->rdfimportance = dforest_needoobgini;
}


/*************************************************************************
This function tells decision forest construction algorithm to use
permutation variable importance estimator (also known as MDA).

This version of importance estimation algorithm analyzes mean increase in
out-of-bag sum of squared residuals after random permutation of J-th
variable. The result is divided by error computed with all variables being
perturbed in order to produce R-squared-like estimate in [0,1] range.

Such estimate is slower to calculate than Gini-based rating because it
needs multiple inference runs for each of the variables being studied.

ALGLIB uses parallelized and highly optimized algorithm which analyzes
path through the decision tree and allows us to handle most perturbations
in O(1) time; nevertheless, requesting MDA importances may increase forest
construction time from 10% to 200% (or more, if you have thousands of
variables).

However, MDA rating has the following benefits over Gini-based ones:
* no bias towards specific variable types
* ability to directly evaluate "absolute" importance of some variable at
  "0 to 1" scale (contrary to Gini-based rating, which returns comparative
  importances).

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have the same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly the same picture will be produced for the "all
      variables provide information, no one is critical" situation and for
      the "all variables are critical, drop any one, everything is ruined"
      situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return the same results. It generally
      happens on problems with low test set error (a few percent at most)
      and large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancepermutation(decisionforestbuilder* s,
     ae_state *_state)
{

    s->rdfimportance = dforest_needpermutation;
}


/*************************************************************************
This function tells decision forest construction algorithm to skip
variable importance estimation.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will result in forest being built
                    without variable importance estimation.

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancenone(decisionforestbuilder* s,
     ae_state *_state)
{

    s->rdfimportance = 0;
}


/*************************************************************************
This function is an alias for dfbuilderpeekprogress(), left in ALGLIB for
backward compatibility reasons.

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuildergetprogress(decisionforestbuilder* s, ae_state *_state)
{
    double result;


    result = dfbuilderpeekprogress(s, _state);
    return result;
}


/*************************************************************************
This function is used to peek into decision forest construction process
from some other thread and get current progress indicator.

It returns value in [0,1].

INPUT PARAMETERS:
    S           -   decision forest builder object used to build forest
                    in some other thread

RESULT:
    progress value, in [0,1]

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuilderpeekprogress(decisionforestbuilder* s, ae_state *_state)
{
    double result;


    result = s->rdfprogress/ae_maxreal((double)(s->rdftotal), (double)(1), _state);
    result = ae_maxreal(result, (double)(0), _state);
    result = ae_minreal(result, (double)(1), _state);
    return result;
}


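/*************************************************************************
Usage sketch: dfbuilderpeekprogress() is designed to be called from a
thread other than the one running the build. The monitoring loop below is
an illustrative assumption (std::thread, polling interval, tree count),
not a prescribed pattern:

    #include <thread>
    #include <chrono>
    #include <cstdio>

    std::thread trainer([&]{
        alglib::dfbuilderbuildrandomforest(builder, 100, df, rep);
    });
    while( alglib::dfbuilderpeekprogress(builder)<1.0 )
    {
        std::printf("progress: %3.0f%%\n", 100*alglib::dfbuilderpeekprogress(builder));
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
    trainer.join();
*************************************************************************/

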
/*************************************************************************
This subroutine builds decision forest according to current settings using
dataset internally stored in the builder object. Dense algorithm is used.

NOTE: this function uses dense algorithm for forest construction
      independently from the dataset format (dense or sparse).

NOTE: forest built with this function is stored in-memory using 64-bit
      data structures for offsets/indexes/split values. It is possible to
      convert forest into more memory-efficient compressed binary
      representation. Depending on the problem properties, 3.7x-5.7x
      compression factors are possible.

      The downsides of compression are (a) slight reduction in the model
      accuracy and (b) ~1.5x reduction in the inference speed (due to
      increased complexity of the storage format).

      See comments on dfbinarycompression() for more info.

Default settings are used by the algorithm; you can tweak them with the
help of the following functions:
* dfbuildersetsubsampleratio() - to control a fraction of the dataset used
  for subsampling
* dfbuildersetrndvars() - to control number of variables randomly chosen
  for decision rule creation

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   decision forest builder object
    NTrees      -   NTrees>=1, number of trees to train

OUTPUT PARAMETERS:
    DF          -   decision forest. You can compress this forest to more
                    compact 16-bit representation with dfbinarycompression()
    Rep         -   report, see below for information on its fields.

=== report information produced by forest construction function ==========

Decision forest training report includes following information:
* training set errors
* out-of-bag estimates of errors
* variable importance ratings

Following fields are used to store information:
* training set errors are stored in rep.relclserror, rep.avgce,
  rep.rmserror, rep.avgerror and rep.avgrelerror
* out-of-bag estimates of errors are stored in rep.oobrelclserror,
  rep.oobavgce, rep.oobrmserror, rep.oobavgerror and rep.oobavgrelerror

Variable importance reports, if requested by dfbuildersetimportanceoobgini(),
dfbuildersetimportancetrngini() or dfbuildersetimportancepermutation()
call, are stored in:
* rep.varimportances field stores importance ratings
* rep.topvars stores variable indexes ordered from the most important to
  less important ones

You can find more information about report fields in:
* comments on dfreport structure
* comments on dfbuildersetimportanceoobgini function
* comments on dfbuildersetimportancetrngini function
* comments on dfbuildersetimportancepermutation function

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuilderbuildrandomforest(decisionforestbuilder* s,
     ae_int_t ntrees,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t npoints;
    ae_int_t trnsize;
    ae_int_t maxtreesize;
    ae_int_t sessionseed;
    dfworkbuf workbufseed;
    dfvotebuf votebufseed;
    dftreebuf treebufseed;

    ae_frame_make(_state, &_frame_block);
    memset(&workbufseed, 0, sizeof(workbufseed));
    memset(&votebufseed, 0, sizeof(votebufseed));
    memset(&treebufseed, 0, sizeof(treebufseed));
    _decisionforest_clear(df);
    _dfreport_clear(rep);
    _dfworkbuf_init(&workbufseed, _state, ae_true);
    _dfvotebuf_init(&votebufseed, _state, ae_true);
    _dftreebuf_init(&treebufseed, _state, ae_true);

    ae_assert(ntrees>=1, "DFBuilderBuildRandomForest: ntrees<1", _state);
    dforest_cleanreport(s, rep, _state);
    npoints = s->npoints;
    nvars = s->nvars;
    nclasses = s->nclasses;
    
    /*
     * Set up progress counter
     */
    s->rdfprogress = 0;
    s->rdftotal = ntrees*npoints;
    if( s->rdfimportance==dforest_needpermutation )
    {
        s->rdftotal = s->rdftotal+ntrees*npoints;
    }
    
    /*
     * Quick exit for empty dataset
     */
    if( s->dstype==-1||npoints==0 )
    {
        ae_assert(dforest_leafnodewidth==2, "DFBuilderBuildRandomForest: integrity check failed", _state);
        df->forestformat = dforest_dfuncompressedv0;
        df->nvars = s->nvars;
        df->nclasses = s->nclasses;
        df->ntrees = 1;
        df->bufsize = 1+dforest_leafnodewidth;
        ae_vector_set_length(&df->trees, 1+dforest_leafnodewidth, _state);
        df->trees.ptr.p_double[0] = (double)(1+dforest_leafnodewidth);
        df->trees.ptr.p_double[1] = (double)(-1);
        df->trees.ptr.p_double[2] = 0.0;
        dfcreatebuffer(df, &df->buffer, _state);
        ae_frame_leave(_state);
        return;
    }
    ae_assert(npoints>0, "DFBuilderBuildRandomForest: integrity check failed", _state);
    
    /*
     * Analyze dataset statistics, perform preprocessing
     */
    dforest_analyzeandpreprocessdataset(s, _state);
    
    /*
     * Prepare "work", "vote" and "tree" pools and other settings
     */
    trnsize = ae_round(npoints*s->rdfratio, _state);
    trnsize = ae_maxint(trnsize, 1, _state);
    trnsize = ae_minint(trnsize, npoints, _state);
    maxtreesize = 1+dforest_innernodewidth*(trnsize-1)+dforest_leafnodewidth*trnsize;
    ae_vector_set_length(&workbufseed.varpool, nvars, _state);
    ae_vector_set_length(&workbufseed.trnset, trnsize, _state);
    ae_vector_set_length(&workbufseed.oobset, npoints-trnsize, _state);
    ae_vector_set_length(&workbufseed.tmp0i, npoints, _state);
    ae_vector_set_length(&workbufseed.tmp1i, npoints, _state);
    ae_vector_set_length(&workbufseed.tmp0r, npoints, _state);
    ae_vector_set_length(&workbufseed.tmp1r, npoints, _state);
    ae_vector_set_length(&workbufseed.tmp2r, npoints, _state);
    ae_vector_set_length(&workbufseed.tmp3r, npoints, _state);
    ae_vector_set_length(&workbufseed.trnlabelsi, npoints, _state);
    ae_vector_set_length(&workbufseed.trnlabelsr, npoints, _state);
    ae_vector_set_length(&workbufseed.ooblabelsi, npoints, _state);
    ae_vector_set_length(&workbufseed.ooblabelsr, npoints, _state);
    ae_vector_set_length(&workbufseed.curvals, npoints, _state);
    ae_vector_set_length(&workbufseed.bestvals, npoints, _state);
    ae_vector_set_length(&workbufseed.classpriors, nclasses, _state);
    ae_vector_set_length(&workbufseed.classtotals0, nclasses, _state);
    ae_vector_set_length(&workbufseed.classtotals1, nclasses, _state);
    ae_vector_set_length(&workbufseed.classtotals01, 2*nclasses, _state);
    ae_vector_set_length(&workbufseed.treebuf, maxtreesize, _state);
    workbufseed.trnsize = trnsize;
    workbufseed.oobsize = npoints-trnsize;
    ae_vector_set_length(&votebufseed.trntotals, npoints*nclasses, _state);
    ae_vector_set_length(&votebufseed.oobtotals, npoints*nclasses, _state);
    for(i=0; i<=npoints*nclasses-1; i++)
    {
        votebufseed.trntotals.ptr.p_double[i] = (double)(0);
        votebufseed.oobtotals.ptr.p_double[i] = (double)(0);
    }
    ae_vector_set_length(&votebufseed.trncounts, npoints, _state);
    ae_vector_set_length(&votebufseed.oobcounts, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {
        votebufseed.trncounts.ptr.p_int[i] = 0;
        votebufseed.oobcounts.ptr.p_int[i] = 0;
    }
    ae_vector_set_length(&votebufseed.giniimportances, nvars, _state);
    for(i=0; i<=nvars-1; i++)
    {
        votebufseed.giniimportances.ptr.p_double[i] = 0.0;
    }
    treebufseed.treeidx = -1;
    ae_shared_pool_set_seed(&s->workpool, &workbufseed, sizeof(workbufseed), _dfworkbuf_init, _dfworkbuf_init_copy, _dfworkbuf_destroy, _state);
    ae_shared_pool_set_seed(&s->votepool, &votebufseed, sizeof(votebufseed), _dfvotebuf_init, _dfvotebuf_init_copy, _dfvotebuf_destroy, _state);
    ae_shared_pool_set_seed(&s->treepool, &treebufseed, sizeof(treebufseed), _dftreebuf_init, _dftreebuf_init_copy, _dftreebuf_destroy, _state);
    ae_shared_pool_set_seed(&s->treefactory, &treebufseed, sizeof(treebufseed), _dftreebuf_init, _dftreebuf_init_copy, _dftreebuf_destroy, _state);
    
    /*
     * Select session seed (individual trees are constructed using
     * combination of session and local seeds).
     */
    sessionseed = s->rdfglobalseed;
    if( s->rdfglobalseed<=0 )
    {
        sessionseed = ae_randominteger(30000, _state);
    }
    
    /*
     * Prepare In-and-Out-of-Bag matrix, if needed
     */
    s->neediobmatrix = s->rdfimportance==dforest_needpermutation;
    if( s->neediobmatrix )
    {
        
        /*
         * Prepare default state of In-and-Out-of-Bag matrix
         */
        bmatrixsetlengthatleast(&s->iobmatrix, ntrees, npoints, _state);
        for(i=0; i<=ntrees-1; i++)
        {
            for(j=0; j<=npoints-1; j++)
            {
                s->iobmatrix.ptr.pp_bool[i][j] = ae_false;
            }
        }
    }
    
    /*
     * Build trees (in parallel, if possible)
     */
    dforest_buildrandomtree(s, 0, ntrees, _state);
    
    /*
     * Merge trees and output result
     */
    dforest_mergetrees(s, df, _state);
    
    /*
     * Process voting results and output training set and OOB errors.
     * Finalize tree construction.
     */
    dforest_processvotingresults(s, ntrees, &votebufseed, rep, _state);
    dfcreatebuffer(df, &df->buffer, _state);
    
    /*
     * Perform variable importance estimation
     */
    dforest_estimatevariableimportance(s, sessionseed, df, ntrees, rep, _state);
    
    /*
     * Update progress counter
     */
    s->rdfprogress = s->rdftotal;
    ae_frame_leave(_state);
}


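/*************************************************************************
Usage sketch: an end-to-end training sequence tying the builder calls
above together (C++ interface; the dataset is the hypothetical
2-feature/2-class one from the dfbuildersetdataset() note above):

    alglib::decisionforestbuilder builder;
    alglib::decisionforest df;
    alglib::dfreport rep;
    alglib::real_2d_array xy = "[[0.0,0.1,0],[0.9,1.0,1],[1.1,0.9,1],[0.1,0.0,0]]";

    alglib::dfbuildercreate(builder);
    alglib::dfbuildersetdataset(builder, xy, 4, 2, 2);
    alglib::dfbuildersetsubsampleratio(builder, 0.5); // 50% bagging, the default
    alglib::dfbuilderbuildrandomforest(builder, 50, df, rep);
    // rep.oobrelclserror now holds the out-of-bag classification error
*************************************************************************/

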
/*************************************************************************
This function performs binary compression of the decision forest.

Original decision forest produced by the forest builder is stored using
64-bit representation for all numbers - offsets, variable indexes, split
points.

It is possible to significantly reduce model size by means of:
* using compressed dynamic encoding for integers (offsets and variable
  indexes), which uses just 1 byte to store small ints (less than 128),
  just 2 bytes for larger values (less than 128^2) and so on
* storing floating point numbers using 8-bit exponent and 16-bit mantissa

As a result, the model needs significantly less memory (compression factor
depends on variable and class counts). In particular:
* NVars<128 and NClasses<128 result in 4.4x-5.7x model size reduction
* NVars<16384 and NClasses<128 result in 3.7x-4.5x model size reduction

Such storage format performs lossless compression of all integers, but
compression of floating point values (split values) is lossy, with roughly
0.01% relative error introduced during rounding. Thus, we recommend you to
re-evaluate model accuracy after compression.

Another downside of compression is ~1.5x reduction in the inference
speed due to necessity of dynamic decompression of the compressed model.

INPUT PARAMETERS:
    DF      -   decision forest built by forest builder

OUTPUT PARAMETERS:
    DF      -   replaced by compressed forest

RESULT:
    compression factor (in-RAM size of the uncompressed model vs that of
    the compressed one), positive number larger than 1.0

  -- ALGLIB --
     Copyright 22.07.2019 by Bochkanov Sergey
*************************************************************************/
double dfbinarycompression(decisionforest* df, ae_state *_state)
{
    double result;


    result = dforest_binarycompression(df, ae_false, _state);
    return result;
}


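/*************************************************************************
Usage sketch: typical call sequence suggested by the comments above -
compress in place, inspect the factor, then re-check accuracy because
split values are rounded (C++ interface):

    double factor = alglib::dfbinarycompression(df); // e.g. ~4x-5x for small NVars
    // split values were stored lossily (~0.01% relative error), so
    // re-evaluate model accuracy on a validation set after this call
*************************************************************************/

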
/*************************************************************************
This is an 8-bit version of dfbinarycompression.
Not recommended for external use because it is too lossy.

  -- ALGLIB --
     Copyright 22.07.2019 by Bochkanov Sergey
*************************************************************************/
double dfbinarycompression8(decisionforest* df, ae_state *_state)
{
    double result;


    result = dforest_binarycompression(df, ae_true, _state);
    return result;
}


/*************************************************************************
|
|
Inference using decision forest
|
|
|
|
IMPORTANT: this function is thread-unsafe and may modify internal
|
|
structures of the model! You can not use same model object for
|
|
parallel evaluation from several threads.
|
|
|
|
Use dftsprocess() with independent thread-local buffers if
|
|
you need thread-safe evaluation.
|
|
|
|
INPUT PARAMETERS:
|
|
DF - decision forest model
|
|
X - input vector, array[NVars]
|
|
Y - possibly preallocated buffer, reallocated if too small
|
|
|
|
OUTPUT PARAMETERS:
|
|
Y - result. Regression estimate when solving regression task,
|
|
vector of posterior probabilities for classification task.
|
|
|
|
See also DFProcessI.
|
|
|
|
|
|
-- ALGLIB --
|
|
Copyright 16.02.2009 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
void dfprocess(decisionforest* df,
|
|
/* Real */ ae_vector* x,
|
|
/* Real */ ae_vector* y,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t offs;
|
|
ae_int_t i;
|
|
double v;
|
|
ae_int_t treesize;
|
|
ae_bool processed;
|
|
|
|
|
|
|
|
/*
|
|
* Process
|
|
*
|
|
* Although comments above warn you about thread-unsafety of this
|
|
* function, it is de facto thread-safe. However, thread safety is
|
|
* an accidental side-effect of the specific inference algorithm
|
|
* being used. It may disappear in the future versions of the DF
|
|
* models, so you should NOT rely on it.
|
|
*/
|
|
if( y->cnt<df->nclasses )
|
|
{
|
|
ae_vector_set_length(y, df->nclasses, _state);
|
|
}
|
|
for(i=0; i<=df->nclasses-1; i++)
|
|
{
|
|
y->ptr.p_double[i] = (double)(0);
|
|
}
|
|
processed = ae_false;
|
|
if( df->forestformat==dforest_dfuncompressedv0 )
|
|
{
|
|
|
|
/*
|
|
* Process trees stored in uncompressed format
|
|
*/
|
|
offs = 0;
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
dforest_dfprocessinternaluncompressed(df, offs, offs+1, x, y, _state);
|
|
offs = offs+ae_round(df->trees.ptr.p_double[offs], _state);
|
|
}
|
|
processed = ae_true;
|
|
}
|
|
if( df->forestformat==dforest_dfcompressedv0 )
|
|
{
|
|
|
|
/*
|
|
* Process trees stored in compressed format
|
|
*/
|
|
offs = 0;
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
treesize = dforest_unstreamuint(&df->trees8, &offs, _state);
|
|
dforest_dfprocessinternalcompressed(df, offs, x, y, _state);
|
|
offs = offs+treesize;
|
|
}
|
|
processed = ae_true;
|
|
}
|
|
ae_assert(processed, "DFProcess: integrity check failed (unexpected format?)", _state);
|
|
v = (double)1/(double)df->ntrees;
|
|
ae_v_muld(&y->ptr.p_double[0], 1, ae_v_len(0,df->nclasses-1), v);
|
|
}


/*************************************************************************
'interactive' variant of DFProcess for languages like Python which support
constructs like "Y = DFProcessI(DF,X)" and the interactive mode of the
interpreter.

This function allocates a new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from the command line.

IMPORTANT: this function is thread-unsafe and may modify internal
           structures of the model! You cannot use the same model object
           for parallel evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers if
           you need thread-safe evaluation.

  -- ALGLIB --
     Copyright 28.02.2010 by Bochkanov Sergey
*************************************************************************/
void dfprocessi(decisionforest* df,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_vector_clear(y);

    dfprocess(df, x, y, _state);
}


/*************************************************************************
This function returns the first component of the inferred vector (i.e. the
one with index #0).

It is a convenience wrapper for dfprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems

In the former case this function returns the inference result as a scalar,
which is definitely more convenient than wrapping it as a vector. In the
latter case it returns the probability of the object belonging to class #0.

If you call it for anything different from the two cases above, it will
work as defined, i.e. return y[0], although it is of less use in such
cases.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You cannot use the same model object for parallel
           evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   DF model
    X       -   input vector, array[0..NVars-1].

RESULT:
    Y[0]

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double dfprocess0(decisionforest* model,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nvars;
    double result;

    nvars = model->nvars;
    for(i=0; i<=nvars-1; i++)
    {
        model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
    }
    dfprocess(model, &model->buffer.x, &model->buffer.y, _state);
    result = model->buffer.y.ptr.p_double[0];
    return result;
}


/*************************************************************************
This function returns the most probable class number for an input X. It is
the same as calling dfprocess(model,x,y), then determining i=argmax(y[i])
and returning i.

A class number in the [0,NOut) range is returned for classification
problems, -1 is returned when this function is called for regression
problems.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You cannot use the same model object for parallel
           evaluation from several threads.

           Use dftsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   decision forest model
    X       -   input vector, array[0..NVars-1].

RESULT:
    class number, -1 for regression tasks

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t dfclassify(decisionforest* model,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nvars;
    ae_int_t nout;
    ae_int_t result;

    if( model->nclasses<2 )
    {
        result = -1;
        return result;
    }
    nvars = model->nvars;
    nout = model->nclasses;
    for(i=0; i<=nvars-1; i++)
    {
        model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
    }
    dfprocess(model, &model->buffer.x, &model->buffer.y, _state);
    result = 0;
    for(i=1; i<=nout-1; i++)
    {
        if( model->buffer.y.ptr.p_double[i]>model->buffer.y.ptr.p_double[result] )
        {
            result = i;
        }
    }
    return result;
}
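

/*
 * Usage sketch (not part of the library): for a classification model the
 * call below is equivalent to running dfprocess() and taking the argmax
 * of the returned probability vector; X is assumed to hold NVars values.
 *
 *     ae_int_t cls, j, best;
 *     dfprocess(&model, &x, &y, _state);
 *     best = 0;
 *     for(j=1; j<=model.nclasses-1; j++)
 *         if( y.ptr.p_double[j]>y.ptr.p_double[best] )
 *             best = j;
 *     cls = dfclassify(&model, &x, _state);    // cls==best; -1 for regression models
 */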


/*************************************************************************
Inference using decision forest

Thread-safe processing using an external buffer for temporaries.

This function is thread-safe (i.e. you can use the same DF model from
multiple threads) as long as you use different buffer objects for
different threads.

INPUT PARAMETERS:
    DF      -   decision forest model
    Buf     -   buffer object, must be allocated specifically for this
                model with dfcreatebuffer().
    X       -   input vector, array[NVars]
    Y       -   possibly preallocated buffer, reallocated if too small

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression task,
                vector of posterior probabilities for classification task.

See also DFProcessI.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
void dftsprocess(decisionforest* df,
     decisionforestbuffer* buf,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    /*
     * Although the docs warn you about the thread-unsafety of the
     * dfprocess() function, it is de facto thread-safe. However, thread
     * safety is an accidental side-effect of the specific inference
     * algorithm being used. It may disappear in future versions of the
     * DF models, so you should NOT rely on it.
     */
    dfprocess(df, x, y, _state);
}
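

/*
 * Usage sketch (not part of the library): thread-safe evaluation with one
 * buffer per thread. Each thread allocates its own decisionforestbuffer
 * with dfcreatebuffer() and passes it to dftsprocess(); the model itself
 * is shared read-only. Thread creation is assumed to be handled by the
 * caller.
 *
 *     decisionforestbuffer localbuf;             // one per thread
 *     dfcreatebuffer(&df, &localbuf, _state);    // sized for this model
 *     dftsprocess(&df, &localbuf, &x, &y, _state);
 */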


/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    fraction of incorrectly classified cases (classification error count
    divided by NPoints).
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrelclserror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    double result;

    result = (double)dforest_dfclserror(df, xy, npoints, _state)/(double)npoints;
    return result;
}


/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgce(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t tmpi;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);

    ae_vector_set_length(&x, df->nvars-1+1, _state);
    ae_vector_set_length(&y, df->nclasses-1+1, _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
        dfprocess(df, &x, &y, _state);
        if( df->nclasses>1 )
        {
            /*
             * classification-specific code
             */
            k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
            tmpi = 0;
            for(j=1; j<=df->nclasses-1; j++)
            {
                if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
                {
                    tmpi = j;
                }
            }
            if( ae_fp_neq(y.ptr.p_double[k],(double)(0)) )
            {
                result = result-ae_log(y.ptr.p_double[k], _state);
            }
            else
            {
                result = result-ae_log(ae_minrealnumber, _state);
            }
        }
    }
    result = result/npoints;
    ae_frame_leave(_state);
    return result;
}
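

/*
 * For reference, the loop above accumulates the negative log-likelihood
 * of the correct class over the test set, clamping zero probabilities to
 * ae_minrealnumber before taking the logarithm:
 *
 *     result = (1/NPoints) * sum_i( -ln p_i[class_i] )
 *
 * which is the average cross-entropy in nats; dividing by ln(2) converts
 * it to the bits-per-element scale mentioned in the header comment.
 */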


/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.
    Its meaning for regression tasks is obvious. For classification tasks,
    the RMS error is the error made when estimating posterior
    probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrmserror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t tmpi;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);

    ae_vector_set_length(&x, df->nvars-1+1, _state);
    ae_vector_set_length(&y, df->nclasses-1+1, _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
        dfprocess(df, &x, &y, _state);
        if( df->nclasses>1 )
        {
            /*
             * classification-specific code
             */
            k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
            tmpi = 0;
            for(j=1; j<=df->nclasses-1; j++)
            {
                if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
                {
                    tmpi = j;
                }
            }
            for(j=0; j<=df->nclasses-1; j++)
            {
                if( j==k )
                {
                    result = result+ae_sqr(y.ptr.p_double[j]-1, _state);
                }
                else
                {
                    result = result+ae_sqr(y.ptr.p_double[j], _state);
                }
            }
        }
        else
        {
            /*
             * regression-specific code
             */
            result = result+ae_sqr(y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars], _state);
        }
    }
    result = ae_sqrt(result/(npoints*df->nclasses), _state);
    ae_frame_leave(_state);
    return result;
}
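

/*
 * For reference, with one-hot targets y_ij (1 when j is the correct class
 * of point i, 0 otherwise) and predicted probabilities p_ij, the loop
 * above computes
 *
 *     result = sqrt( (1/(NPoints*NClasses)) * sum_ij (p_ij - y_ij)^2 )
 *
 * For regression (NClasses=1) this reduces to the usual root mean square
 * error of the scalar prediction.
 */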


/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error.
    Its meaning for regression tasks is obvious. For classification tasks,
    it is the average error made when estimating posterior probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgerror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);

    ae_vector_set_length(&x, df->nvars-1+1, _state);
    ae_vector_set_length(&y, df->nclasses-1+1, _state);
    result = (double)(0);
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
        dfprocess(df, &x, &y, _state);
        if( df->nclasses>1 )
        {
            /*
             * classification-specific code
             */
            k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
            for(j=0; j<=df->nclasses-1; j++)
            {
                if( j==k )
                {
                    result = result+ae_fabs(y.ptr.p_double[j]-1, _state);
                }
                else
                {
                    result = result+ae_fabs(y.ptr.p_double[j], _state);
                }
            }
        }
        else
        {
            /*
             * regression-specific code
             */
            result = result+ae_fabs(y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars], _state);
        }
    }
    result = result/(npoints*df->nclasses);
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error.
    Its meaning for regression tasks is obvious. For classification tasks,
    it is the average relative error made when estimating the posterior
    probability of belonging to the correct class.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgrelerror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t relcnt;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);

    ae_vector_set_length(&x, df->nvars-1+1, _state);
    ae_vector_set_length(&y, df->nclasses-1+1, _state);
    result = (double)(0);
    relcnt = 0;
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
        dfprocess(df, &x, &y, _state);
        if( df->nclasses>1 )
        {
            /*
             * classification-specific code
             */
            k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
            for(j=0; j<=df->nclasses-1; j++)
            {
                if( j==k )
                {
                    result = result+ae_fabs(y.ptr.p_double[j]-1, _state);
                    relcnt = relcnt+1;
                }
            }
        }
        else
        {
            /*
             * regression-specific code
             */
            if( ae_fp_neq(xy->ptr.pp_double[i][df->nvars],(double)(0)) )
            {
                result = result+ae_fabs((y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars])/xy->ptr.pp_double[i][df->nvars], _state);
                relcnt = relcnt+1;
            }
        }
    }
    if( relcnt>0 )
    {
        result = result/relcnt;
    }
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Copying of DecisionForest structure

INPUT PARAMETERS:
    DF1     -   original

OUTPUT PARAMETERS:
    DF2     -   copy

  -- ALGLIB --
     Copyright 13.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfcopy(decisionforest* df1, decisionforest* df2, ae_state *_state)
{
    ae_int_t i;
    ae_int_t bufsize;

    _decisionforest_clear(df2);

    if( df1->forestformat==dforest_dfuncompressedv0 )
    {
        df2->forestformat = df1->forestformat;
        df2->nvars = df1->nvars;
        df2->nclasses = df1->nclasses;
        df2->ntrees = df1->ntrees;
        df2->bufsize = df1->bufsize;
        ae_vector_set_length(&df2->trees, df1->bufsize, _state);
        ae_v_move(&df2->trees.ptr.p_double[0], 1, &df1->trees.ptr.p_double[0], 1, ae_v_len(0,df1->bufsize-1));
        dfcreatebuffer(df2, &df2->buffer, _state);
        return;
    }
    if( df1->forestformat==dforest_dfcompressedv0 )
    {
        df2->forestformat = df1->forestformat;
        df2->usemantissa8 = df1->usemantissa8;
        df2->nvars = df1->nvars;
        df2->nclasses = df1->nclasses;
        df2->ntrees = df1->ntrees;
        bufsize = df1->trees8.cnt;
        ae_vector_set_length(&(df2->trees8), bufsize, _state);
        for(i=0; i<=bufsize-1; i++)
        {
            df2->trees8.ptr.p_ubyte[i] = (unsigned char)(df1->trees8.ptr.p_ubyte[i]);
        }
        dfcreatebuffer(df2, &df2->buffer, _state);
        return;
    }
    ae_assert(ae_false, "DFCopy: unexpected forest format", _state);
}


/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
void dfalloc(ae_serializer* s, decisionforest* forest, ae_state *_state)
{
    if( forest->forestformat==dforest_dfuncompressedv0 )
    {
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        allocrealarray(s, &forest->trees, forest->bufsize, _state);
        return;
    }
    if( forest->forestformat==dforest_dfcompressedv0 )
    {
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_entry(s);
        ae_serializer_alloc_byte_array(s, &forest->trees8);
        return;
    }
    ae_assert(ae_false, "DFAlloc: unexpected forest format", _state);
}


/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
void dfserialize(ae_serializer* s,
     decisionforest* forest,
     ae_state *_state)
{
    if( forest->forestformat==dforest_dfuncompressedv0 )
    {
        ae_serializer_serialize_int(s, getrdfserializationcode(_state), _state);
        ae_serializer_serialize_int(s, dforest_dfuncompressedv0, _state);
        ae_serializer_serialize_int(s, forest->nvars, _state);
        ae_serializer_serialize_int(s, forest->nclasses, _state);
        ae_serializer_serialize_int(s, forest->ntrees, _state);
        ae_serializer_serialize_int(s, forest->bufsize, _state);
        serializerealarray(s, &forest->trees, forest->bufsize, _state);
        return;
    }
    if( forest->forestformat==dforest_dfcompressedv0 )
    {
        ae_serializer_serialize_int(s, getrdfserializationcode(_state), _state);
        ae_serializer_serialize_int(s, forest->forestformat, _state);
        ae_serializer_serialize_bool(s, forest->usemantissa8, _state);
        ae_serializer_serialize_int(s, forest->nvars, _state);
        ae_serializer_serialize_int(s, forest->nclasses, _state);
        ae_serializer_serialize_int(s, forest->ntrees, _state);
        ae_serializer_serialize_byte_array(s, &forest->trees8, _state);
        return;
    }
    ae_assert(ae_false, "DFSerialize: unexpected forest format", _state);
}


/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
void dfunserialize(ae_serializer* s,
     decisionforest* forest,
     ae_state *_state)
{
    ae_int_t i0;
    ae_int_t forestformat;
    ae_bool processed;

    _decisionforest_clear(forest);

    /*
     * check correctness of header
     */
    ae_serializer_unserialize_int(s, &i0, _state);
    ae_assert(i0==getrdfserializationcode(_state), "DFUnserialize: stream header corrupted", _state);

    /*
     * Read forest
     */
    ae_serializer_unserialize_int(s, &forestformat, _state);
    processed = ae_false;
    if( forestformat==dforest_dfuncompressedv0 )
    {
        /*
         * Unserialize data
         */
        forest->forestformat = forestformat;
        ae_serializer_unserialize_int(s, &forest->nvars, _state);
        ae_serializer_unserialize_int(s, &forest->nclasses, _state);
        ae_serializer_unserialize_int(s, &forest->ntrees, _state);
        ae_serializer_unserialize_int(s, &forest->bufsize, _state);
        unserializerealarray(s, &forest->trees, _state);
        processed = ae_true;
    }
    if( forestformat==dforest_dfcompressedv0 )
    {
        /*
         * Unserialize data
         */
        forest->forestformat = forestformat;
        ae_serializer_unserialize_bool(s, &forest->usemantissa8, _state);
        ae_serializer_unserialize_int(s, &forest->nvars, _state);
        ae_serializer_unserialize_int(s, &forest->nclasses, _state);
        ae_serializer_unserialize_int(s, &forest->ntrees, _state);
        ae_serializer_unserialize_byte_array(s, &forest->trees8, _state);
        processed = ae_true;
    }
    ae_assert(processed, "DFUnserialize: unexpected forest format", _state);

    /*
     * Prepare buffer
     */
    dfcreatebuffer(forest, &forest->buffer, _state);
}
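

/*
 * For reference, the stream layout read back by dfunserialize() mirrors
 * the one written by dfserialize():
 *
 *     uncompressed v0: [serialization code][format tag][NVars][NClasses]
 *                      [NTrees][BufSize][Trees as real array]
 *     compressed v0:   [serialization code][format tag][UseMantissa8]
 *                      [NVars][NClasses][NTrees][Trees8 as byte array]
 *
 * so a forest serialized in either format round-trips through
 * dfserialize() and dfunserialize() unchanged (up to the lossy float
 * encoding already applied by dfbinarycompression()).
 */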


/*************************************************************************
This subroutine builds a random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforest(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t ntrees,
     double r,
     ae_int_t* info,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_int_t samplesize;

    *info = 0;
    _decisionforest_clear(df);
    _dfreport_clear(rep);

    if( ae_fp_less_eq(r,(double)(0))||ae_fp_greater(r,(double)(1)) )
    {
        *info = -1;
        return;
    }
    samplesize = ae_maxint(ae_round(r*npoints, _state), 1, _state);
    dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, ae_maxint(nvars/2, 1, _state), dforest_dfusestrongsplits+dforest_dfuseevs, info, df, rep, _state);
}


/*************************************************************************
This subroutine builds a random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforestx1(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t ntrees,
     ae_int_t nrndvars,
     double r,
     ae_int_t* info,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_int_t samplesize;

    *info = 0;
    _decisionforest_clear(df);
    _dfreport_clear(rep);

    if( ae_fp_less_eq(r,(double)(0))||ae_fp_greater(r,(double)(1)) )
    {
        *info = -1;
        return;
    }
    if( nrndvars<=0||nrndvars>nvars )
    {
        *info = -1;
        return;
    }
    samplesize = ae_maxint(ae_round(r*npoints, _state), 1, _state);
    dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, nrndvars, dforest_dfusestrongsplits+dforest_dfuseevs, info, df, rep, _state);
}


void dfbuildinternal(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_int_t ntrees,
     ae_int_t samplesize,
     ae_int_t nfeatures,
     ae_int_t flags,
     ae_int_t* info,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    decisionforestbuilder builder;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&builder, 0, sizeof(builder));
    *info = 0;
    _decisionforest_clear(df);
    _dfreport_clear(rep);
    _decisionforestbuilder_init(&builder, _state, ae_true);

    /*
     * Test for inputs
     */
    if( (((((npoints<1||samplesize<1)||samplesize>npoints)||nvars<1)||nclasses<1)||ntrees<1)||nfeatures<1 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    if( nclasses>1 )
    {
        for(i=0; i<=npoints-1; i++)
        {
            if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
            {
                *info = -2;
                ae_frame_leave(_state);
                return;
            }
        }
    }
    *info = 1;
    dfbuildercreate(&builder, _state);
    dfbuildersetdataset(&builder, xy, npoints, nvars, nclasses, _state);
    dfbuildersetsubsampleratio(&builder, (double)samplesize/(double)npoints, _state);
    dfbuildersetrndvars(&builder, nfeatures, _state);
    dfbuilderbuildrandomforest(&builder, ntrees, df, rep, _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Builds a range of random trees [TreeIdx0,TreeIdx1) using the decision
forest algorithm. The tree index is used to seed the per-tree RNG.

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
static void dforest_buildrandomtree(decisionforestbuilder* s,
     ae_int_t treeidx0,
     ae_int_t treeidx1,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t treeidx;
    ae_int_t i;
    ae_int_t j;
    ae_int_t npoints;
    ae_int_t nvars;
    ae_int_t nclasses;
    hqrndstate rs;
    dfworkbuf *workbuf;
    ae_smart_ptr _workbuf;
    dfvotebuf *votebuf;
    ae_smart_ptr _votebuf;
    dftreebuf *treebuf;
    ae_smart_ptr _treebuf;
    ae_int_t treesize;
    ae_int_t varstoselect;
    ae_int_t workingsetsize;
    double meanloss;

    ae_frame_make(_state, &_frame_block);
    memset(&rs, 0, sizeof(rs));
    memset(&_workbuf, 0, sizeof(_workbuf));
    memset(&_votebuf, 0, sizeof(_votebuf));
    memset(&_treebuf, 0, sizeof(_treebuf));
    _hqrndstate_init(&rs, _state, ae_true);
    ae_smart_ptr_init(&_workbuf, (void**)&workbuf, _state, ae_true);
    ae_smart_ptr_init(&_votebuf, (void**)&votebuf, _state, ae_true);
    ae_smart_ptr_init(&_treebuf, (void**)&treebuf, _state, ae_true);

    /*
     * Perform parallelization
     */
    if( treeidx1-treeidx0>1 )
    {
        if( _trypexec_dforest_buildrandomtree(s,treeidx0,treeidx1, _state) )
        {
            ae_frame_leave(_state);
            return;
        }
        j = (treeidx1-treeidx0)/2;
        dforest_buildrandomtree(s, treeidx0, treeidx0+j, _state);
        dforest_buildrandomtree(s, treeidx0+j, treeidx1, _state);
        ae_frame_leave(_state);
        return;
    }
    else
    {
        ae_assert(treeidx1-treeidx0==1, "RDF: integrity check failed", _state);
        treeidx = treeidx0;
    }

    /*
     * Prepare
     */
    npoints = s->npoints;
    nvars = s->nvars;
    nclasses = s->nclasses;
    if( s->rdfglobalseed>0 )
    {
        hqrndseed(s->rdfglobalseed, 1+treeidx, &rs, _state);
    }
    else
    {
        hqrndseed(ae_randominteger(30000, _state), 1+treeidx, &rs, _state);
    }

    /*
     * Retrieve buffers.
     */
    ae_shared_pool_retrieve(&s->workpool, &_workbuf, _state);
    ae_shared_pool_retrieve(&s->votepool, &_votebuf, _state);

    /*
     * Prepare everything for tree construction.
     */
    ae_assert(workbuf->trnsize>=1, "DForest: integrity check failed (34636)", _state);
    ae_assert(workbuf->oobsize>=0, "DForest: integrity check failed (45745)", _state);
    ae_assert(workbuf->trnsize+workbuf->oobsize==npoints, "DForest: integrity check failed (89415)", _state);
    workingsetsize = -1;
    workbuf->varpoolsize = 0;
    for(i=0; i<=nvars-1; i++)
    {
        if( ae_fp_neq(s->dsmin.ptr.p_double[i],s->dsmax.ptr.p_double[i]) )
        {
            workbuf->varpool.ptr.p_int[workbuf->varpoolsize] = i;
            inc(&workbuf->varpoolsize, _state);
        }
    }
    workingsetsize = workbuf->varpoolsize;
    ae_assert(workingsetsize>=0, "DForest: integrity check failed (73f5)", _state);

    /*
     * Draw TrnSize distinct point indexes with a partial Fisher-Yates
     * shuffle of Tmp0I; the tail that is left untouched forms the OOB
     * set filled in the next loop.
     */
    for(i=0; i<=npoints-1; i++)
    {
        workbuf->tmp0i.ptr.p_int[i] = i;
    }
    for(i=0; i<=workbuf->trnsize-1; i++)
    {
        j = hqrnduniformi(&rs, npoints-i, _state);
        swapelementsi(&workbuf->tmp0i, i, i+j, _state);
        workbuf->trnset.ptr.p_int[i] = workbuf->tmp0i.ptr.p_int[i];
        if( nclasses>1 )
        {
            workbuf->trnlabelsi.ptr.p_int[i] = s->dsival.ptr.p_int[workbuf->tmp0i.ptr.p_int[i]];
        }
        else
        {
            workbuf->trnlabelsr.ptr.p_double[i] = s->dsrval.ptr.p_double[workbuf->tmp0i.ptr.p_int[i]];
        }
        if( s->neediobmatrix )
        {
            s->iobmatrix.ptr.pp_bool[treeidx][workbuf->trnset.ptr.p_int[i]] = ae_true;
        }
    }
    for(i=0; i<=workbuf->oobsize-1; i++)
    {
        j = workbuf->tmp0i.ptr.p_int[workbuf->trnsize+i];
        workbuf->oobset.ptr.p_int[i] = j;
        if( nclasses>1 )
        {
            workbuf->ooblabelsi.ptr.p_int[i] = s->dsival.ptr.p_int[j];
        }
        else
        {
            workbuf->ooblabelsr.ptr.p_double[i] = s->dsrval.ptr.p_double[j];
        }
    }
    varstoselect = ae_round(ae_sqrt((double)(nvars), _state), _state);
    if( ae_fp_greater(s->rdfvars,(double)(0)) )
    {
        varstoselect = ae_round(s->rdfvars, _state);
    }
    if( ae_fp_less(s->rdfvars,(double)(0)) )
    {
        varstoselect = ae_round(-nvars*s->rdfvars, _state);
    }
    varstoselect = ae_maxint(varstoselect, 1, _state);
    varstoselect = ae_minint(varstoselect, nvars, _state);

    /*
     * Perform recurrent construction
     */
    if( s->rdfimportance==dforest_needtrngini )
    {
        meanloss = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->tmpnrms2, _state);
    }
    else
    {
        meanloss = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->ooblabelsi, &workbuf->ooblabelsr, 0, workbuf->oobsize, &workbuf->tmpnrms2, _state);
    }
    treesize = 1;
    dforest_buildrandomtreerec(s, workbuf, workingsetsize, varstoselect, &workbuf->treebuf, votebuf, &rs, 0, workbuf->trnsize, 0, workbuf->oobsize, meanloss, meanloss, &treesize, _state);
    workbuf->treebuf.ptr.p_double[0] = (double)(treesize);

    /*
     * Store tree
     */
    ae_shared_pool_retrieve(&s->treefactory, &_treebuf, _state);
    ae_vector_set_length(&treebuf->treebuf, treesize, _state);
    for(i=0; i<=treesize-1; i++)
    {
        treebuf->treebuf.ptr.p_double[i] = workbuf->treebuf.ptr.p_double[i];
    }
    treebuf->treeidx = treeidx;
    ae_shared_pool_recycle(&s->treepool, &_treebuf, _state);

    /*
     * Return other buffers to appropriate pools
     */
    ae_shared_pool_recycle(&s->workpool, &_workbuf, _state);
    ae_shared_pool_recycle(&s->votepool, &_votebuf, _state);

    /*
     * Update progress indicator
     */
    threadunsafeincby(&s->rdfprogress, npoints, _state);
    ae_frame_leave(_state);
}
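

/*
 * Note on reproducibility: when RDFGlobalSeed>0, each tree K is built
 * from the stream hqrndseed(RDFGlobalSeed, 1+K), so the forest is
 * deterministic regardless of how the [TreeIdx0,TreeIdx1) range is split
 * across worker threads; with a non-positive seed the initial value is
 * drawn from ae_randominteger() and runs are not reproducible.
 */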


/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_dforest_buildrandomtree(decisionforestbuilder* s,
    ae_int_t treeidx0,
    ae_int_t treeidx1,
    ae_state *_state)
{
    return ae_false;
}


/*************************************************************************
Recurrent tree construction function using caller-allocated buffers and
a caller-initialized RNG.

The following items are processed:
* items [Idx0,Idx1) of WorkBuf.TrnSet
* items [OOBIdx0,OOBIdx1) of WorkBuf.OOBSet

TreeSize on input must be 1 (the header element of the tree); on output it
contains the size of the tree.

MeanLoss on input must contain the value of MeanNRMS2(...) computed for
the entire dataset.

Variables from #0 to #WorkingSet-1 from WorkBuf.VarPool are used (for the
block algorithm: blocks, not vars).

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
static void dforest_buildrandomtreerec(decisionforestbuilder* s,
     dfworkbuf* workbuf,
     ae_int_t workingset,
     ae_int_t varstoselect,
     /* Real */ ae_vector* treebuf,
     dfvotebuf* votebuf,
     hqrndstate* rs,
     ae_int_t idx0,
     ae_int_t idx1,
     ae_int_t oobidx0,
     ae_int_t oobidx1,
     double meanloss,
     double topmostmeanloss,
     ae_int_t* treesize,
     ae_state *_state)
{
    ae_int_t npoints;
    ae_int_t nclasses;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j0;
    double v;
    ae_bool labelsaresame;
    ae_int_t offs;
    ae_int_t varbest;
    double splitbest;
    ae_int_t i1;
    ae_int_t i2;
    ae_int_t idxtrn;
    ae_int_t idxoob;
    double meanloss0;
    double meanloss1;

    ae_assert(s->dstype==0, "not supported skbdgfsi!", _state);
    ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (3445)", _state);
    ae_assert(oobidx0<=oobidx1, "BuildRandomTreeRec: integrity check failed (7452)", _state);
    npoints = s->npoints;
    nclasses = s->nclasses;

    /*
     * Check labels: all same or not?
     */
    if( nclasses>1 )
    {
        labelsaresame = ae_true;
        for(i=0; i<=nclasses-1; i++)
        {
            workbuf->classpriors.ptr.p_int[i] = 0;
        }
        j0 = workbuf->trnlabelsi.ptr.p_int[idx0];
        for(i=idx0; i<=idx1-1; i++)
        {
            j = workbuf->trnlabelsi.ptr.p_int[i];
            workbuf->classpriors.ptr.p_int[j] = workbuf->classpriors.ptr.p_int[j]+1;
            labelsaresame = labelsaresame&&j0==j;
        }
    }
    else
    {
        labelsaresame = ae_false;
    }

    /*
     * Leaf node
     */
    if( idx1-idx0==1||labelsaresame )
    {
        if( nclasses==1 )
        {
            dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, workbuf->trnlabelsr.ptr.p_double[idx0], _state);
        }
        else
        {
            dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, (double)(workbuf->trnlabelsi.ptr.p_int[idx0]), _state);
        }
        return;
    }

    /*
     * Non-leaf node.
     * Investigate possible splits.
     */
    ae_assert(s->rdfalgo==0, "BuildRandomForest: unexpected algo", _state);
    dforest_choosecurrentsplitdense(s, workbuf, &workingset, varstoselect, rs, idx0, idx1, &varbest, &splitbest, _state);
    if( varbest<0 )
    {
        /*
         * No good split was found; make a leaf (label is randomly chosen) and exit.
         */
        if( nclasses>1 )
        {
            v = (double)(workbuf->trnlabelsi.ptr.p_int[idx0+hqrnduniformi(rs, idx1-idx0, _state)]);
        }
        else
        {
            v = workbuf->trnlabelsr.ptr.p_double[idx0+hqrnduniformi(rs, idx1-idx0, _state)];
        }
        dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, v, _state);
        return;
    }

    /*
     * A good split WAS found, we can perform it:
     * * first, we split the training set
     * * then, we similarly split the OOB set
     */
    ae_assert(s->dstype==0, "not supported 54bfdh", _state);
    offs = npoints*varbest;
    i1 = idx0;
    i2 = idx1-1;
    while(i1<=i2)
    {
        /*
         * Reorder indexes so that the left partition is in [Idx0..I1),
         * and the right partition is in [I2+1..Idx1)
         */
        if( workbuf->bestvals.ptr.p_double[i1]<splitbest )
        {
            i1 = i1+1;
            continue;
        }
        if( workbuf->bestvals.ptr.p_double[i2]>=splitbest )
        {
            i2 = i2-1;
            continue;
        }
        j = workbuf->trnset.ptr.p_int[i1];
        workbuf->trnset.ptr.p_int[i1] = workbuf->trnset.ptr.p_int[i2];
        workbuf->trnset.ptr.p_int[i2] = j;
        if( nclasses>1 )
        {
            j = workbuf->trnlabelsi.ptr.p_int[i1];
            workbuf->trnlabelsi.ptr.p_int[i1] = workbuf->trnlabelsi.ptr.p_int[i2];
            workbuf->trnlabelsi.ptr.p_int[i2] = j;
        }
        else
        {
            v = workbuf->trnlabelsr.ptr.p_double[i1];
            workbuf->trnlabelsr.ptr.p_double[i1] = workbuf->trnlabelsr.ptr.p_double[i2];
            workbuf->trnlabelsr.ptr.p_double[i2] = v;
        }
        i1 = i1+1;
        i2 = i2-1;
    }
    ae_assert(i1==i2+1, "BuildRandomTreeRec: integrity check failed (45rds3)", _state);
    idxtrn = i1;
    if( oobidx0<oobidx1 )
    {
        /*
         * Unlike the training subset, the out-of-bag subset corresponding to the
         * current sequence of decisions can be empty; thus, we have to explicitly
         * handle the situation of a zero OOB subset.
         */
        i1 = oobidx0;
        i2 = oobidx1-1;
        while(i1<=i2)
        {
            /*
             * Reorder indexes so that the left partition is in [OOBIdx0..I1),
             * and the right partition is in [I2+1..OOBIdx1)
             */
            if( s->dsdata.ptr.p_double[offs+workbuf->oobset.ptr.p_int[i1]]<splitbest )
            {
                i1 = i1+1;
                continue;
            }
            if( s->dsdata.ptr.p_double[offs+workbuf->oobset.ptr.p_int[i2]]>=splitbest )
            {
                i2 = i2-1;
                continue;
            }
            j = workbuf->oobset.ptr.p_int[i1];
            workbuf->oobset.ptr.p_int[i1] = workbuf->oobset.ptr.p_int[i2];
            workbuf->oobset.ptr.p_int[i2] = j;
            if( nclasses>1 )
            {
                j = workbuf->ooblabelsi.ptr.p_int[i1];
                workbuf->ooblabelsi.ptr.p_int[i1] = workbuf->ooblabelsi.ptr.p_int[i2];
                workbuf->ooblabelsi.ptr.p_int[i2] = j;
            }
            else
            {
                v = workbuf->ooblabelsr.ptr.p_double[i1];
                workbuf->ooblabelsr.ptr.p_double[i1] = workbuf->ooblabelsr.ptr.p_double[i2];
                workbuf->ooblabelsr.ptr.p_double[i2] = v;
            }
            i1 = i1+1;
            i2 = i2-1;
        }
        ae_assert(i1==i2+1, "BuildRandomTreeRec: integrity check failed (643fs3)", _state);
        idxoob = i1;
    }
    else
    {
        idxoob = oobidx0;
    }

    /*
     * Compute estimates of NRMS2 loss over the TRN or OOB subsets, update Gini importances
     */
    if( s->rdfimportance==dforest_needtrngini )
    {
        meanloss0 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->tmpnrms2, _state);
        meanloss1 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->tmpnrms2, _state);
    }
    else
    {
        meanloss0 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->ooblabelsi, &workbuf->ooblabelsr, oobidx0, idxoob, &workbuf->tmpnrms2, _state);
        meanloss1 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->ooblabelsi, &workbuf->ooblabelsr, idxoob, oobidx1, &workbuf->tmpnrms2, _state);
    }
    votebuf->giniimportances.ptr.p_double[varbest] = votebuf->giniimportances.ptr.p_double[varbest]+(meanloss-(meanloss0+meanloss1))/(topmostmeanloss+1.0e-20);

    /*
     * Generate tree node and subtrees (recursively)
     */
    treebuf->ptr.p_double[*treesize] = (double)(varbest);
    treebuf->ptr.p_double[*treesize+1] = splitbest;
    i = *treesize;
    *treesize = *treesize+dforest_innernodewidth;
    dforest_buildrandomtreerec(s, workbuf, workingset, varstoselect, treebuf, votebuf, rs, idx0, idxtrn, oobidx0, idxoob, meanloss0, topmostmeanloss, treesize, _state);
    treebuf->ptr.p_double[i+2] = (double)(*treesize);
    dforest_buildrandomtreerec(s, workbuf, workingset, varstoselect, treebuf, votebuf, rs, idxtrn, idx1, idxoob, oobidx1, meanloss1, topmostmeanloss, treesize, _state);
}
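

/*
 * The Gini importance update above accumulates the normalized impurity
 * decrease of the chosen split:
 *
 *     dI = ( L(parent) - (L(left)+L(right)) ) / ( L(root) + 1.0e-20 )
 *
 * where L(...) is the NRMS2 loss returned by dforest_meannrms2() and the
 * tiny constant guards against division by zero for pure roots. Summed
 * over all splits on a variable and averaged over trees (see
 * dforest_estimatevariableimportance() below), this yields the classic
 * mean-decrease-in-impurity importance rating.
 */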


/*************************************************************************
Estimates variable importance ratings (Gini or permutation, depending on
the importance type requested by the user) for the dataset and stores the
results in Rep.VarImportances[] and Rep.TopVars[].

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
static void dforest_estimatevariableimportance(decisionforestbuilder* s,
     ae_int_t sessionseed,
     decisionforest* df,
     ae_int_t ntrees,
     dfreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t npoints;
    ae_int_t nvars;
    ae_int_t nclasses;
    ae_int_t nperm;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    dfvotebuf *vote;
    ae_smart_ptr _vote;
    ae_vector tmpr0;
    ae_vector tmpr1;
    ae_vector tmpi0;
    ae_vector losses;
    dfpermimpbuf permseed;
    dfpermimpbuf *permresult;
    ae_smart_ptr _permresult;
    ae_shared_pool permpool;
    double nopermloss;
    double totalpermloss;
    hqrndstate varimprs;

    ae_frame_make(_state, &_frame_block);
    memset(&_vote, 0, sizeof(_vote));
    memset(&tmpr0, 0, sizeof(tmpr0));
    memset(&tmpr1, 0, sizeof(tmpr1));
    memset(&tmpi0, 0, sizeof(tmpi0));
    memset(&losses, 0, sizeof(losses));
    memset(&permseed, 0, sizeof(permseed));
    memset(&_permresult, 0, sizeof(_permresult));
    memset(&permpool, 0, sizeof(permpool));
    memset(&varimprs, 0, sizeof(varimprs));
    ae_smart_ptr_init(&_vote, (void**)&vote, _state, ae_true);
    ae_vector_init(&tmpr0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpr1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpi0, 0, DT_INT, _state, ae_true);
    ae_vector_init(&losses, 0, DT_REAL, _state, ae_true);
    _dfpermimpbuf_init(&permseed, _state, ae_true);
    ae_smart_ptr_init(&_permresult, (void**)&permresult, _state, ae_true);
    ae_shared_pool_init(&permpool, _state, ae_true);
    _hqrndstate_init(&varimprs, _state, ae_true);

    npoints = s->npoints;
    nvars = s->nvars;
    nclasses = s->nclasses;

    /*
     * No importance rating
     */
    if( s->rdfimportance==0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Gini importance
     */
    if( s->rdfimportance==dforest_needtrngini||s->rdfimportance==dforest_needoobgini )
    {
        /*
         * Merge OOB Gini importances computed during tree generation
         */
        ae_shared_pool_first_recycled(&s->votepool, &_vote, _state);
        while(vote!=NULL)
        {
            for(i=0; i<=nvars-1; i++)
            {
                rep->varimportances.ptr.p_double[i] = rep->varimportances.ptr.p_double[i]+vote->giniimportances.ptr.p_double[i]/ntrees;
            }
            ae_shared_pool_next_recycled(&s->votepool, &_vote, _state);
        }
        for(i=0; i<=nvars-1; i++)
        {
            rep->varimportances.ptr.p_double[i] = boundval(rep->varimportances.ptr.p_double[i], (double)(0), (double)(1), _state);
        }

        /*
         * Compute topvars[] array
         */
        ae_vector_set_length(&tmpr0, nvars, _state);
        for(j=0; j<=nvars-1; j++)
        {
            tmpr0.ptr.p_double[j] = -rep->varimportances.ptr.p_double[j];
            rep->topvars.ptr.p_int[j] = j;
        }
        tagsortfasti(&tmpr0, &rep->topvars, &tmpr1, &tmpi0, nvars, _state);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Permutation importance
     */
    if( s->rdfimportance==dforest_needpermutation )
    {
        ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
        ae_assert(s->iobmatrix.rows>=ntrees&&s->iobmatrix.cols>=npoints, "EstimateVariableImportance: integrity check failed (IOB)", _state);

        /*
         * Generate a packed representation of the shuffle which is applied to all variables
         *
         * Ideally we want to apply different permutations to different variables,
         * i.e. we have to generate and store NPoints*NVars random numbers.
         * However, due to performance and memory restrictions we prefer to use a
         * compact representation:
         * * we store one "reference" permutation P_ref in VarImpShuffle2[0:NPoints-1]
         * * a permutation P_j applied to variable J is obtained by circularly shifting
         *   elements in P_ref by VarImpShuffle2[NPoints+J]
         */
        hqrndseed(sessionseed, 1117, &varimprs, _state);
        ivectorsetlengthatleast(&s->varimpshuffle2, npoints+nvars, _state);
        for(i=0; i<=npoints-1; i++)
        {
            s->varimpshuffle2.ptr.p_int[i] = i;
        }
        for(i=0; i<=npoints-2; i++)
        {
            j = i+hqrnduniformi(&varimprs, npoints-i, _state);
            k = s->varimpshuffle2.ptr.p_int[i];
            s->varimpshuffle2.ptr.p_int[i] = s->varimpshuffle2.ptr.p_int[j];
            s->varimpshuffle2.ptr.p_int[j] = k;
        }
        for(i=0; i<=nvars-1; i++)
        {
            s->varimpshuffle2.ptr.p_int[npoints+i] = hqrnduniformi(&varimprs, npoints, _state);
        }
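
        /*
         * Worked example of the packed shuffle: with the layout above, the
         * permuted source index used for variable J and point I is
         *
         *     SrcIdx = VarImpShuffle2[ (I + VarImpShuffle2[NPoints+J]) mod NPoints ]
         *
         * i.e. one stored permutation plus NVars per-variable circular
         * shifts stand in for NVars independent permutations (this is
         * exactly how SrcIdx is computed in
         * dforest_estimatepermutationimportances() below).
         */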

        /*
         * Prepare buffer object, seed pool
         */
        nperm = nvars+2;
        ae_vector_set_length(&permseed.losses, nperm, _state);
        for(j=0; j<=nperm-1; j++)
        {
            permseed.losses.ptr.p_double[j] = (double)(0);
        }
        ae_vector_set_length(&permseed.yv, nperm*nclasses, _state);
        ae_vector_set_length(&permseed.xraw, nvars, _state);
        ae_vector_set_length(&permseed.xdist, nvars, _state);
        ae_vector_set_length(&permseed.xcur, nvars, _state);
        ae_vector_set_length(&permseed.targety, nclasses, _state);
        ae_vector_set_length(&permseed.startnodes, nvars, _state);
        ae_vector_set_length(&permseed.y, nclasses, _state);
        ae_shared_pool_set_seed(&permpool, &permseed, sizeof(permseed), _dfpermimpbuf_init, _dfpermimpbuf_init_copy, _dfpermimpbuf_destroy, _state);

        /*
         * Recursively split the subset and process (using parallel capabilities, if possible)
         */
        dforest_estimatepermutationimportances(s, df, ntrees, &permpool, 0, npoints, _state);

        /*
         * Merge results
         */
        ae_vector_set_length(&losses, nperm, _state);
        for(j=0; j<=nperm-1; j++)
        {
            losses.ptr.p_double[j] = 1.0e-20;
        }
        ae_shared_pool_first_recycled(&permpool, &_permresult, _state);
        while(permresult!=NULL)
        {
            for(j=0; j<=nperm-1; j++)
            {
                losses.ptr.p_double[j] = losses.ptr.p_double[j]+permresult->losses.ptr.p_double[j];
            }
            ae_shared_pool_next_recycled(&permpool, &_permresult, _state);
        }

        /*
         * Compute importances
         */
        nopermloss = losses.ptr.p_double[nvars+1];
        totalpermloss = losses.ptr.p_double[nvars];
        for(i=0; i<=nvars-1; i++)
        {
            rep->varimportances.ptr.p_double[i] = 1-nopermloss/totalpermloss-(1-losses.ptr.p_double[i]/totalpermloss);
            rep->varimportances.ptr.p_double[i] = boundval(rep->varimportances.ptr.p_double[i], (double)(0), (double)(1), _state);
        }

        /*
         * Compute topvars[] array
         */
        ae_vector_set_length(&tmpr0, nvars, _state);
        for(j=0; j<=nvars-1; j++)
        {
            tmpr0.ptr.p_double[j] = -rep->varimportances.ptr.p_double[j];
            rep->topvars.ptr.p_int[j] = j;
        }
        tagsortfasti(&tmpr0, &rep->topvars, &tmpr1, &tmpi0, nvars, _state);
        ae_frame_leave(_state);
        return;
    }
    ae_assert(ae_false, "EstimateVariableImportance: unexpected importance type", _state);
    ae_frame_leave(_state);
}


/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_dforest_estimatevariableimportance(decisionforestbuilder* s,
    ae_int_t sessionseed,
    decisionforest* df,
    ae_int_t ntrees,
    dfreport* rep,
    ae_state *_state)
{
    return ae_false;
}
|
|
|
|
|
|
/*************************************************************************
|
|
Estimates permutation variable importance ratings for a range of dataset
|
|
points.
|
|
|
|
Initial call to this function should span entire range of the dataset,
|
|
[Idx0,Idx1)=[0,NPoints), because function performs initialization of some
|
|
internal structures when called with these arguments.
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_estimatepermutationimportances(decisionforestbuilder* s,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
ae_shared_pool* permpool,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t npoints;
|
|
ae_int_t nvars;
|
|
ae_int_t nclasses;
|
|
ae_int_t nperm;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
double v;
|
|
ae_int_t treeroot;
|
|
ae_int_t nodeoffs;
|
|
double prediction;
|
|
ae_int_t varidx;
|
|
ae_int_t oobcounts;
|
|
ae_int_t srcidx;
|
|
dfpermimpbuf *permimpbuf;
|
|
ae_smart_ptr _permimpbuf;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&_permimpbuf, 0, sizeof(_permimpbuf));
|
|
ae_smart_ptr_init(&_permimpbuf, (void**)&permimpbuf, _state, ae_true);
|
|
|
|
npoints = s->npoints;
|
|
nvars = s->nvars;
|
|
nclasses = s->nclasses;
|
|
ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
|
|
ae_assert((idx0>=0&&idx0<=idx1)&&idx1<=npoints, "EstimateVariableImportance: integrity check failed (idx)", _state);
|
|
ae_assert(s->iobmatrix.rows>=ntrees&&s->iobmatrix.cols>=npoints, "EstimateVariableImportance: integrity check failed (IOB)", _state);
|
|
|
|
/*
|
|
* Perform parallelization if batch is too large
|
|
*/
|
|
if( idx1-idx0>dforest_permutationimportancebatchsize )
|
|
{
|
|
if( _trypexec_dforest_estimatepermutationimportances(s,df,ntrees,permpool,idx0,idx1, _state) )
|
|
{
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
j = (idx1-idx0)/2;
|
|
dforest_estimatepermutationimportances(s, df, ntrees, permpool, idx0, idx0+j, _state);
|
|
dforest_estimatepermutationimportances(s, df, ntrees, permpool, idx0+j, idx1, _state);
|
|
ae_frame_leave(_state);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Retrieve buffer object from pool
|
|
*/
|
|
ae_shared_pool_retrieve(permpool, &_permimpbuf, _state);
|
|
|
|
/*
|
|
* Process range of points [idx0,idx1)
|
|
*/
|
|
nperm = nvars+2;
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
ae_assert(s->dstype==0, "EstimateVariableImportance: unexpected dataset type", _state);
|
|
for(j=0; j<=nvars-1; j++)
|
|
{
|
|
permimpbuf->xraw.ptr.p_double[j] = s->dsdata.ptr.p_double[j*npoints+i];
|
|
srcidx = s->varimpshuffle2.ptr.p_int[(i+s->varimpshuffle2.ptr.p_int[npoints+j])%npoints];
|
|
permimpbuf->xdist.ptr.p_double[j] = s->dsdata.ptr.p_double[j*npoints+srcidx];
|
|
}
|
|
if( nclasses>1 )
|
|
{
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
permimpbuf->targety.ptr.p_double[j] = (double)(0);
|
|
}
|
|
permimpbuf->targety.ptr.p_double[s->dsival.ptr.p_int[i]] = (double)(1);
|
|
}
|
|
else
|
|
{
|
|
permimpbuf->targety.ptr.p_double[0] = s->dsrval.ptr.p_double[i];
|
|
}
|
|
|
|
/*
|
|
* Process all trees, for each tree compute NPerm losses corresponding
|
|
* to various permutations of variable values
|
|
*/
|
|
for(j=0; j<=nperm*nclasses-1; j++)
|
|
{
|
|
permimpbuf->yv.ptr.p_double[j] = (double)(0);
|
|
}
|
|
oobcounts = 0;
|
|
treeroot = 0;
|
|
for(k=0; k<=ntrees-1; k++)
|
|
{
|
|
if( !s->iobmatrix.ptr.pp_bool[k][i] )
|
|
{
|
|
|
|
/*
|
|
* Process original (unperturbed) point and analyze path from the
|
|
* tree root to the final leaf. Output prediction to RawPrediction.
|
|
*
|
|
* Additionally, for each variable in [0,NVars-1] save offset of
|
|
* the first split on this variable. It allows us to quickly compute
|
|
* tree decision when perturbation does not change decision path.
|
|
*/
|
|
ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
|
|
nodeoffs = treeroot+1;
|
|
for(j=0; j<=nvars-1; j++)
|
|
{
|
|
permimpbuf->startnodes.ptr.p_int[j] = -1;
|
|
}
|
|
prediction = (double)(0);
|
|
for(;;)
|
|
{
|
|
if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
|
|
{
|
|
prediction = df->trees.ptr.p_double[nodeoffs+1];
|
|
break;
|
|
}
|
|
j = ae_round(df->trees.ptr.p_double[nodeoffs], _state);
|
|
if( permimpbuf->startnodes.ptr.p_int[j]<0 )
|
|
{
|
|
permimpbuf->startnodes.ptr.p_int[j] = nodeoffs;
|
|
}
|
|
if( permimpbuf->xraw.ptr.p_double[j]<df->trees.ptr.p_double[nodeoffs+1] )
|
|
{
|
|
nodeoffs = nodeoffs+dforest_innernodewidth;
|
|
}
|
|
else
|
|
{
|
|
nodeoffs = treeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Save loss for unperturbed point
|
|
*/
|
|
varidx = nvars+1;
|
|
if( nclasses>1 )
|
|
{
|
|
j = ae_round(prediction, _state);
|
|
permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
|
|
}
|
|
else
|
|
{
|
|
permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+prediction;
|
|
}
|
|
|
|
/*
|
|
* Save loss for all variables being perturbed (XDist).
|
|
* This loss is used as a reference loss when we compute R-squared.
|
|
*/
|
|
varidx = nvars;
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
permimpbuf->y.ptr.p_double[j] = (double)(0);
|
|
}
|
|
dforest_dfprocessinternaluncompressed(df, treeroot, treeroot+1, &permimpbuf->xdist, &permimpbuf->y, _state);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+permimpbuf->y.ptr.p_double[j];
|
|
}
|
|
|
|
/*
|
|
* Compute losses for variable #VarIdx being perturbed. Quite an often decision
|
|
* process does not actually depend on the variable #VarIdx (path from the tree
|
|
* root does not include splits on this variable). In such cases we perform
|
|
* quick exit from the loop with precomputed value.
|
|
*/
|
|
for(j=0; j<=nvars-1; j++)
|
|
{
|
|
permimpbuf->xcur.ptr.p_double[j] = permimpbuf->xraw.ptr.p_double[j];
|
|
}
|
|
for(varidx=0; varidx<=nvars-1; varidx++)
|
|
{
|
|
if( permimpbuf->startnodes.ptr.p_int[varidx]>=0 )
|
|
{
|
|
|
|
/*
|
|
* Path from tree root to the final leaf involves split on variable #VarIdx.
|
|
* Restart computation from the position first split on #VarIdx.
|
|
*/
|
|
ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
|
|
permimpbuf->xcur.ptr.p_double[varidx] = permimpbuf->xdist.ptr.p_double[varidx];
|
|
nodeoffs = permimpbuf->startnodes.ptr.p_int[varidx];
|
|
for(;;)
|
|
{
|
|
if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
|
|
{
|
|
if( nclasses>1 )
|
|
{
|
|
j = ae_round(df->trees.ptr.p_double[nodeoffs+1], _state);
|
|
permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
|
|
}
|
|
else
|
|
{
|
|
permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+df->trees.ptr.p_double[nodeoffs+1];
|
|
}
|
|
break;
|
|
}
|
|
j = ae_round(df->trees.ptr.p_double[nodeoffs], _state);
|
|
if( permimpbuf->xcur.ptr.p_double[j]<df->trees.ptr.p_double[nodeoffs+1] )
|
|
{
|
|
nodeoffs = nodeoffs+dforest_innernodewidth;
|
|
}
|
|
else
|
|
{
|
|
nodeoffs = treeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
|
|
}
|
|
}
|
|
permimpbuf->xcur.ptr.p_double[varidx] = permimpbuf->xraw.ptr.p_double[varidx];
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Path from tree root to the final leaf does NOT involve split on variable #VarIdx.
|
|
* Permutation does not change tree output, reuse already computed value.
|
|
*/
|
|
if( nclasses>1 )
|
|
{
|
|
j = ae_round(prediction, _state);
|
|
permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
|
|
}
|
|
else
|
|
{
|
|
permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+prediction;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* update OOB counter
|
|
*/
|
|
inc(&oobcounts, _state);
|
|
}
|
|
treeroot = treeroot+ae_round(df->trees.ptr.p_double[treeroot], _state);
|
|
}
|
|
|
|
/*
|
|
* Now YV[] stores NPerm versions of the forest output for various permutations of variable values.
|
|
* Update losses.
|
|
*/
|
|
for(j=0; j<=nperm-1; j++)
|
|
{
|
|
for(k=0; k<=nclasses-1; k++)
|
|
{
|
|
permimpbuf->yv.ptr.p_double[j*nclasses+k] = permimpbuf->yv.ptr.p_double[j*nclasses+k]/coalesce((double)(oobcounts), (double)(1), _state);
|
|
}
|
|
v = (double)(0);
|
|
for(k=0; k<=nclasses-1; k++)
|
|
{
|
|
v = v+ae_sqr(permimpbuf->yv.ptr.p_double[j*nclasses+k]-permimpbuf->targety.ptr.p_double[k], _state);
|
|
}
|
|
permimpbuf->losses.ptr.p_double[j] = permimpbuf->losses.ptr.p_double[j]+v;
|
|
}
|
|
|
|
/*
|
|
* Update progress indicator
|
|
*/
|
|
threadunsafeincby(&s->rdfprogress, ntrees, _state);
|
|
}
|
|
|
|
/*
|
|
* Recycle buffer object with updated Losses[] field
|
|
*/
|
|
ae_shared_pool_recycle(permpool, &_permimpbuf, _state);
|
|
ae_frame_leave(_state);
|
|
}
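
/*************************************************************************
Note on the Losses[] layout used above (an inference from the code, not a
statement from the original sources): for every OOB point the buffer
accumulates NVars+2 squared-error sums - Losses[I] for variable #I being
permuted (I<NVars), Losses[NVars] for ALL variables being permuted (the
reference loss mentioned in the comments), and Losses[NVars+1] for the
unperturbed point. A plausible R-squared-style importance consistent with
these comments is

    Imp[I] = (Losses[I]-Losses[NVars+1]) / (Losses[NVars]-Losses[NVars+1])

so a variable whose permutation hurts the forest as much as permuting
everything scores close to 1, while a variable the trees never split on
scores close to 0.
*************************************************************************/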
|
|
|
|
|
|
/*************************************************************************
|
|
Serial stub for GPL edition.
|
|
*************************************************************************/
|
|
ae_bool _trypexec_dforest_estimatepermutationimportances(decisionforestbuilder* s,
|
|
decisionforest* df,
|
|
ae_int_t ntrees,
|
|
ae_shared_pool* permpool,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_state *_state)
|
|
{
|
|
return ae_false;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Sets report fields to their default values
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_cleanreport(decisionforestbuilder* s,
|
|
dfreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
|
|
|
|
rep->relclserror = (double)(0);
|
|
rep->avgce = (double)(0);
|
|
rep->rmserror = (double)(0);
|
|
rep->avgerror = (double)(0);
|
|
rep->avgrelerror = (double)(0);
|
|
rep->oobrelclserror = (double)(0);
|
|
rep->oobavgce = (double)(0);
|
|
rep->oobrmserror = (double)(0);
|
|
rep->oobavgerror = (double)(0);
|
|
rep->oobavgrelerror = (double)(0);
|
|
ae_vector_set_length(&rep->topvars, s->nvars, _state);
|
|
ae_vector_set_length(&rep->varimportances, s->nvars, _state);
|
|
for(i=0; i<=s->nvars-1; i++)
|
|
{
|
|
rep->topvars.ptr.p_int[i] = i;
|
|
rep->varimportances.ptr.p_double[i] = (double)(0);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns NRMS2 loss (sum of squared residuals) for a constant-
|
|
output model:
|
|
* model output is a mean over TRN set being passed (for classification
|
|
problems - NClasses-dimensional vector of class probabilities)
|
|
* model is evaluated over TST set being passed, with L2 loss being returned
|
|
|
|
Input parameters:
|
|
NClasses - ">1" for classification, "=1" for regression
|
|
TrnLabelsI - training set labels, class indexes (for NClasses>1)
|
|
TrnLabelsR - training set output values (for NClasses=1)
|
|
TrnIdx0, TrnIdx1 - a range [Idx0,Idx1) of elements in LabelsI/R is considered
|
|
TstLabelsI - test set labels, class indexes (for NClasses>1)
|
|
TstLabelsR - test set output values (for NClasses=1)
|
|
TstIdx0, TstIdx1 - a range [Idx0,Idx1) of elements in LabelsI/R is considered
|
|
TmpI - temporary array, reallocated as needed
|
|
|
|
Result:
|
|
sum of squared residuals;
|
|
for NClasses>=2 it coincides with Gini impurity times (Idx1-Idx0)
|
|
|
|
Following fields of WorkBuf are used as temporaries:
|
|
* TmpMeanNRMS2
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static double dforest_meannrms2(ae_int_t nclasses,
|
|
/* Integer */ ae_vector* trnlabelsi,
|
|
/* Real */ ae_vector* trnlabelsr,
|
|
ae_int_t trnidx0,
|
|
ae_int_t trnidx1,
|
|
/* Integer */ ae_vector* tstlabelsi,
|
|
/* Real */ ae_vector* tstlabelsr,
|
|
ae_int_t tstidx0,
|
|
ae_int_t tstidx1,
|
|
/* Integer */ ae_vector* tmpi,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t k;
|
|
ae_int_t ntrn;
|
|
ae_int_t ntst;
|
|
double v;
|
|
double vv;
|
|
double invntrn;
|
|
double pitrn;
|
|
double nitst;
|
|
double result;
|
|
|
|
|
|
ae_assert(trnidx0<=trnidx1, "MeanNRMS2: integrity check failed (8754)", _state);
|
|
ae_assert(tstidx0<=tstidx1, "MeanNRMS2: integrity check failed (8754)", _state);
|
|
result = (double)(0);
|
|
ntrn = trnidx1-trnidx0;
|
|
ntst = tstidx1-tstidx0;
|
|
if( ntrn==0||ntst==0 )
|
|
{
|
|
return result;
|
|
}
|
|
invntrn = 1.0/ntrn;
|
|
if( nclasses>1 )
|
|
{
|
|
|
|
/*
|
|
* Classification problem
|
|
*/
|
|
ivectorsetlengthatleast(tmpi, 2*nclasses, _state);
|
|
for(i=0; i<=2*nclasses-1; i++)
|
|
{
|
|
tmpi->ptr.p_int[i] = 0;
|
|
}
|
|
for(i=trnidx0; i<=trnidx1-1; i++)
|
|
{
|
|
k = trnlabelsi->ptr.p_int[i];
|
|
tmpi->ptr.p_int[k] = tmpi->ptr.p_int[k]+1;
|
|
}
|
|
for(i=tstidx0; i<=tstidx1-1; i++)
|
|
{
|
|
k = tstlabelsi->ptr.p_int[i];
|
|
tmpi->ptr.p_int[k+nclasses] = tmpi->ptr.p_int[k+nclasses]+1;
|
|
}
|
|
for(i=0; i<=nclasses-1; i++)
|
|
{
|
|
pitrn = tmpi->ptr.p_int[i]*invntrn;
|
|
nitst = (double)(tmpi->ptr.p_int[i+nclasses]);
|
|
result = result+nitst*(1-pitrn)*(1-pitrn);
|
|
result = result+(ntst-nitst)*pitrn*pitrn;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* regression-specific code
|
|
*/
|
|
v = (double)(0);
|
|
for(i=trnidx0; i<=trnidx1-1; i++)
|
|
{
|
|
v = v+trnlabelsr->ptr.p_double[i];
|
|
}
|
|
v = v*invntrn;
|
|
for(i=tstidx0; i<=tstidx1-1; i++)
|
|
{
|
|
vv = tstlabelsr->ptr.p_double[i]-v;
|
|
result = result+vv*vv;
|
|
}
|
|
}
|
|
return result;
|
|
}
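
/*************************************************************************
Illustrative sketch (not part of the original sources; the function name
below is hypothetical): the classification branch of MeanNRMS2() above is
the L2 loss of a constant model which always outputs training-set class
frequencies. A minimal standalone version with plain arrays:
*************************************************************************/
static double demo_constant_model_l2_loss(const int* trncounts,
     const int* tstcounts,
     int nclasses,
     int ntrn,
     int ntst)
{
    int i;
    double p;
    double result;
    
    /*
     * The model predicts P[i]=TrnCounts[i]/NTrn for every test point;
     * a test point of class i contributes (1-P[i])^2 for its own class
     * and P[j]^2 for every other class j, exactly as in the loop above.
     */
    result = 0.0;
    for(i=0; i<nclasses; i++)
    {
        p = (double)trncounts[i]/(double)ntrn;
        result = result+tstcounts[i]*(1.0-p)*(1.0-p);
        result = result+(ntst-tstcounts[i])*p*p;
    }
    return result;
}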
|
|
|
|
|
|
/*************************************************************************
|
|
This function is a part of the recurrent tree construction function; it
|
|
selects variable for splitting according to current tree construction
|
|
algorithm.
|
|
|
|
Note: modifies VarsInPool, may decrease it if some variables become non-informative
|
|
and leave the pool.
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_choosecurrentsplitdense(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
ae_int_t* varsinpool,
|
|
ae_int_t varstoselect,
|
|
hqrndstate* rs,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t* varbest,
|
|
double* splitbest,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t npoints;
|
|
double errbest;
|
|
ae_int_t varstried;
|
|
ae_int_t varcur;
|
|
ae_bool valuesaresame;
|
|
ae_int_t offs;
|
|
double split;
|
|
ae_int_t i;
|
|
double v;
|
|
double v0;
|
|
double currms;
|
|
ae_int_t info;
|
|
|
|
*varbest = 0;
|
|
*splitbest = 0;
|
|
|
|
ae_assert(s->dstype==0, "sparsity is not supported 4terg!", _state);
|
|
ae_assert(s->rdfalgo==0, "BuildRandomTreeRec: integrity check failed (1657)", _state);
|
|
ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (3445)", _state);
|
|
npoints = s->npoints;
|
|
|
|
/*
|
|
* Select split according to dense direct RDF algorithm
|
|
*/
|
|
*varbest = -1;
|
|
errbest = ae_maxrealnumber;
|
|
*splitbest = (double)(0);
|
|
varstried = 0;
|
|
while(varstried<=ae_minint(varstoselect, *varsinpool, _state)-1)
|
|
{
|
|
|
|
/*
|
|
* select variables from pool
|
|
*/
|
|
swapelementsi(&workbuf->varpool, varstried, varstried+hqrnduniformi(rs, *varsinpool-varstried, _state), _state);
|
|
varcur = workbuf->varpool.ptr.p_int[varstried];
|
|
|
|
/*
|
|
* Load variable values into the working array.
|
|
* If all values are the same, the variable is excluded from the pool and variable selection is re-run.
|
|
*/
|
|
valuesaresame = ae_true;
|
|
ae_assert(s->dstype==0, "not supported segsv34fs", _state);
|
|
offs = npoints*varcur;
|
|
v0 = s->dsdata.ptr.p_double[offs+workbuf->trnset.ptr.p_int[idx0]];
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
v = s->dsdata.ptr.p_double[offs+workbuf->trnset.ptr.p_int[i]];
|
|
workbuf->curvals.ptr.p_double[i] = v;
|
|
valuesaresame = valuesaresame&&v==v0;
|
|
}
|
|
if( valuesaresame )
|
|
{
|
|
|
|
/*
|
|
* Variable does not change across current subset.
|
|
* Exclude variable from pool, go to the next iteration.
|
|
* VarsTried is not increased.
|
|
*
|
|
* NOTE: it is essential that the updated VarsInPool is passed
|
|
* down to children but not up to the caller - it is
|
|
* possible that one level higher this variable is
|
|
* not constant.
|
|
*/
|
|
swapelementsi(&workbuf->varpool, varstried, *varsinpool-1, _state);
|
|
*varsinpool = *varsinpool-1;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Now we are ready to infer the split
|
|
*/
|
|
dforest_evaluatedensesplit(s, workbuf, rs, varcur, idx0, idx1, &info, &split, &currms, _state);
|
|
if( info>0&&(*varbest<0||ae_fp_less_eq(currms,errbest)) )
|
|
{
|
|
errbest = currms;
|
|
*varbest = varcur;
|
|
*splitbest = split;
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
workbuf->bestvals.ptr.p_double[i] = workbuf->curvals.ptr.p_double[i];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Next iteration
|
|
*/
|
|
varstried = varstried+1;
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function performs split on some specific dense variable whose values
|
|
are stored in WorkBuf.CurVals[Idx0,Idx1) and labels are stored in
|
|
WorkBuf.TrnLabelsR/I[Idx0,Idx1).
|
|
|
|
It returns the split value and the associated RMS error. It is the
|
|
responsibility of the caller to make sure that the variable has at least
|
|
two distinct values, i.e. that it is possible to make a split.
|
|
|
|
Precomputed values of following fields of WorkBuf are used:
|
|
* ClassPriors
|
|
|
|
Following fields of WorkBuf are used as temporaries:
|
|
* ClassTotals0,1,01
|
|
* Tmp0I, Tmp1I, Tmp0R, Tmp1R, Tmp2R, Tmp3R
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_evaluatedensesplit(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
hqrndstate* rs,
|
|
ae_int_t splitvar,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t* info,
|
|
double* split,
|
|
double* rms,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t nclasses;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k0;
|
|
ae_int_t k1;
|
|
double v;
|
|
double v0;
|
|
double v1;
|
|
double v2;
|
|
ae_int_t sl;
|
|
ae_int_t sr;
|
|
|
|
*info = 0;
|
|
*split = 0;
|
|
*rms = 0;
|
|
|
|
ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (8754)", _state);
|
|
nclasses = s->nclasses;
|
|
if( s->dsbinary.ptr.p_bool[splitvar] )
|
|
{
|
|
|
|
/*
|
|
* Try simple binary split, if possible
|
|
* Split can be inferred from minimum/maximum values, just calculate RMS error
|
|
*/
|
|
*info = 1;
|
|
*split = dforest_getsplit(s, s->dsmin.ptr.p_double[splitvar], s->dsmax.ptr.p_double[splitvar], rs, _state);
|
|
if( nclasses>1 )
|
|
{
|
|
|
|
/*
|
|
* Classification problem
|
|
*/
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
workbuf->classtotals0.ptr.p_int[j] = 0;
|
|
}
|
|
sl = 0;
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
if( workbuf->curvals.ptr.p_double[i]<*split )
|
|
{
|
|
j = workbuf->trnlabelsi.ptr.p_int[i];
|
|
workbuf->classtotals0.ptr.p_int[j] = workbuf->classtotals0.ptr.p_int[j]+1;
|
|
sl = sl+1;
|
|
}
|
|
}
|
|
sr = idx1-idx0-sl;
|
|
ae_assert(sl!=0&&sr!=0, "BuildRandomTreeRec: something strange, impossible failure!", _state);
|
|
v0 = (double)1/(double)sl;
|
|
v1 = (double)1/(double)sr;
|
|
*rms = (double)(0);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
k0 = workbuf->classtotals0.ptr.p_int[j];
|
|
k1 = workbuf->classpriors.ptr.p_int[j]-k0;
|
|
*rms = *rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
}
|
|
*rms = ae_sqrt(*rms/(nclasses*(idx1-idx0+1)), _state);
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* regression-specific code
|
|
*/
|
|
sl = 0;
|
|
sr = 0;
|
|
v1 = (double)(0);
|
|
v2 = (double)(0);
|
|
for(j=idx0; j<=idx1-1; j++)
|
|
{
|
|
if( workbuf->curvals.ptr.p_double[j]<*split )
|
|
{
|
|
v1 = v1+workbuf->trnlabelsr.ptr.p_double[j];
|
|
sl = sl+1;
|
|
}
|
|
else
|
|
{
|
|
v2 = v2+workbuf->trnlabelsr.ptr.p_double[j];
|
|
sr = sr+1;
|
|
}
|
|
}
|
|
ae_assert(sl!=0&&sr!=0, "BuildRandomTreeRec: something strange, impossible failure!", _state);
|
|
v1 = v1/sl;
|
|
v2 = v2/sr;
|
|
*rms = (double)(0);
|
|
for(j=0; j<=idx1-idx0-1; j++)
|
|
{
|
|
v = workbuf->trnlabelsr.ptr.p_double[idx0+j];
|
|
if( workbuf->curvals.ptr.p_double[j]<*split )
|
|
{
|
|
v = v-v1;
|
|
}
|
|
else
|
|
{
|
|
v = v-v2;
|
|
}
|
|
*rms = *rms+v*v;
|
|
}
|
|
*rms = ae_sqrt(*rms/(idx1-idx0+1), _state);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* General split
|
|
*/
|
|
*info = 0;
|
|
if( nclasses>1 )
|
|
{
|
|
for(i=0; i<=idx1-idx0-1; i++)
|
|
{
|
|
workbuf->tmp0r.ptr.p_double[i] = workbuf->curvals.ptr.p_double[idx0+i];
|
|
workbuf->tmp0i.ptr.p_int[i] = workbuf->trnlabelsi.ptr.p_int[idx0+i];
|
|
}
|
|
dforest_classifiersplit(s, workbuf, &workbuf->tmp0r, &workbuf->tmp0i, idx1-idx0, rs, info, split, rms, &workbuf->tmp1r, &workbuf->tmp1i, _state);
|
|
}
|
|
else
|
|
{
|
|
for(i=0; i<=idx1-idx0-1; i++)
|
|
{
|
|
workbuf->tmp0r.ptr.p_double[i] = workbuf->curvals.ptr.p_double[idx0+i];
|
|
workbuf->tmp1r.ptr.p_double[i] = workbuf->trnlabelsr.ptr.p_double[idx0+i];
|
|
}
|
|
dforest_regressionsplit(s, workbuf, &workbuf->tmp0r, &workbuf->tmp1r, idx1-idx0, info, split, rms, &workbuf->tmp2r, &workbuf->tmp3r, _state);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Classifier split
|
|
*************************************************************************/
|
|
static void dforest_classifiersplit(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
/* Real */ ae_vector* x,
|
|
/* Integer */ ae_vector* c,
|
|
ae_int_t n,
|
|
hqrndstate* rs,
|
|
ae_int_t* info,
|
|
double* threshold,
|
|
double* e,
|
|
/* Real */ ae_vector* sortrbuf,
|
|
/* Integer */ ae_vector* sortibuf,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t n0;
|
|
ae_int_t n0prev;
|
|
double v;
|
|
ae_int_t advanceby;
|
|
double rms;
|
|
ae_int_t k0;
|
|
ae_int_t k1;
|
|
double v0;
|
|
double v1;
|
|
ae_int_t nclasses;
|
|
double vmin;
|
|
double vmax;
|
|
|
|
*info = 0;
|
|
*threshold = 0;
|
|
*e = 0;
|
|
|
|
ae_assert((s->rdfsplitstrength==0||s->rdfsplitstrength==1)||s->rdfsplitstrength==2, "RDF: unexpected split type at ClassifierSplit()", _state);
|
|
nclasses = s->nclasses;
|
|
advanceby = 1;
|
|
if( n>=20 )
|
|
{
|
|
advanceby = ae_maxint(2, ae_round(n*0.05, _state), _state);
|
|
}
|
|
*info = -1;
|
|
*threshold = (double)(0);
|
|
*e = ae_maxrealnumber;
|
|
|
|
/*
|
|
* Random split
|
|
*/
|
|
if( s->rdfsplitstrength==0 )
|
|
{
|
|
|
|
/*
|
|
* Evaluate minimum, maximum and randomly selected values
|
|
*/
|
|
vmin = x->ptr.p_double[0];
|
|
vmax = x->ptr.p_double[0];
|
|
for(i=1; i<=n-1; i++)
|
|
{
|
|
v = x->ptr.p_double[i];
|
|
if( v<vmin )
|
|
{
|
|
vmin = v;
|
|
}
|
|
if( v>vmax )
|
|
{
|
|
vmax = v;
|
|
}
|
|
}
|
|
if( ae_fp_eq(vmin,vmax) )
|
|
{
|
|
return;
|
|
}
|
|
v = x->ptr.p_double[hqrnduniformi(rs, n, _state)];
|
|
if( ae_fp_eq(v,vmin) )
|
|
{
|
|
v = vmax;
|
|
}
|
|
|
|
/*
|
|
* Calculate RMS error associated with the split
|
|
*/
|
|
for(i=0; i<=nclasses-1; i++)
|
|
{
|
|
workbuf->classtotals0.ptr.p_int[i] = 0;
|
|
}
|
|
n0 = 0;
|
|
for(i=0; i<=n-1; i++)
|
|
{
|
|
if( x->ptr.p_double[i]<v )
|
|
{
|
|
k = c->ptr.p_int[i];
|
|
workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
|
|
n0 = n0+1;
|
|
}
|
|
}
|
|
ae_assert(n0>0&&n0<n, "RDF: critical integrity check failed at ClassifierSplit()", _state);
|
|
v0 = (double)1/(double)n0;
|
|
v1 = (double)1/(double)(n-n0);
|
|
rms = (double)(0);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
k0 = workbuf->classtotals0.ptr.p_int[j];
|
|
k1 = workbuf->classpriors.ptr.p_int[j]-k0;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
}
|
|
*threshold = v;
|
|
*info = 1;
|
|
*e = rms;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Stronger splits which require us to sort the data
|
|
* Quick check for degeneracy
|
|
*/
|
|
tagsortfasti(x, c, sortrbuf, sortibuf, n, _state);
|
|
v = 0.5*(x->ptr.p_double[0]+x->ptr.p_double[n-1]);
|
|
if( !(ae_fp_less(x->ptr.p_double[0],v)&&ae_fp_less(v,x->ptr.p_double[n-1])) )
|
|
{
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Split at the middle
|
|
*/
|
|
if( s->rdfsplitstrength==1 )
|
|
{
|
|
|
|
/*
|
|
* Select split position
|
|
*/
|
|
vmin = x->ptr.p_double[0];
|
|
vmax = x->ptr.p_double[n-1];
|
|
v = x->ptr.p_double[n/2];
|
|
if( ae_fp_eq(v,vmin) )
|
|
{
|
|
v = vmin+0.001*(vmax-vmin);
|
|
}
|
|
if( ae_fp_eq(v,vmin) )
|
|
{
|
|
v = vmax;
|
|
}
|
|
|
|
/*
|
|
* Calculate RMS error associated with the split
|
|
*/
|
|
for(i=0; i<=nclasses-1; i++)
|
|
{
|
|
workbuf->classtotals0.ptr.p_int[i] = 0;
|
|
}
|
|
n0 = 0;
|
|
for(i=0; i<=n-1; i++)
|
|
{
|
|
if( x->ptr.p_double[i]<v )
|
|
{
|
|
k = c->ptr.p_int[i];
|
|
workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
|
|
n0 = n0+1;
|
|
}
|
|
}
|
|
ae_assert(n0>0&&n0<n, "RDF: critical integrity check failed at ClassifierSplit()", _state);
|
|
v0 = (double)1/(double)n0;
|
|
v1 = (double)1/(double)(n-n0);
|
|
rms = (double)(0);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
k0 = workbuf->classtotals0.ptr.p_int[j];
|
|
k1 = workbuf->classpriors.ptr.p_int[j]-k0;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
}
|
|
*threshold = v;
|
|
*info = 1;
|
|
*e = rms;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Strong split
|
|
*/
|
|
if( s->rdfsplitstrength==2 )
|
|
{
|
|
|
|
/*
|
|
* Prepare initial split.
|
|
* Evaluate current split, prepare next one, repeat.
|
|
*/
|
|
for(i=0; i<=nclasses-1; i++)
|
|
{
|
|
workbuf->classtotals0.ptr.p_int[i] = 0;
|
|
}
|
|
n0 = 1;
|
|
while(n0<n&&x->ptr.p_double[n0]==x->ptr.p_double[n0-1])
|
|
{
|
|
n0 = n0+1;
|
|
}
|
|
ae_assert(n0<n, "RDF: critical integrity check failed in ClassifierSplit()", _state);
|
|
for(i=0; i<=n0-1; i++)
|
|
{
|
|
k = c->ptr.p_int[i];
|
|
workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
|
|
}
|
|
*info = -1;
|
|
*threshold = x->ptr.p_double[n-1];
|
|
*e = ae_maxrealnumber;
|
|
while(n0<n)
|
|
{
|
|
|
|
/*
|
|
* RMS error associated with current split
|
|
*/
|
|
v0 = (double)1/(double)n0;
|
|
v1 = (double)1/(double)(n-n0);
|
|
rms = (double)(0);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
k0 = workbuf->classtotals0.ptr.p_int[j];
|
|
k1 = workbuf->classpriors.ptr.p_int[j]-k0;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
}
|
|
if( *info<0||rms<*e )
|
|
{
|
|
*info = 1;
|
|
*e = rms;
|
|
*threshold = 0.5*(x->ptr.p_double[n0-1]+x->ptr.p_double[n0]);
|
|
if( *threshold<=x->ptr.p_double[n0-1] )
|
|
{
|
|
*threshold = x->ptr.p_double[n0];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Advance
|
|
*/
|
|
n0prev = n0;
|
|
while(n0<n&&n0-n0prev<advanceby)
|
|
{
|
|
v = x->ptr.p_double[n0];
|
|
while(n0<n&&x->ptr.p_double[n0]==v)
|
|
{
|
|
k = c->ptr.p_int[n0];
|
|
workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
|
|
n0 = n0+1;
|
|
}
|
|
}
|
|
}
|
|
if( *info>0 )
|
|
{
|
|
*e = ae_sqrt(*e/(nclasses*n), _state);
|
|
}
|
|
return;
|
|
}
|
|
ae_assert(ae_false, "RDF: ClassifierSplit(), critical error", _state);
|
|
}
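
/*************************************************************************
Note on the impurity formula accumulated in ClassifierSplit() above (a
derivation added for clarity, not from the original sources): with N0
points going left, N1=N-N0 going right and per-class counts K0[j], K1[j],

    rms = SUM(j) [ K0[j]*(1-K0[j]/N0) + K1[j]*(1-K1[j]/N1) ]
        = N0*Gini(left) + N1*Gini(right)

since Gini(S)=1-SUM(j) p[j]^2 and SUM(j) K0[j]=N0. Minimizing rms is thus
the classic size-weighted Gini criterion; the ae_sqrt(rms/(nclasses*n))
applied at the end of the strong-split branch is a monotonic rescaling of
this same quantity.
*************************************************************************/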
|
|
|
|
|
|
/*************************************************************************
|
|
Regression model split
|
|
*************************************************************************/
|
|
static void dforest_regressionsplit(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
/* Real */ ae_vector* x,
|
|
/* Real */ ae_vector* y,
|
|
ae_int_t n,
|
|
ae_int_t* info,
|
|
double* threshold,
|
|
double* e,
|
|
/* Real */ ae_vector* sortrbuf,
|
|
/* Real */ ae_vector* sortrbuf2,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t i;
|
|
double vmin;
|
|
double vmax;
|
|
double bnd01;
|
|
double bnd12;
|
|
double bnd23;
|
|
ae_int_t total0;
|
|
ae_int_t total1;
|
|
ae_int_t total2;
|
|
ae_int_t total3;
|
|
ae_int_t cnt0;
|
|
ae_int_t cnt1;
|
|
ae_int_t cnt2;
|
|
ae_int_t cnt3;
|
|
ae_int_t n0;
|
|
ae_int_t advanceby;
|
|
double v;
|
|
double v0;
|
|
double v1;
|
|
double rms;
|
|
ae_int_t n0prev;
|
|
ae_int_t k0;
|
|
ae_int_t k1;
|
|
|
|
*info = 0;
|
|
*threshold = 0;
|
|
*e = 0;
|
|
|
|
advanceby = 1;
|
|
if( n>=20 )
|
|
{
|
|
advanceby = ae_maxint(2, ae_round(n*0.05, _state), _state);
|
|
}
|
|
|
|
/*
|
|
* Sort data
|
|
* Quick check for degeneracy
|
|
*/
|
|
tagsortfastr(x, y, sortrbuf, sortrbuf2, n, _state);
|
|
v = 0.5*(x->ptr.p_double[0]+x->ptr.p_double[n-1]);
|
|
if( !(ae_fp_less(x->ptr.p_double[0],v)&&ae_fp_less(v,x->ptr.p_double[n-1])) )
|
|
{
|
|
*info = -1;
|
|
*threshold = x->ptr.p_double[n-1];
|
|
*e = ae_maxrealnumber;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Prepare initial split.
|
|
* Evaluate current split, prepare next one, repeat.
|
|
*/
|
|
vmin = y->ptr.p_double[0];
|
|
vmax = y->ptr.p_double[0];
|
|
for(i=1; i<=n-1; i++)
|
|
{
|
|
v = y->ptr.p_double[i];
|
|
if( v<vmin )
|
|
{
|
|
vmin = v;
|
|
}
|
|
if( v>vmax )
|
|
{
|
|
vmax = v;
|
|
}
|
|
}
|
|
bnd12 = 0.5*(vmin+vmax);
|
|
bnd01 = 0.5*(vmin+bnd12);
|
|
bnd23 = 0.5*(vmax+bnd12);
|
|
total0 = 0;
|
|
total1 = 0;
|
|
total2 = 0;
|
|
total3 = 0;
|
|
for(i=0; i<=n-1; i++)
|
|
{
|
|
v = y->ptr.p_double[i];
|
|
if( v<bnd12 )
|
|
{
|
|
if( v<bnd01 )
|
|
{
|
|
total0 = total0+1;
|
|
}
|
|
else
|
|
{
|
|
total1 = total1+1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( v<bnd23 )
|
|
{
|
|
total2 = total2+1;
|
|
}
|
|
else
|
|
{
|
|
total3 = total3+1;
|
|
}
|
|
}
|
|
}
|
|
n0 = 1;
|
|
while(n0<n&&x->ptr.p_double[n0]==x->ptr.p_double[n0-1])
|
|
{
|
|
n0 = n0+1;
|
|
}
|
|
ae_assert(n0<n, "RDF: critical integrity check failed in RegressionSplit()", _state);
|
|
cnt0 = 0;
|
|
cnt1 = 0;
|
|
cnt2 = 0;
|
|
cnt3 = 0;
|
|
for(i=0; i<=n0-1; i++)
|
|
{
|
|
v = y->ptr.p_double[i];
|
|
if( v<bnd12 )
|
|
{
|
|
if( v<bnd01 )
|
|
{
|
|
cnt0 = cnt0+1;
|
|
}
|
|
else
|
|
{
|
|
cnt1 = cnt1+1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( v<bnd23 )
|
|
{
|
|
cnt2 = cnt2+1;
|
|
}
|
|
else
|
|
{
|
|
cnt3 = cnt3+1;
|
|
}
|
|
}
|
|
}
|
|
*info = -1;
|
|
*threshold = x->ptr.p_double[n-1];
|
|
*e = ae_maxrealnumber;
|
|
while(n0<n)
|
|
{
|
|
|
|
/*
|
|
* RMS error associated with current split
|
|
*/
|
|
v0 = (double)1/(double)n0;
|
|
v1 = (double)1/(double)(n-n0);
|
|
rms = (double)(0);
|
|
k0 = cnt0;
|
|
k1 = total0-cnt0;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
k0 = cnt1;
|
|
k1 = total1-cnt1;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
k0 = cnt2;
|
|
k1 = total2-cnt2;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
k0 = cnt3;
|
|
k1 = total3-cnt3;
|
|
rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
|
|
if( *info<0||rms<*e )
|
|
{
|
|
*info = 1;
|
|
*e = rms;
|
|
*threshold = 0.5*(x->ptr.p_double[n0-1]+x->ptr.p_double[n0]);
|
|
if( *threshold<=x->ptr.p_double[n0-1] )
|
|
{
|
|
*threshold = x->ptr.p_double[n0];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Advance
|
|
*/
|
|
n0prev = n0;
|
|
while(n0<n&&n0-n0prev<advanceby)
|
|
{
|
|
v0 = x->ptr.p_double[n0];
|
|
while(n0<n&&x->ptr.p_double[n0]==v0)
|
|
{
|
|
v = y->ptr.p_double[n0];
|
|
if( v<bnd12 )
|
|
{
|
|
if( v<bnd01 )
|
|
{
|
|
cnt0 = cnt0+1;
|
|
}
|
|
else
|
|
{
|
|
cnt1 = cnt1+1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( v<bnd23 )
|
|
{
|
|
cnt2 = cnt2+1;
|
|
}
|
|
else
|
|
{
|
|
cnt3 = cnt3+1;
|
|
}
|
|
}
|
|
n0 = n0+1;
|
|
}
|
|
}
|
|
}
|
|
if( *info>0 )
|
|
{
|
|
*e = ae_sqrt(*e/(4*n), _state);
|
|
}
|
|
}
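
/*************************************************************************
Note on RegressionSplit() above (an observation added for clarity, not
from the original sources): instead of computing exact variance reduction
for every candidate threshold, the function quantizes responses Y into
four buckets separated by Bnd01/Bnd12/Bnd23 (quarters of the [min,max]
range) and reuses the same counting-based impurity formula as the
classifier, with Cnt0..Cnt3 playing the role of per-class counts. This
keeps the sweep over sorted X values at O(1) work per advance, trading
exact within-node variance for a bucketed approximation.
*************************************************************************/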
|
|
|
|
|
|
/*************************************************************************
|
|
Returns split: either deterministic split at the middle of [A,B], or randomly
|
|
chosen split.
|
|
|
|
It is guaranteed that A<Split<=B.
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static double dforest_getsplit(decisionforestbuilder* s,
|
|
double a,
|
|
double b,
|
|
hqrndstate* rs,
|
|
ae_state *_state)
|
|
{
|
|
double result;
|
|
|
|
|
|
result = 0.5*(a+b);
|
|
if( ae_fp_less_eq(result,a) )
|
|
{
|
|
result = b;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
Outputs leaf to the tree
|
|
|
|
Following items of TRN and OOB sets are updated in the voting buffer:
|
|
* items [Idx0,Idx1) of WorkBuf.TrnSet
|
|
* items [OOBIdx0, OOBIdx1) of WorkBuf.OOBSet
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_outputleaf(decisionforestbuilder* s,
|
|
dfworkbuf* workbuf,
|
|
/* Real */ ae_vector* treebuf,
|
|
dfvotebuf* votebuf,
|
|
ae_int_t idx0,
|
|
ae_int_t idx1,
|
|
ae_int_t oobidx0,
|
|
ae_int_t oobidx1,
|
|
ae_int_t* treesize,
|
|
double leafval,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t leafvali;
|
|
ae_int_t nclasses;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
|
|
|
|
nclasses = s->nclasses;
|
|
if( nclasses==1 )
|
|
{
|
|
|
|
/*
|
|
* Store split to the tree
|
|
*/
|
|
treebuf->ptr.p_double[*treesize] = (double)(-1);
|
|
treebuf->ptr.p_double[*treesize+1] = leafval;
|
|
|
|
/*
|
|
* Update training and OOB voting stats
|
|
*/
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
j = workbuf->trnset.ptr.p_int[i];
|
|
votebuf->trntotals.ptr.p_double[j] = votebuf->trntotals.ptr.p_double[j]+leafval;
|
|
votebuf->trncounts.ptr.p_int[j] = votebuf->trncounts.ptr.p_int[j]+1;
|
|
}
|
|
for(i=oobidx0; i<=oobidx1-1; i++)
|
|
{
|
|
j = workbuf->oobset.ptr.p_int[i];
|
|
votebuf->oobtotals.ptr.p_double[j] = votebuf->oobtotals.ptr.p_double[j]+leafval;
|
|
votebuf->oobcounts.ptr.p_int[j] = votebuf->oobcounts.ptr.p_int[j]+1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Store split to the tree
|
|
*/
|
|
treebuf->ptr.p_double[*treesize] = (double)(-1);
|
|
treebuf->ptr.p_double[*treesize+1] = leafval;
|
|
|
|
/*
|
|
* Update training and OOB voting stats
|
|
*/
|
|
leafvali = ae_round(leafval, _state);
|
|
for(i=idx0; i<=idx1-1; i++)
|
|
{
|
|
j = workbuf->trnset.ptr.p_int[i];
|
|
votebuf->trntotals.ptr.p_double[j*nclasses+leafvali] = votebuf->trntotals.ptr.p_double[j*nclasses+leafvali]+1;
|
|
votebuf->trncounts.ptr.p_int[j] = votebuf->trncounts.ptr.p_int[j]+1;
|
|
}
|
|
for(i=oobidx0; i<=oobidx1-1; i++)
|
|
{
|
|
j = workbuf->oobset.ptr.p_int[i];
|
|
votebuf->oobtotals.ptr.p_double[j*nclasses+leafvali] = votebuf->oobtotals.ptr.p_double[j*nclasses+leafvali]+1;
|
|
votebuf->oobcounts.ptr.p_int[j] = votebuf->oobcounts.ptr.p_int[j]+1;
|
|
}
|
|
}
|
|
*treesize = *treesize+dforest_leafnodewidth;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function performs generic and algorithm-specific preprocessing of the
|
|
dataset
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_analyzeandpreprocessdataset(decisionforestbuilder* s,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t nvars;
|
|
ae_int_t nclasses;
|
|
ae_int_t npoints;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_bool isbinary;
|
|
double v;
|
|
double v0;
|
|
double v1;
|
|
hqrndstate rs;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&rs, 0, sizeof(rs));
|
|
_hqrndstate_init(&rs, _state, ae_true);
|
|
|
|
ae_assert(s->dstype==0, "no sparsity", _state);
|
|
npoints = s->npoints;
|
|
nvars = s->nvars;
|
|
nclasses = s->nclasses;
|
|
|
|
/*
|
|
* seed local RNG
|
|
*/
|
|
if( s->rdfglobalseed>0 )
|
|
{
|
|
hqrndseed(s->rdfglobalseed, 3532, &rs, _state);
|
|
}
|
|
else
|
|
{
|
|
hqrndseed(ae_randominteger(30000, _state), 3532, &rs, _state);
|
|
}
|
|
|
|
/*
|
|
* Generic processing
|
|
*/
|
|
ae_assert(npoints>=1, "BuildRandomForest: integrity check failed", _state);
|
|
rvectorsetlengthatleast(&s->dsmin, nvars, _state);
|
|
rvectorsetlengthatleast(&s->dsmax, nvars, _state);
|
|
bvectorsetlengthatleast(&s->dsbinary, nvars, _state);
|
|
for(i=0; i<=nvars-1; i++)
|
|
{
|
|
v0 = s->dsdata.ptr.p_double[i*npoints+0];
|
|
v1 = s->dsdata.ptr.p_double[i*npoints+0];
|
|
for(j=1; j<=npoints-1; j++)
|
|
{
|
|
v = s->dsdata.ptr.p_double[i*npoints+j];
|
|
if( v<v0 )
|
|
{
|
|
v0 = v;
|
|
}
|
|
if( v>v1 )
|
|
{
|
|
v1 = v;
|
|
}
|
|
}
|
|
s->dsmin.ptr.p_double[i] = v0;
|
|
s->dsmax.ptr.p_double[i] = v1;
|
|
ae_assert(ae_fp_less_eq(v0,v1), "BuildRandomForest: strange integrity check failure", _state);
|
|
isbinary = ae_true;
|
|
for(j=0; j<=npoints-1; j++)
|
|
{
|
|
v = s->dsdata.ptr.p_double[i*npoints+j];
|
|
isbinary = isbinary&&(v==v0||v==v1);
|
|
}
|
|
s->dsbinary.ptr.p_bool[i] = isbinary;
|
|
}
|
|
if( nclasses==1 )
|
|
{
|
|
s->dsravg = (double)(0);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
s->dsravg = s->dsravg+s->dsrval.ptr.p_double[i];
|
|
}
|
|
s->dsravg = s->dsravg/npoints;
|
|
}
|
|
else
|
|
{
|
|
ivectorsetlengthatleast(&s->dsctotals, nclasses, _state);
|
|
for(i=0; i<=nclasses-1; i++)
|
|
{
|
|
s->dsctotals.ptr.p_int[i] = 0;
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
s->dsctotals.ptr.p_int[s->dsival.ptr.p_int[i]] = s->dsctotals.ptr.p_int[s->dsival.ptr.p_int[i]]+1;
|
|
}
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function merges together trees generated during training and outputs
|
|
them to the decision forest.
|
|
|
|
INPUT PARAMETERS:
|
|
S - decision forest builder object
|
|
|
|
OUTPUT PARAMETERS:
|
|
DF - decision forest
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_mergetrees(decisionforestbuilder* s,
|
|
decisionforest* df,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t i;
|
|
ae_int_t cursize;
|
|
ae_int_t offs;
|
|
dftreebuf *tree;
|
|
ae_smart_ptr _tree;
|
|
ae_vector treesizes;
|
|
ae_vector treeoffsets;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&_tree, 0, sizeof(_tree));
|
|
memset(&treesizes, 0, sizeof(treesizes));
|
|
memset(&treeoffsets, 0, sizeof(treeoffsets));
|
|
ae_smart_ptr_init(&_tree, (void**)&tree, _state, ae_true);
|
|
ae_vector_init(&treesizes, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&treeoffsets, 0, DT_INT, _state, ae_true);
|
|
|
|
df->forestformat = dforest_dfuncompressedv0;
|
|
df->nvars = s->nvars;
|
|
df->nclasses = s->nclasses;
|
|
df->bufsize = 0;
|
|
df->ntrees = 0;
|
|
|
|
/*
|
|
* Determine trees count
|
|
*/
|
|
ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
|
|
while(tree!=NULL)
|
|
{
|
|
df->ntrees = df->ntrees+1;
|
|
ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
|
|
}
|
|
ae_assert(df->ntrees>0, "MergeTrees: integrity check failed, zero trees count", _state);
|
|
|
|
/*
|
|
* Determine individual tree sizes and total buffer size
|
|
*/
|
|
ae_vector_set_length(&treesizes, df->ntrees, _state);
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
treesizes.ptr.p_int[i] = -1;
|
|
}
|
|
ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
|
|
while(tree!=NULL)
|
|
{
|
|
ae_assert(tree->treeidx>=0&&tree->treeidx<df->ntrees, "MergeTrees: integrity check failed (wrong TreeIdx)", _state);
|
|
ae_assert(treesizes.ptr.p_int[tree->treeidx]<0, "MergeTrees: integrity check failed (duplicate TreeIdx)", _state);
|
|
df->bufsize = df->bufsize+ae_round(tree->treebuf.ptr.p_double[0], _state);
|
|
treesizes.ptr.p_int[tree->treeidx] = ae_round(tree->treebuf.ptr.p_double[0], _state);
|
|
ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
|
|
}
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
ae_assert(treesizes.ptr.p_int[i]>0, "MergeTrees: integrity check failed (wrong TreeSize)", _state);
|
|
}
|
|
|
|
/*
|
|
* Determine offsets for individual trees in output buffer
|
|
*/
|
|
ae_vector_set_length(&treeoffsets, df->ntrees, _state);
|
|
treeoffsets.ptr.p_int[0] = 0;
|
|
for(i=1; i<=df->ntrees-1; i++)
|
|
{
|
|
treeoffsets.ptr.p_int[i] = treeoffsets.ptr.p_int[i-1]+treesizes.ptr.p_int[i-1];
|
|
}
|
|
|
|
/*
|
|
* Output trees
|
|
*
|
|
* NOTE: since ALGLIB 3.16.0 trees are sorted by tree index prior to
|
|
* output (necessary for variable importance estimation), which is
|
|
* why we need an array of tree offsets
|
|
*/
|
|
ae_vector_set_length(&df->trees, df->bufsize, _state);
|
|
ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
|
|
while(tree!=NULL)
|
|
{
|
|
cursize = ae_round(tree->treebuf.ptr.p_double[0], _state);
|
|
offs = treeoffsets.ptr.p_int[tree->treeidx];
|
|
for(i=0; i<=cursize-1; i++)
|
|
{
|
|
df->trees.ptr.p_double[offs+i] = tree->treebuf.ptr.p_double[i];
|
|
}
|
|
ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
|
|
}
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function post-processes voting array and calculates TRN and OOB errors.
|
|
|
|
INPUT PARAMETERS:
|
|
S - decision forest builder object
|
|
NTrees - number of trees in the forest
|
|
Buf - possibly preallocated vote buffer, its contents are
|
|
overwritten by this function
|
|
|
|
OUTPUT PARAMETERS:
|
|
Rep - report fields corresponding to errors are updated
|
|
|
|
-- ALGLIB --
|
|
Copyright 21.05.2018 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_processvotingresults(decisionforestbuilder* s,
|
|
ae_int_t ntrees,
|
|
dfvotebuf* buf,
|
|
dfreport* rep,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
dfvotebuf *vote;
|
|
ae_smart_ptr _vote;
|
|
ae_int_t nvars;
|
|
ae_int_t nclasses;
|
|
ae_int_t npoints;
|
|
ae_int_t i;
|
|
ae_int_t j;
|
|
ae_int_t k;
|
|
ae_int_t k1;
|
|
double v;
|
|
ae_int_t avgrelcnt;
|
|
ae_int_t oobavgrelcnt;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&_vote, 0, sizeof(_vote));
|
|
ae_smart_ptr_init(&_vote, (void**)&vote, _state, ae_true);
|
|
|
|
npoints = s->npoints;
|
|
nvars = s->nvars;
|
|
nclasses = s->nclasses;
|
|
ae_assert(npoints>0, "DFOREST: integrity check failed", _state);
|
|
ae_assert(nvars>0, "DFOREST: integrity check failed", _state);
|
|
ae_assert(nclasses>0, "DFOREST: integrity check failed", _state);
|
|
|
|
/*
|
|
* Prepare vote buffer
|
|
*/
|
|
rvectorsetlengthatleast(&buf->trntotals, npoints*nclasses, _state);
|
|
rvectorsetlengthatleast(&buf->oobtotals, npoints*nclasses, _state);
|
|
for(i=0; i<=npoints*nclasses-1; i++)
|
|
{
|
|
buf->trntotals.ptr.p_double[i] = (double)(0);
|
|
buf->oobtotals.ptr.p_double[i] = (double)(0);
|
|
}
|
|
ivectorsetlengthatleast(&buf->trncounts, npoints, _state);
|
|
ivectorsetlengthatleast(&buf->oobcounts, npoints, _state);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
buf->trncounts.ptr.p_int[i] = 0;
|
|
buf->oobcounts.ptr.p_int[i] = 0;
|
|
}
|
|
|
|
/*
|
|
* Merge voting arrays
|
|
*/
|
|
ae_shared_pool_first_recycled(&s->votepool, &_vote, _state);
|
|
while(vote!=NULL)
|
|
{
|
|
for(i=0; i<=npoints*nclasses-1; i++)
|
|
{
|
|
buf->trntotals.ptr.p_double[i] = buf->trntotals.ptr.p_double[i]+vote->trntotals.ptr.p_double[i]+vote->oobtotals.ptr.p_double[i];
|
|
buf->oobtotals.ptr.p_double[i] = buf->oobtotals.ptr.p_double[i]+vote->oobtotals.ptr.p_double[i];
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
buf->trncounts.ptr.p_int[i] = buf->trncounts.ptr.p_int[i]+vote->trncounts.ptr.p_int[i]+vote->oobcounts.ptr.p_int[i];
|
|
buf->oobcounts.ptr.p_int[i] = buf->oobcounts.ptr.p_int[i]+vote->oobcounts.ptr.p_int[i];
|
|
}
|
|
ae_shared_pool_next_recycled(&s->votepool, &_vote, _state);
|
|
}
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
v = 1/coalesce((double)(buf->trncounts.ptr.p_int[i]), (double)(1), _state);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
buf->trntotals.ptr.p_double[i*nclasses+j] = buf->trntotals.ptr.p_double[i*nclasses+j]*v;
|
|
}
|
|
v = 1/coalesce((double)(buf->oobcounts.ptr.p_int[i]), (double)(1), _state);
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
buf->oobtotals.ptr.p_double[i*nclasses+j] = buf->oobtotals.ptr.p_double[i*nclasses+j]*v;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Use aggregated voting data to output error metrics
|
|
*/
|
|
avgrelcnt = 0;
|
|
oobavgrelcnt = 0;
|
|
rep->rmserror = (double)(0);
|
|
rep->avgerror = (double)(0);
|
|
rep->avgrelerror = (double)(0);
|
|
rep->relclserror = (double)(0);
|
|
rep->avgce = (double)(0);
|
|
rep->oobrmserror = (double)(0);
|
|
rep->oobavgerror = (double)(0);
|
|
rep->oobavgrelerror = (double)(0);
|
|
rep->oobrelclserror = (double)(0);
|
|
rep->oobavgce = (double)(0);
|
|
for(i=0; i<=npoints-1; i++)
|
|
{
|
|
if( nclasses>1 )
|
|
{
|
|
|
|
/*
|
|
* classification-specific code
|
|
*/
|
|
k = s->dsival.ptr.p_int[i];
|
|
for(j=0; j<=nclasses-1; j++)
|
|
{
|
|
v = buf->trntotals.ptr.p_double[i*nclasses+j];
|
|
if( j==k )
|
|
{
|
|
rep->avgce = rep->avgce-ae_log(coalesce(v, ae_minrealnumber, _state), _state);
|
|
rep->rmserror = rep->rmserror+ae_sqr(v-1, _state);
|
|
rep->avgerror = rep->avgerror+ae_fabs(v-1, _state);
|
|
rep->avgrelerror = rep->avgrelerror+ae_fabs(v-1, _state);
|
|
inc(&avgrelcnt, _state);
|
|
}
|
|
else
|
|
{
|
|
rep->rmserror = rep->rmserror+ae_sqr(v, _state);
|
|
rep->avgerror = rep->avgerror+ae_fabs(v, _state);
|
|
}
|
|
v = buf->oobtotals.ptr.p_double[i*nclasses+j];
|
|
if( j==k )
|
|
{
|
|
rep->oobavgce = rep->oobavgce-ae_log(coalesce(v, ae_minrealnumber, _state), _state);
|
|
rep->oobrmserror = rep->oobrmserror+ae_sqr(v-1, _state);
|
|
rep->oobavgerror = rep->oobavgerror+ae_fabs(v-1, _state);
|
|
rep->oobavgrelerror = rep->oobavgrelerror+ae_fabs(v-1, _state);
|
|
inc(&oobavgrelcnt, _state);
|
|
}
|
|
else
|
|
{
|
|
rep->oobrmserror = rep->oobrmserror+ae_sqr(v, _state);
|
|
rep->oobavgerror = rep->oobavgerror+ae_fabs(v, _state);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Classification errors are handled separately
|
|
*/
|
|
k1 = 0;
|
|
for(j=1; j<=nclasses-1; j++)
|
|
{
|
|
if( buf->trntotals.ptr.p_double[i*nclasses+j]>buf->trntotals.ptr.p_double[i*nclasses+k1] )
|
|
{
|
|
k1 = j;
|
|
}
|
|
}
|
|
if( k1!=k )
|
|
{
|
|
rep->relclserror = rep->relclserror+1;
|
|
}
|
|
k1 = 0;
|
|
for(j=1; j<=nclasses-1; j++)
|
|
{
|
|
if( buf->oobtotals.ptr.p_double[i*nclasses+j]>buf->oobtotals.ptr.p_double[i*nclasses+k1] )
|
|
{
|
|
k1 = j;
|
|
}
|
|
}
|
|
if( k1!=k )
|
|
{
|
|
rep->oobrelclserror = rep->oobrelclserror+1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* regression-specific code
|
|
*/
|
|
v = buf->trntotals.ptr.p_double[i]-s->dsrval.ptr.p_double[i];
|
|
rep->rmserror = rep->rmserror+ae_sqr(v, _state);
|
|
rep->avgerror = rep->avgerror+ae_fabs(v, _state);
|
|
if( ae_fp_neq(s->dsrval.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
rep->avgrelerror = rep->avgrelerror+ae_fabs(v/s->dsrval.ptr.p_double[i], _state);
|
|
avgrelcnt = avgrelcnt+1;
|
|
}
|
|
v = buf->oobtotals.ptr.p_double[i]-s->dsrval.ptr.p_double[i];
|
|
rep->oobrmserror = rep->oobrmserror+ae_sqr(v, _state);
|
|
rep->oobavgerror = rep->oobavgerror+ae_fabs(v, _state);
|
|
if( ae_fp_neq(s->dsrval.ptr.p_double[i],(double)(0)) )
|
|
{
|
|
rep->oobavgrelerror = rep->oobavgrelerror+ae_fabs(v/s->dsrval.ptr.p_double[i], _state);
|
|
oobavgrelcnt = oobavgrelcnt+1;
|
|
}
|
|
}
|
|
}
|
|
rep->relclserror = rep->relclserror/npoints;
|
|
rep->rmserror = ae_sqrt(rep->rmserror/(npoints*nclasses), _state);
|
|
rep->avgerror = rep->avgerror/(npoints*nclasses);
|
|
rep->avgrelerror = rep->avgrelerror/coalesce((double)(avgrelcnt), (double)(1), _state);
|
|
rep->oobrelclserror = rep->oobrelclserror/npoints;
|
|
rep->oobrmserror = ae_sqrt(rep->oobrmserror/(npoints*nclasses), _state);
|
|
rep->oobavgerror = rep->oobavgerror/(npoints*nclasses);
|
|
rep->oobavgrelerror = rep->oobavgrelerror/coalesce((double)(oobavgrelcnt), (double)(1), _state);
|
|
ae_frame_leave(_state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function performs binary compression of decision forest, using either
|
|
8-bit mantissa (a bit more compact representation) or 16-bit mantissa for
|
|
splits and regression outputs.
|
|
|
|
Forest is compressed in-place.
|
|
|
|
Return value is a compression factor.
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static double dforest_binarycompression(decisionforest* df,
|
|
ae_bool usemantissa8,
|
|
ae_state *_state)
|
|
{
|
|
ae_frame _frame_block;
|
|
ae_int_t size8;
|
|
ae_int_t size8i;
|
|
ae_int_t offssrc;
|
|
ae_int_t offsdst;
|
|
ae_int_t i;
|
|
ae_vector dummyi;
|
|
ae_int_t maxrawtreesize;
|
|
ae_vector compressedsizes;
|
|
double result;
|
|
|
|
ae_frame_make(_state, &_frame_block);
|
|
memset(&dummyi, 0, sizeof(dummyi));
|
|
memset(&compressedsizes, 0, sizeof(compressedsizes));
|
|
ae_vector_init(&dummyi, 0, DT_INT, _state, ae_true);
|
|
ae_vector_init(&compressedsizes, 0, DT_INT, _state, ae_true);
|
|
|
|
|
|
/*
|
|
* Quick exit if already compressed
|
|
*/
|
|
if( df->forestformat==dforest_dfcompressedv0 )
|
|
{
|
|
result = (double)(1);
|
|
ae_frame_leave(_state);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Check that source format is supported
|
|
*/
|
|
ae_assert(df->forestformat==dforest_dfuncompressedv0, "BinaryCompression: unexpected forest format", _state);
|
|
|
|
/*
|
|
* Compute sizes of uncompressed and compressed trees.
|
|
*/
|
|
size8 = 0;
|
|
offssrc = 0;
|
|
maxrawtreesize = 0;
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
size8i = dforest_computecompressedsizerec(df, usemantissa8, offssrc, offssrc+1, &dummyi, ae_false, _state);
|
|
size8 = size8+dforest_computecompresseduintsize(size8i, _state)+size8i;
|
|
maxrawtreesize = ae_maxint(maxrawtreesize, ae_round(df->trees.ptr.p_double[offssrc], _state), _state);
|
|
offssrc = offssrc+ae_round(df->trees.ptr.p_double[offssrc], _state);
|
|
}
|
|
result = (double)(8*df->trees.cnt)/(double)(size8+1);
|
|
|
|
/*
|
|
* Allocate memory and perform compression
|
|
*/
|
|
ae_vector_set_length(&(df->trees8), size8, _state);
|
|
ae_vector_set_length(&compressedsizes, maxrawtreesize, _state);
|
|
offssrc = 0;
|
|
offsdst = 0;
|
|
for(i=0; i<=df->ntrees-1; i++)
|
|
{
|
|
|
|
/*
|
|
* Call compressed size evaluator one more time, now saving subtree sizes into temporary array
|
|
*/
|
|
size8i = dforest_computecompressedsizerec(df, usemantissa8, offssrc, offssrc+1, &compressedsizes, ae_true, _state);
|
|
|
|
/*
|
|
* Output tree header (length in bytes)
|
|
*/
|
|
dforest_streamuint(&df->trees8, &offsdst, size8i, _state);
|
|
|
|
/*
|
|
* Compress recursively
|
|
*/
|
|
dforest_compressrec(df, usemantissa8, offssrc, offssrc+1, &compressedsizes, &df->trees8, &offsdst, _state);
|
|
|
|
/*
|
|
* Next tree
|
|
*/
|
|
offssrc = offssrc+ae_round(df->trees.ptr.p_double[offssrc], _state);
|
|
}
|
|
ae_assert(offsdst==size8, "BinaryCompression: integrity check failed (stream length)", _state);
|
|
|
|
/*
|
|
* Finalize forest conversion, clear previously allocated memory
|
|
*/
|
|
df->forestformat = dforest_dfcompressedv0;
|
|
df->usemantissa8 = usemantissa8;
|
|
ae_vector_set_length(&df->trees, 0, _state);
|
|
ae_frame_leave(_state);
|
|
return result;
|
|
}
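
/*************************************************************************
Worked example for the compression factor above (an illustration, not from
the original sources): each uncompressed tree element occupies one double
(8 bytes), hence Result=(8*Trees.cnt)/(Size8+1). A forest stored in 100000
doubles (800000 bytes) which compresses to a 95000-byte stream yields a
factor of about 800000/95001, i.e. roughly 8.4x.
*************************************************************************/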
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns exact number of bytes required to store compressed
|
|
version of the tree starting at location TreeRoot.
|
|
|
|
PARAMETERS:
|
|
DF - decision forest
|
|
UseMantissa8 - whether 8-bit or 16-bit mantissas are used to store
|
|
floating point numbers
|
|
TreeRoot - root of the specific tree being stored (offset in DF.Trees)
|
|
TreePos - position within tree (first location in the tree
|
|
is TreeRoot+1)
|
|
CompressedSizes - not referenced if SaveCompressedSizes is False;
|
|
otherwise, values computed by this function for
|
|
specific values of TreePos are stored to
|
|
CompressedSizes[TreePos-TreeRoot] (other elements
|
|
of the array are not referenced).
|
|
This array must be preallocated by caller.
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static ae_int_t dforest_computecompressedsizerec(decisionforest* df,
|
|
ae_bool usemantissa8,
|
|
ae_int_t treeroot,
|
|
ae_int_t treepos,
|
|
/* Integer */ ae_vector* compressedsizes,
|
|
ae_bool savecompressedsizes,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t jmponbranch;
|
|
ae_int_t child0size;
|
|
ae_int_t child1size;
|
|
ae_int_t fpwidth;
|
|
ae_int_t result;
|
|
|
|
|
|
if( usemantissa8 )
|
|
{
|
|
fpwidth = 2;
|
|
}
|
|
else
|
|
{
|
|
fpwidth = 3;
|
|
}
|
|
|
|
/*
|
|
* Leaf or split?
|
|
*/
|
|
if( ae_fp_eq(df->trees.ptr.p_double[treepos],(double)(-1)) )
|
|
{
|
|
|
|
/*
|
|
* Leaf
|
|
*/
|
|
result = dforest_computecompresseduintsize(2*df->nvars, _state);
|
|
if( df->nclasses==1 )
|
|
{
|
|
result = result+fpwidth;
|
|
}
|
|
else
|
|
{
|
|
result = result+dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos+1], _state), _state);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Split
|
|
*/
|
|
jmponbranch = ae_round(df->trees.ptr.p_double[treepos+2], _state);
|
|
child0size = dforest_computecompressedsizerec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, savecompressedsizes, _state);
|
|
child1size = dforest_computecompressedsizerec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, savecompressedsizes, _state);
|
|
if( child0size<=child1size )
|
|
{
|
|
|
|
/*
|
|
* Child #0 comes first because it is shorter
|
|
*/
|
|
result = dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos], _state), _state);
|
|
result = result+fpwidth;
|
|
result = result+dforest_computecompresseduintsize(child0size, _state);
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Child #1 comes first because it is shorter
|
|
*/
|
|
result = dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos], _state)+df->nvars, _state);
|
|
result = result+fpwidth;
|
|
result = result+dforest_computecompresseduintsize(child1size, _state);
|
|
}
|
|
result = result+child0size+child1size;
|
|
}
|
|
|
|
/*
|
|
* Do we have to save compressed sizes?
|
|
*/
|
|
if( savecompressedsizes )
|
|
{
|
|
ae_assert(treepos-treeroot<compressedsizes->cnt, "ComputeCompressedSizeRec: integrity check failed", _state);
|
|
compressedsizes->ptr.p_int[treepos-treeroot] = result;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function recursively compresses the tree starting at location TreeRoot
|
|
and writes the result to the output byte stream at offset DstOffs.
|
|
|
|
PARAMETERS:
|
|
DF - decision forest
|
|
UseMantissa8 - whether 8-bit or 16-bit mantissas are used to store
|
|
floating point numbers
|
|
TreeRoot - root of the specific tree being stored (offset in DF.Trees)
|
|
TreePos - position within tree (first location in the tree
|
|
is TreeRoot+1)
|
|
CompressedSizes - subtree sizes previously computed by
|
|
ComputeCompressedSizeRec() with SaveCompressedSizes=True;
|
|
this array must be filled by the caller
|
|
Buf - output byte array
|
|
DstOffs - offset in Buf, incremented by the number of bytes written
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_compressrec(decisionforest* df,
|
|
ae_bool usemantissa8,
|
|
ae_int_t treeroot,
|
|
ae_int_t treepos,
|
|
/* Integer */ ae_vector* compressedsizes,
|
|
ae_vector* buf,
|
|
ae_int_t* dstoffs,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t jmponbranch;
|
|
ae_int_t child0size;
|
|
ae_int_t child1size;
|
|
ae_int_t varidx;
|
|
double leafval;
|
|
double splitval;
|
|
ae_int_t fpwidth;
|
|
ae_int_t dstoffsold;
|
|
|
|
|
|
dstoffsold = *dstoffs;
|
|
if( usemantissa8 )
|
|
{
|
|
fpwidth = 2;
|
|
}
|
|
else
|
|
{
|
|
fpwidth = 3;
|
|
}
|
|
|
|
/*
|
|
* Leaf or split?
|
|
*/
|
|
varidx = ae_round(df->trees.ptr.p_double[treepos], _state);
|
|
if( varidx==-1 )
|
|
{
|
|
|
|
/*
|
|
* Leaf node:
|
|
* * stream special value which denotes leaf (2*NVars)
|
|
* * then, stream scalar value (floating point) or class number (unsigned integer)
|
|
*/
|
|
leafval = df->trees.ptr.p_double[treepos+1];
|
|
dforest_streamuint(buf, dstoffs, 2*df->nvars, _state);
|
|
if( df->nclasses==1 )
|
|
{
|
|
dforest_streamfloat(buf, usemantissa8, dstoffs, leafval, _state);
|
|
}
|
|
else
|
|
{
|
|
dforest_streamuint(buf, dstoffs, ae_round(leafval, _state), _state);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Split node:
|
|
* * fetch compressed sizes of child nodes, decide which child goes first
|
|
*/
|
|
jmponbranch = ae_round(df->trees.ptr.p_double[treepos+2], _state);
|
|
splitval = df->trees.ptr.p_double[treepos+1];
|
|
child0size = compressedsizes->ptr.p_int[treepos+dforest_innernodewidth-treeroot];
|
|
child1size = compressedsizes->ptr.p_int[treeroot+jmponbranch-treeroot];
|
|
if( child0size<=child1size )
|
|
{
|
|
|
|
/*
|
|
* Child #0 comes first because it is shorter:
|
|
* * stream variable index used for splitting;
|
|
* value in [0,NVars) range indicates that split is
|
|
* "if VAR<VAL then BRANCH0 else BRANCH1"
|
|
* * stream value used for splitting
|
|
* * stream children #0 and #1
|
|
*/
|
|
dforest_streamuint(buf, dstoffs, varidx, _state);
|
|
dforest_streamfloat(buf, usemantissa8, dstoffs, splitval, _state);
|
|
dforest_streamuint(buf, dstoffs, child0size, _state);
|
|
dforest_compressrec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, buf, dstoffs, _state);
|
|
dforest_compressrec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, buf, dstoffs, _state);
|
|
}
|
|
else
|
|
{
|
|
|
|
/*
|
|
* Child #1 comes first because it is shorter:
|
|
* * stream variable index used for splitting + NVars;
|
|
* value in [NVars,2*NVars) range indicates that split is
|
|
* "if VAR>=VAL then BRANCH0 else BRANCH1"
|
|
* * stream value used for splitting
|
|
* * stream children #0 and #1
|
|
*/
|
|
dforest_streamuint(buf, dstoffs, varidx+df->nvars, _state);
|
|
dforest_streamfloat(buf, usemantissa8, dstoffs, splitval, _state);
|
|
dforest_streamuint(buf, dstoffs, child1size, _state);
|
|
dforest_compressrec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, buf, dstoffs, _state);
|
|
dforest_compressrec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, buf, dstoffs, _state);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Integrity check at the end
|
|
*/
|
|
ae_assert(*dstoffs-dstoffsold==compressedsizes->ptr.p_int[treepos-treeroot], "CompressRec: integrity check failed (compressed size at leaf)", _state);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function returns exact number of bytes required to store compressed
|
|
unsigned integer number (negative arguments result in assertion being
|
|
generated).
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static ae_int_t dforest_computecompresseduintsize(ae_int_t v,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t result;
|
|
|
|
|
|
ae_assert(v>=0, "Assertion failed", _state);
|
|
result = 1;
|
|
while(v>=128)
|
|
{
|
|
v = v/128;
|
|
result = result+1;
|
|
}
|
|
return result;
|
|
}
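
/*************************************************************************
Worked example (added for clarity, not from the original sources): the
size returned above is the byte count of the base-128 varint encoding
produced by StreamUInt() below - one byte per 7 bits of payload. So V in
[0,127] needs 1 byte, V in [128,16383] needs 2 bytes, and e.g. V=1000
(=7*128+104) needs 2 bytes: the loop divides by 128 once before V drops
below 128.
*************************************************************************/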
|
|
|
|
|
|
/*************************************************************************
|
|
This function stores compressed unsigned integer number (negative arguments
|
|
result in assertion being generated) to byte array at location Offs and
|
|
increments Offs by number of bytes being stored.
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_streamuint(ae_vector* buf,
|
|
ae_int_t* offs,
|
|
ae_int_t v,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t v0;
|
|
|
|
|
|
ae_assert(v>=0, "Assertion failed", _state);
|
|
for(;;)
|
|
{
|
|
|
|
/*
|
|
* Save 7 least significant bits of V, use 8th bit as a flag which
|
|
* tells us whether subsequent 7-bit packages will be sent.
|
|
*/
|
|
v0 = v%128;
|
|
if( v>=128 )
|
|
{
|
|
v0 = v0+128;
|
|
}
|
|
buf->ptr.p_ubyte[*(offs)] = (unsigned char)(v0);
|
|
*offs = *offs+1;
|
|
v = v/128;
|
|
if( v==0 )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
This function reads compressed unsigned integer number from byte array
|
|
starting at location Offs and increments Offs by number of bytes being
|
|
read.
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static ae_int_t dforest_unstreamuint(ae_vector* buf,
|
|
ae_int_t* offs,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t v0;
|
|
ae_int_t p;
|
|
ae_int_t result;
|
|
|
|
|
|
result = 0;
|
|
p = 1;
|
|
for(;;)
|
|
{
|
|
|
|
/*
|
|
* Read 7 bits of V, use 8th bit as a flag which tells us whether
|
|
* subsequent 7-bit packages will be received.
|
|
*/
|
|
v0 = buf->ptr.p_ubyte[*(offs)];
|
|
*offs = *offs+1;
|
|
result = result+v0%128*p;
|
|
if( v0<128 )
|
|
{
|
|
break;
|
|
}
|
|
p = p*128;
|
|
}
|
|
return result;
|
|
}
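
/*************************************************************************
Illustrative sketch (not part of the original sources; the function name
is hypothetical): a standalone round-trip of the 7-bit-per-byte encoding
implemented by StreamUInt()/UnstreamUInt() above. Bit 7 of each byte is a
continuation flag, bits 0..6 carry the payload, least significant group
first.
*************************************************************************/
static int demo_varint_roundtrip(int v)
{
    unsigned char buf[8];
    int offs;
    int v0;
    int p;
    int result;
    
    /*
     * Encode: emit 7 bits at a time, set bit 7 while more bits remain
     */
    offs = 0;
    for(;;)
    {
        v0 = v%128;
        if( v>=128 )
        {
            v0 = v0+128;
        }
        buf[offs] = (unsigned char)v0;
        offs = offs+1;
        v = v/128;
        if( v==0 )
        {
            break;
        }
    }
    
    /*
     * Decode: accumulate 7-bit groups until a byte with bit 7 clear
     */
    offs = 0;
    result = 0;
    p = 1;
    for(;;)
    {
        v0 = buf[offs];
        offs = offs+1;
        result = result+v0%128*p;
        if( v0<128 )
        {
            break;
        }
        p = p*128;
    }
    return result;   /* equals the original non-negative V */
}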
|
|
|
|
|
|
/*************************************************************************
|
|
This function stores compressed floating point number to byte array at
|
|
location Offs and increments Offs by number of bytes being stored.
|
|
|
|
Either 8-bit mantissa or 16-bit mantissa is used. The exponent is always
|
|
7 bits of exponent + sign. Values which do not fit into exponent range are
|
|
truncated to fit.
|
|
|
|
-- ALGLIB --
|
|
Copyright 22.07.2019 by Bochkanov Sergey
|
|
*************************************************************************/
|
|
static void dforest_streamfloat(ae_vector* buf,
|
|
ae_bool usemantissa8,
|
|
ae_int_t* offs,
|
|
double v,
|
|
ae_state *_state)
|
|
{
|
|
ae_int_t signbit;
|
|
ae_int_t e;
|
|
ae_int_t m;
|
|
double twopow30;
|
|
double twopowm30;
|
|
double twopow10;
|
|
double twopowm10;
|
|
|
|
|
|
ae_assert(ae_isfinite(v, _state), "StreamFloat: V is not finite number", _state);
|
|
|
|
/*
|
|
* Special case: zero
|
|
*/
|
|
if( v==0.0 )
|
|
{
|
|
if( usemantissa8 )
|
|
{
|
|
buf->ptr.p_ubyte[*offs+0] = (unsigned char)(0);
|
|
buf->ptr.p_ubyte[*offs+1] = (unsigned char)(0);
|
|
*offs = *offs+2;
|
|
}
|
|
else
|
|
{
|
|
buf->ptr.p_ubyte[*offs+0] = (unsigned char)(0);
|
|
buf->ptr.p_ubyte[*offs+1] = (unsigned char)(0);
|
|
buf->ptr.p_ubyte[*offs+2] = (unsigned char)(0);
|
|
*offs = *offs+3;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Handle sign
|
|
*/
|
|
signbit = 0;
|
|
if( v<0.0 )
|
|
{
|
|
v = -v;
|
|
signbit = 128;
|
|
}
|
|
|
|
/*
|
|
* Compute exponent
|
|
*/
|
|
twopow30 = (double)(1073741824);
|
|
twopow10 = (double)(1024);
|
|
twopowm30 = 1.0/twopow30;
|
|
twopowm10 = 1.0/twopow10;
|
|
e = 0;
|
|
while(v>=twopow30)
|
|
{
|
|
v = v*twopowm30;
|
|
e = e+30;
|
|
}
|
|
while(v>=twopow10)
|
|
{
|
|
v = v*twopowm10;
|
|
e = e+10;
|
|
}
|
|
while(v>=1.0)
|
|
{
|
|
v = v*0.5;
|
|
e = e+1;
|
|
}
|
|
while(v<twopowm30)
|
|
{
|
|
v = v*twopow30;
|
|
e = e-30;
|
|
}
|
|
while(v<twopowm10)
|
|
{
|
|
v = v*twopow10;
|
|
e = e-10;
|
|
}
|
|
while(v<0.5)
|
|
{
|
|
v = v*2;
|
|
e = e-1;
|
|
}
|
|
ae_assert(v>=0.5&&v<1.0, "StreamFloat: integrity check failed", _state);
|
|
|
|
/*
|
|
* Handle exponent underflow/overflow
|
|
*/
|
|
if( e<-63 )
|
|
{
|
|
signbit = 0;
|
|
e = 0;
|
|
v = (double)(0);
|
|
}
|
|
if( e>63 )
|
|
{
|
|
e = 63;
|
|
v = 1.0;
|
|
}
|
|
|
|
/*
|
|
* Save to stream
|
|
*/
|
|
if( usemantissa8 )
|
|
{
|
|
m = ae_round(v*256, _state);
|
|
if( m==256 )
|
|
{
|
|
m = m/2;
|
|
e = ae_minint(e+1, 63, _state);
|
|
}
|
|
buf->ptr.p_ubyte[*offs+0] = (unsigned char)(e+64+signbit);
|
|
buf->ptr.p_ubyte[*offs+1] = (unsigned char)(m);
|
|
*offs = *offs+2;
|
|
}
|
|
else
|
|
{
|
|
m = ae_round(v*65536, _state);
|
|
if( m==65536 )
|
|
{
|
|
m = m/2;
|
|
e = ae_minint(e+1, 63, _state);
|
|
}
|
|
buf->ptr.p_ubyte[*offs+0] = (unsigned char)(e+64+signbit);
|
|
buf->ptr.p_ubyte[*offs+1] = (unsigned char)(m%256);
|
|
buf->ptr.p_ubyte[*offs+2] = (unsigned char)(m/256);
|
|
*offs = *offs+3;
|
|
}
|
|
}


/*************************************************************************
This function reads a compressed floating point number from a byte array,
starting from location Offs, and increments Offs by the number of bytes
read.

Either an 8-bit or a 16-bit mantissa is used. The exponent is always 7
bits of exponent + sign. Values which do not fit into the exponent range
are truncated to fit.

  -- ALGLIB --
     Copyright 22.07.2019 by Bochkanov Sergey
*************************************************************************/
static double dforest_unstreamfloat(ae_vector* buf,
     ae_bool usemantissa8,
     ae_int_t* offs,
     ae_state *_state)
{
    ae_int_t e;
    double v;
    double inv256;
    double result;


    /*
     * Read from stream
     */
    inv256 = 1.0/256.0;
    if( usemantissa8 )
    {
        e = buf->ptr.p_ubyte[*offs+0];
        v = buf->ptr.p_ubyte[*offs+1]*inv256;
        *offs = *offs+2;
    }
    else
    {
        e = buf->ptr.p_ubyte[*offs+0];
        v = (buf->ptr.p_ubyte[*offs+1]*inv256+buf->ptr.p_ubyte[*offs+2])*inv256;
        *offs = *offs+3;
    }

    /*
     * Decode
     */
    if( e>128 )
    {
        v = -v;
        e = e-128;
    }
    e = e-64;
    result = dforest_xfastpow((double)(2), e, _state)*v;
    return result;
}


/*************************************************************************
Classification error
*************************************************************************/
static ae_int_t dforest_dfclserror(decisionforest* df,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t tmpi;
    ae_int_t result;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);

    if( df->nclasses<=1 )
    {
        result = 0;
        ae_frame_leave(_state);
        return result;
    }
    ae_vector_set_length(&x, df->nvars-1+1, _state);
    ae_vector_set_length(&y, df->nclasses-1+1, _state);
    result = 0;
    for(i=0; i<=npoints-1; i++)
    {
        ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
        dfprocess(df, &x, &y, _state);
        k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
        tmpi = 0;
        for(j=1; j<=df->nclasses-1; j++)
        {
            if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
            {
                tmpi = j;
            }
        }
        if( tmpi!=k )
        {
            result = result+1;
        }
    }
    ae_frame_leave(_state);
    return result;
}
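
/*************************************************************************
Note on the routine above: dforest_dfclserror() returns the raw COUNT of
misclassified points rather than a percentage, and its arg-max loop
resolves ties between equal posterior values in favor of the smaller
class index (the strict ae_fp_greater comparison never replaces tmpi on
equality).
*************************************************************************/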


/*************************************************************************
Internal subroutine for processing one decision tree stored in uncompressed
format starting at SubtreeRoot (this index points to the header of the tree,
not to its first node). The first node to be processed is located at
NodeOffs.
*************************************************************************/
static void dforest_dfprocessinternaluncompressed(decisionforest* df,
     ae_int_t subtreeroot,
     ae_int_t nodeoffs,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t idx;


    ae_assert(df->forestformat==dforest_dfuncompressedv0, "DFProcessInternal: unexpected forest format", _state);

    /*
     * Navigate through the tree
     */
    for(;;)
    {
        if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
        {
            if( df->nclasses==1 )
            {
                y->ptr.p_double[0] = y->ptr.p_double[0]+df->trees.ptr.p_double[nodeoffs+1];
            }
            else
            {
                idx = ae_round(df->trees.ptr.p_double[nodeoffs+1], _state);
                y->ptr.p_double[idx] = y->ptr.p_double[idx]+1;
            }
            break;
        }
        if( x->ptr.p_double[ae_round(df->trees.ptr.p_double[nodeoffs], _state)]<df->trees.ptr.p_double[nodeoffs+1] )
        {
            nodeoffs = nodeoffs+dforest_innernodewidth;
        }
        else
        {
            nodeoffs = subtreeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
        }
    }
}


/*************************************************************************
Internal subroutine for processing one decision tree stored in compressed
format starting at Offs (this index points to the first node of the tree,
right past the header field).
*************************************************************************/
static void dforest_dfprocessinternalcompressed(decisionforest* df,
     ae_int_t offs,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t leafindicator;
    ae_int_t varidx;
    double splitval;
    ae_int_t jmplen;
    double leafval;
    ae_int_t leafcls;


    ae_assert(df->forestformat==dforest_dfcompressedv0, "DFProcessInternal: unexpected forest format", _state);

    /*
     * Navigate through the tree
     */
    leafindicator = 2*df->nvars;
    for(;;)
    {

        /*
         * Read variable idx
         */
        varidx = dforest_unstreamuint(&df->trees8, &offs, _state);

        /*
         * Is it a leaf?
         */
        if( varidx==leafindicator )
        {
            if( df->nclasses==1 )
            {

                /*
                 * Regression forest
                 */
                leafval = dforest_unstreamfloat(&df->trees8, df->usemantissa8, &offs, _state);
                y->ptr.p_double[0] = y->ptr.p_double[0]+leafval;
            }
            else
            {

                /*
                 * Classification forest
                 */
                leafcls = dforest_unstreamuint(&df->trees8, &offs, _state);
                y->ptr.p_double[leafcls] = y->ptr.p_double[leafcls]+1;
            }
            break;
        }

        /*
         * Process node
         */
        splitval = dforest_unstreamfloat(&df->trees8, df->usemantissa8, &offs, _state);
        jmplen = dforest_unstreamuint(&df->trees8, &offs, _state);
        if( varidx<df->nvars )
        {

            /*
             * The split rule is "if VAR<VAL then BRANCH0 else BRANCH1"
             */
            if( x->ptr.p_double[varidx]>=splitval )
            {
                offs = offs+jmplen;
            }
        }
        else
        {

            /*
             * The split rule is "if VAR>=VAL then BRANCH0 else BRANCH1"
             */
            varidx = varidx-df->nvars;
            if( x->ptr.p_double[varidx]<splitval )
            {
                offs = offs+jmplen;
            }
        }
    }
}
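
/*************************************************************************
Byte layout implied by the decoder above: a compressed inner node is
streamed as [VarIdx, SplitVal, JmpLen]. VarIdx in [0,NVars) encodes the
rule "if X[VarIdx]<SplitVal then continue with the next node, else skip
JmpLen bytes"; VarIdx in [NVars,2*NVars) encodes the mirrored ">=" rule
for variable VarIdx-NVars; VarIdx=2*NVars marks a leaf, which is followed
by a regression value (NClasses=1) or a class index (NClasses>1).
*************************************************************************/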


/*************************************************************************
Fast Pow

  -- ALGLIB --
     Copyright 24.08.2009 by Bochkanov Sergey
*************************************************************************/
static double dforest_xfastpow(double r, ae_int_t n, ae_state *_state)
{
    double result;


    result = (double)(0);
    if( n>0 )
    {
        if( n%2==0 )
        {
            result = dforest_xfastpow(r, n/2, _state);
            result = result*result;
        }
        else
        {
            result = r*dforest_xfastpow(r, n-1, _state);
        }
        return result;
    }
    if( n==0 )
    {
        result = (double)(1);
    }
    if( n<0 )
    {
        result = dforest_xfastpow(1/r, -n, _state);
    }
    return result;
}
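
/*************************************************************************
Worked example of the recursion above: dforest_xfastpow(2.0, 10) expands
as pow(2,10)=pow(2,5)^2, pow(2,5)=2*pow(2,4), pow(2,4)=pow(2,2)^2,
pow(2,2)=pow(2,1)^2, pow(2,1)=2*pow(2,0)=2, giving 1024 with O(log N)
multiplications instead of 9. Negative exponents recurse on 1/R, which is
exact here because the float decoder only requests powers of two.
*************************************************************************/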


void _decisionforestbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    decisionforestbuilder *p = (decisionforestbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->dsdata, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsrval, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsival, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->dsmin, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsmax, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsbinary, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&p->dsctotals, 0, DT_INT, _state, make_automatic);
    ae_shared_pool_init(&p->workpool, _state, make_automatic);
    ae_shared_pool_init(&p->votepool, _state, make_automatic);
    ae_shared_pool_init(&p->treepool, _state, make_automatic);
    ae_shared_pool_init(&p->treefactory, _state, make_automatic);
    ae_matrix_init(&p->iobmatrix, 0, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&p->varimpshuffle2, 0, DT_INT, _state, make_automatic);
}


void _decisionforestbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    decisionforestbuilder *dst = (decisionforestbuilder*)_dst;
    decisionforestbuilder *src = (decisionforestbuilder*)_src;
    dst->dstype = src->dstype;
    dst->npoints = src->npoints;
    dst->nvars = src->nvars;
    dst->nclasses = src->nclasses;
    ae_vector_init_copy(&dst->dsdata, &src->dsdata, _state, make_automatic);
    ae_vector_init_copy(&dst->dsrval, &src->dsrval, _state, make_automatic);
    ae_vector_init_copy(&dst->dsival, &src->dsival, _state, make_automatic);
    dst->rdfalgo = src->rdfalgo;
    dst->rdfratio = src->rdfratio;
    dst->rdfvars = src->rdfvars;
    dst->rdfglobalseed = src->rdfglobalseed;
    dst->rdfsplitstrength = src->rdfsplitstrength;
    dst->rdfimportance = src->rdfimportance;
    ae_vector_init_copy(&dst->dsmin, &src->dsmin, _state, make_automatic);
    ae_vector_init_copy(&dst->dsmax, &src->dsmax, _state, make_automatic);
    ae_vector_init_copy(&dst->dsbinary, &src->dsbinary, _state, make_automatic);
    dst->dsravg = src->dsravg;
    ae_vector_init_copy(&dst->dsctotals, &src->dsctotals, _state, make_automatic);
    dst->rdfprogress = src->rdfprogress;
    dst->rdftotal = src->rdftotal;
    ae_shared_pool_init_copy(&dst->workpool, &src->workpool, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->votepool, &src->votepool, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->treepool, &src->treepool, _state, make_automatic);
    ae_shared_pool_init_copy(&dst->treefactory, &src->treefactory, _state, make_automatic);
    dst->neediobmatrix = src->neediobmatrix;
    ae_matrix_init_copy(&dst->iobmatrix, &src->iobmatrix, _state, make_automatic);
    ae_vector_init_copy(&dst->varimpshuffle2, &src->varimpshuffle2, _state, make_automatic);
}


void _decisionforestbuilder_clear(void* _p)
{
    decisionforestbuilder *p = (decisionforestbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->dsdata);
    ae_vector_clear(&p->dsrval);
    ae_vector_clear(&p->dsival);
    ae_vector_clear(&p->dsmin);
    ae_vector_clear(&p->dsmax);
    ae_vector_clear(&p->dsbinary);
    ae_vector_clear(&p->dsctotals);
    ae_shared_pool_clear(&p->workpool);
    ae_shared_pool_clear(&p->votepool);
    ae_shared_pool_clear(&p->treepool);
    ae_shared_pool_clear(&p->treefactory);
    ae_matrix_clear(&p->iobmatrix);
    ae_vector_clear(&p->varimpshuffle2);
}


void _decisionforestbuilder_destroy(void* _p)
{
    decisionforestbuilder *p = (decisionforestbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->dsdata);
    ae_vector_destroy(&p->dsrval);
    ae_vector_destroy(&p->dsival);
    ae_vector_destroy(&p->dsmin);
    ae_vector_destroy(&p->dsmax);
    ae_vector_destroy(&p->dsbinary);
    ae_vector_destroy(&p->dsctotals);
    ae_shared_pool_destroy(&p->workpool);
    ae_shared_pool_destroy(&p->votepool);
    ae_shared_pool_destroy(&p->treepool);
    ae_shared_pool_destroy(&p->treefactory);
    ae_matrix_destroy(&p->iobmatrix);
    ae_vector_destroy(&p->varimpshuffle2);
}


void _dfworkbuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dfworkbuf *p = (dfworkbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->classpriors, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->varpool, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->trnset, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->trnlabelsr, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->trnlabelsi, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->oobset, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->ooblabelsr, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->ooblabelsi, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->curvals, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->bestvals, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmp0i, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->tmp1i, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->tmp0r, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmp1r, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmp2r, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmp3r, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmpnrms2, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->classtotals0, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->classtotals1, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->classtotals01, 0, DT_INT, _state, make_automatic);
}


void _dfworkbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dfworkbuf *dst = (dfworkbuf*)_dst;
    dfworkbuf *src = (dfworkbuf*)_src;
    ae_vector_init_copy(&dst->classpriors, &src->classpriors, _state, make_automatic);
    ae_vector_init_copy(&dst->varpool, &src->varpool, _state, make_automatic);
    dst->varpoolsize = src->varpoolsize;
    ae_vector_init_copy(&dst->trnset, &src->trnset, _state, make_automatic);
    dst->trnsize = src->trnsize;
    ae_vector_init_copy(&dst->trnlabelsr, &src->trnlabelsr, _state, make_automatic);
    ae_vector_init_copy(&dst->trnlabelsi, &src->trnlabelsi, _state, make_automatic);
    ae_vector_init_copy(&dst->oobset, &src->oobset, _state, make_automatic);
    dst->oobsize = src->oobsize;
    ae_vector_init_copy(&dst->ooblabelsr, &src->ooblabelsr, _state, make_automatic);
    ae_vector_init_copy(&dst->ooblabelsi, &src->ooblabelsi, _state, make_automatic);
    ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
    ae_vector_init_copy(&dst->curvals, &src->curvals, _state, make_automatic);
    ae_vector_init_copy(&dst->bestvals, &src->bestvals, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp0i, &src->tmp0i, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp1i, &src->tmp1i, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp0r, &src->tmp0r, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp1r, &src->tmp1r, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp2r, &src->tmp2r, _state, make_automatic);
    ae_vector_init_copy(&dst->tmp3r, &src->tmp3r, _state, make_automatic);
    ae_vector_init_copy(&dst->tmpnrms2, &src->tmpnrms2, _state, make_automatic);
    ae_vector_init_copy(&dst->classtotals0, &src->classtotals0, _state, make_automatic);
    ae_vector_init_copy(&dst->classtotals1, &src->classtotals1, _state, make_automatic);
    ae_vector_init_copy(&dst->classtotals01, &src->classtotals01, _state, make_automatic);
}


void _dfworkbuf_clear(void* _p)
{
    dfworkbuf *p = (dfworkbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->classpriors);
    ae_vector_clear(&p->varpool);
    ae_vector_clear(&p->trnset);
    ae_vector_clear(&p->trnlabelsr);
    ae_vector_clear(&p->trnlabelsi);
    ae_vector_clear(&p->oobset);
    ae_vector_clear(&p->ooblabelsr);
    ae_vector_clear(&p->ooblabelsi);
    ae_vector_clear(&p->treebuf);
    ae_vector_clear(&p->curvals);
    ae_vector_clear(&p->bestvals);
    ae_vector_clear(&p->tmp0i);
    ae_vector_clear(&p->tmp1i);
    ae_vector_clear(&p->tmp0r);
    ae_vector_clear(&p->tmp1r);
    ae_vector_clear(&p->tmp2r);
    ae_vector_clear(&p->tmp3r);
    ae_vector_clear(&p->tmpnrms2);
    ae_vector_clear(&p->classtotals0);
    ae_vector_clear(&p->classtotals1);
    ae_vector_clear(&p->classtotals01);
}


void _dfworkbuf_destroy(void* _p)
{
    dfworkbuf *p = (dfworkbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->classpriors);
    ae_vector_destroy(&p->varpool);
    ae_vector_destroy(&p->trnset);
    ae_vector_destroy(&p->trnlabelsr);
    ae_vector_destroy(&p->trnlabelsi);
    ae_vector_destroy(&p->oobset);
    ae_vector_destroy(&p->ooblabelsr);
    ae_vector_destroy(&p->ooblabelsi);
    ae_vector_destroy(&p->treebuf);
    ae_vector_destroy(&p->curvals);
    ae_vector_destroy(&p->bestvals);
    ae_vector_destroy(&p->tmp0i);
    ae_vector_destroy(&p->tmp1i);
    ae_vector_destroy(&p->tmp0r);
    ae_vector_destroy(&p->tmp1r);
    ae_vector_destroy(&p->tmp2r);
    ae_vector_destroy(&p->tmp3r);
    ae_vector_destroy(&p->tmpnrms2);
    ae_vector_destroy(&p->classtotals0);
    ae_vector_destroy(&p->classtotals1);
    ae_vector_destroy(&p->classtotals01);
}


void _dfvotebuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dfvotebuf *p = (dfvotebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->trntotals, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->oobtotals, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->trncounts, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->oobcounts, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->giniimportances, 0, DT_REAL, _state, make_automatic);
}


void _dfvotebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dfvotebuf *dst = (dfvotebuf*)_dst;
    dfvotebuf *src = (dfvotebuf*)_src;
    ae_vector_init_copy(&dst->trntotals, &src->trntotals, _state, make_automatic);
    ae_vector_init_copy(&dst->oobtotals, &src->oobtotals, _state, make_automatic);
    ae_vector_init_copy(&dst->trncounts, &src->trncounts, _state, make_automatic);
    ae_vector_init_copy(&dst->oobcounts, &src->oobcounts, _state, make_automatic);
    ae_vector_init_copy(&dst->giniimportances, &src->giniimportances, _state, make_automatic);
}


void _dfvotebuf_clear(void* _p)
{
    dfvotebuf *p = (dfvotebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->trntotals);
    ae_vector_clear(&p->oobtotals);
    ae_vector_clear(&p->trncounts);
    ae_vector_clear(&p->oobcounts);
    ae_vector_clear(&p->giniimportances);
}


void _dfvotebuf_destroy(void* _p)
{
    dfvotebuf *p = (dfvotebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->trntotals);
    ae_vector_destroy(&p->oobtotals);
    ae_vector_destroy(&p->trncounts);
    ae_vector_destroy(&p->oobcounts);
    ae_vector_destroy(&p->giniimportances);
}


void _dfpermimpbuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dfpermimpbuf *p = (dfpermimpbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->losses, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->xraw, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->xdist, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->xcur, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->yv, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->targety, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->startnodes, 0, DT_INT, _state, make_automatic);
}


void _dfpermimpbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dfpermimpbuf *dst = (dfpermimpbuf*)_dst;
    dfpermimpbuf *src = (dfpermimpbuf*)_src;
    ae_vector_init_copy(&dst->losses, &src->losses, _state, make_automatic);
    ae_vector_init_copy(&dst->xraw, &src->xraw, _state, make_automatic);
    ae_vector_init_copy(&dst->xdist, &src->xdist, _state, make_automatic);
    ae_vector_init_copy(&dst->xcur, &src->xcur, _state, make_automatic);
    ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
    ae_vector_init_copy(&dst->yv, &src->yv, _state, make_automatic);
    ae_vector_init_copy(&dst->targety, &src->targety, _state, make_automatic);
    ae_vector_init_copy(&dst->startnodes, &src->startnodes, _state, make_automatic);
}


void _dfpermimpbuf_clear(void* _p)
{
    dfpermimpbuf *p = (dfpermimpbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->losses);
    ae_vector_clear(&p->xraw);
    ae_vector_clear(&p->xdist);
    ae_vector_clear(&p->xcur);
    ae_vector_clear(&p->y);
    ae_vector_clear(&p->yv);
    ae_vector_clear(&p->targety);
    ae_vector_clear(&p->startnodes);
}


void _dfpermimpbuf_destroy(void* _p)
{
    dfpermimpbuf *p = (dfpermimpbuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->losses);
    ae_vector_destroy(&p->xraw);
    ae_vector_destroy(&p->xdist);
    ae_vector_destroy(&p->xcur);
    ae_vector_destroy(&p->y);
    ae_vector_destroy(&p->yv);
    ae_vector_destroy(&p->targety);
    ae_vector_destroy(&p->startnodes);
}


void _dftreebuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dftreebuf *p = (dftreebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
}


void _dftreebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dftreebuf *dst = (dftreebuf*)_dst;
    dftreebuf *src = (dftreebuf*)_src;
    ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
    dst->treeidx = src->treeidx;
}


void _dftreebuf_clear(void* _p)
{
    dftreebuf *p = (dftreebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->treebuf);
}


void _dftreebuf_destroy(void* _p)
{
    dftreebuf *p = (dftreebuf*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->treebuf);
}


void _decisionforestbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    decisionforestbuffer *p = (decisionforestbuffer*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
}


void _decisionforestbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    decisionforestbuffer *dst = (decisionforestbuffer*)_dst;
    decisionforestbuffer *src = (decisionforestbuffer*)_src;
    ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
    ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
}


void _decisionforestbuffer_clear(void* _p)
{
    decisionforestbuffer *p = (decisionforestbuffer*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->x);
    ae_vector_clear(&p->y);
}


void _decisionforestbuffer_destroy(void* _p)
{
    decisionforestbuffer *p = (decisionforestbuffer*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->x);
    ae_vector_destroy(&p->y);
}


void _decisionforest_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    decisionforest *p = (decisionforest*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->trees, 0, DT_REAL, _state, make_automatic);
    _decisionforestbuffer_init(&p->buffer, _state, make_automatic);
    ae_vector_init(&p->trees8, 0, DT_BYTE, _state, make_automatic);
}


void _decisionforest_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    decisionforest *dst = (decisionforest*)_dst;
    decisionforest *src = (decisionforest*)_src;
    dst->forestformat = src->forestformat;
    dst->usemantissa8 = src->usemantissa8;
    dst->nvars = src->nvars;
    dst->nclasses = src->nclasses;
    dst->ntrees = src->ntrees;
    dst->bufsize = src->bufsize;
    ae_vector_init_copy(&dst->trees, &src->trees, _state, make_automatic);
    _decisionforestbuffer_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
    ae_vector_init_copy(&dst->trees8, &src->trees8, _state, make_automatic);
}


void _decisionforest_clear(void* _p)
{
    decisionforest *p = (decisionforest*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->trees);
    _decisionforestbuffer_clear(&p->buffer);
    ae_vector_clear(&p->trees8);
}


void _decisionforest_destroy(void* _p)
{
    decisionforest *p = (decisionforest*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->trees);
    _decisionforestbuffer_destroy(&p->buffer);
    ae_vector_destroy(&p->trees8);
}


void _dfreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dfreport *p = (dfreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->topvars, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->varimportances, 0, DT_REAL, _state, make_automatic);
}


void _dfreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dfreport *dst = (dfreport*)_dst;
    dfreport *src = (dfreport*)_src;
    dst->relclserror = src->relclserror;
    dst->avgce = src->avgce;
    dst->rmserror = src->rmserror;
    dst->avgerror = src->avgerror;
    dst->avgrelerror = src->avgrelerror;
    dst->oobrelclserror = src->oobrelclserror;
    dst->oobavgce = src->oobavgce;
    dst->oobrmserror = src->oobrmserror;
    dst->oobavgerror = src->oobavgerror;
    dst->oobavgrelerror = src->oobavgrelerror;
    ae_vector_init_copy(&dst->topvars, &src->topvars, _state, make_automatic);
    ae_vector_init_copy(&dst->varimportances, &src->varimportances, _state, make_automatic);
}


void _dfreport_clear(void* _p)
{
    dfreport *p = (dfreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->topvars);
    ae_vector_clear(&p->varimportances);
}


void _dfreport_destroy(void* _p)
{
    dfreport *p = (dfreport*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->topvars);
    ae_vector_destroy(&p->varimportances);
}


void _dfinternalbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    dfinternalbuffers *p = (dfinternalbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->idxbuf, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->tmpbufr, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmpbufr2, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tmpbufi, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->classibuf, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->sortrbuf, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->sortrbuf2, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->sortibuf, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->varpool, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->evsbin, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&p->evssplits, 0, DT_REAL, _state, make_automatic);
}


void _dfinternalbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    dfinternalbuffers *dst = (dfinternalbuffers*)_dst;
    dfinternalbuffers *src = (dfinternalbuffers*)_src;
    ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
    ae_vector_init_copy(&dst->idxbuf, &src->idxbuf, _state, make_automatic);
    ae_vector_init_copy(&dst->tmpbufr, &src->tmpbufr, _state, make_automatic);
    ae_vector_init_copy(&dst->tmpbufr2, &src->tmpbufr2, _state, make_automatic);
    ae_vector_init_copy(&dst->tmpbufi, &src->tmpbufi, _state, make_automatic);
    ae_vector_init_copy(&dst->classibuf, &src->classibuf, _state, make_automatic);
    ae_vector_init_copy(&dst->sortrbuf, &src->sortrbuf, _state, make_automatic);
    ae_vector_init_copy(&dst->sortrbuf2, &src->sortrbuf2, _state, make_automatic);
    ae_vector_init_copy(&dst->sortibuf, &src->sortibuf, _state, make_automatic);
    ae_vector_init_copy(&dst->varpool, &src->varpool, _state, make_automatic);
    ae_vector_init_copy(&dst->evsbin, &src->evsbin, _state, make_automatic);
    ae_vector_init_copy(&dst->evssplits, &src->evssplits, _state, make_automatic);
}


void _dfinternalbuffers_clear(void* _p)
{
    dfinternalbuffers *p = (dfinternalbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_clear(&p->treebuf);
    ae_vector_clear(&p->idxbuf);
    ae_vector_clear(&p->tmpbufr);
    ae_vector_clear(&p->tmpbufr2);
    ae_vector_clear(&p->tmpbufi);
    ae_vector_clear(&p->classibuf);
    ae_vector_clear(&p->sortrbuf);
    ae_vector_clear(&p->sortrbuf2);
    ae_vector_clear(&p->sortibuf);
    ae_vector_clear(&p->varpool);
    ae_vector_clear(&p->evsbin);
    ae_vector_clear(&p->evssplits);
}


void _dfinternalbuffers_destroy(void* _p)
{
    dfinternalbuffers *p = (dfinternalbuffers*)_p;
    ae_touch_ptr((void*)p);
    ae_vector_destroy(&p->treebuf);
    ae_vector_destroy(&p->idxbuf);
    ae_vector_destroy(&p->tmpbufr);
    ae_vector_destroy(&p->tmpbufr2);
    ae_vector_destroy(&p->tmpbufi);
    ae_vector_destroy(&p->classibuf);
    ae_vector_destroy(&p->sortrbuf);
    ae_vector_destroy(&p->sortrbuf2);
    ae_vector_destroy(&p->sortibuf);
    ae_vector_destroy(&p->varpool);
    ae_vector_destroy(&p->evsbin);
    ae_vector_destroy(&p->evssplits);
}

#endif
#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
This function creates a buffer structure which can be used to perform
parallel KNN requests.

KNN subpackage provides two sets of computing functions - ones which use
internal buffer of the KNN model (these functions are single-threaded
because they use the same buffer, which can not be shared between
threads), and ones which use an external buffer.

This function is used to initialize the external buffer.

INPUT PARAMETERS
    Model       -   KNN model which is associated with newly created buffer

OUTPUT PARAMETERS
    Buf         -   external buffer.


IMPORTANT: buffer object should be used only with the model which was used
           to initialize the buffer. Any attempt to use the buffer with a
           different object is dangerous - you may get integrity check
           failure (exception) because sizes of internal arrays do not fit
           to dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knncreatebuffer(knnmodel* model, knnbuffer* buf, ae_state *_state)
{

    _knnbuffer_clear(buf);

    if( !model->isdummy )
    {
        kdtreecreaterequestbuffer(&model->tree, &buf->treebuf, _state);
    }
    ae_vector_set_length(&buf->x, model->nvars, _state);
    ae_vector_set_length(&buf->y, model->nout, _state);
}


/*************************************************************************
This subroutine creates KNNBuilder object which is used to train KNN models.

By default, new builder stores empty dataset and some reasonable default
settings. At the very least, you should specify dataset prior to building
KNN model. You can also tweak settings of the model construction algorithm
(recommended, although default settings should work well).

Following actions are mandatory:
* calling knnbuildersetdataset() to specify dataset
* calling knnbuilderbuildknnmodel() to build KNN model using current
  dataset and default settings

Additionally, you may call:
* knnbuildersetnorm() to change norm being used

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildercreate(knnbuilder* s, ae_state *_state)
{

    _knnbuilder_clear(s);


    /*
     * Empty dataset
     */
    s->dstype = -1;
    s->npoints = 0;
    s->nvars = 0;
    s->iscls = ae_false;
    s->nout = 1;

    /*
     * Default training settings
     */
    s->knnnrm = 2;
}


/*************************************************************************
Specifies regression problem (one or more continuous output variables are
predicted). There also exists "classification" version of this function.

This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+NOut] (note: actual size can be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next NOut elements store values of the dependent
                      variables
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NOut        -   number of dependent variables, NOut>=1

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetreg(knnbuilder* s,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nout,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;


    /*
     * Check parameters
     */
    ae_assert(npoints>=1, "knnbuildersetdatasetreg: npoints<1", _state);
    ae_assert(nvars>=1, "knnbuildersetdatasetreg: nvars<1", _state);
    ae_assert(nout>=1, "knnbuildersetdatasetreg: nout<1", _state);
    ae_assert(xy->rows>=npoints, "knnbuildersetdatasetreg: rows(xy)<npoints", _state);
    ae_assert(xy->cols>=nvars+nout, "knnbuildersetdatasetreg: cols(xy)<nvars+nout", _state);
    ae_assert(apservisfinitematrix(xy, npoints, nvars+nout, _state), "knnbuildersetdatasetreg: xy parameter contains INFs or NANs", _state);

    /*
     * Set dataset
     */
    s->dstype = 0;
    s->iscls = ae_false;
    s->npoints = npoints;
    s->nvars = nvars;
    s->nout = nout;
    rmatrixsetlengthatleast(&s->dsdata, npoints, nvars, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            s->dsdata.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
        }
    }
    rvectorsetlengthatleast(&s->dsrval, npoints*nout, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nout-1; j++)
        {
            s->dsrval.ptr.p_double[i*nout+j] = xy->ptr.pp_double[i][nvars+j];
        }
    }
}


/*************************************************************************
Specifies classification problem (two or more classes are predicted).
There also exists "regression" version of this function.

This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+1] (note: actual size can be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next element stores class index, in [0,NClasses)
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetcls(knnbuilder* s,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;


    /*
     * Check parameters
     */
    ae_assert(npoints>=1, "knnbuildersetdatasetcls: npoints<1", _state);
    ae_assert(nvars>=1, "knnbuildersetdatasetcls: nvars<1", _state);
    ae_assert(nclasses>=2, "knnbuildersetdatasetcls: nclasses<2", _state);
    ae_assert(xy->rows>=npoints, "knnbuildersetdatasetcls: rows(xy)<npoints", _state);
    ae_assert(xy->cols>=nvars+1, "knnbuildersetdatasetcls: cols(xy)<nvars+1", _state);
    ae_assert(apservisfinitematrix(xy, npoints, nvars+1, _state), "knnbuildersetdatasetcls: xy parameter contains INFs or NANs", _state);
    for(i=0; i<=npoints-1; i++)
    {
        j = ae_round(xy->ptr.pp_double[i][nvars], _state);
        ae_assert(j>=0&&j<nclasses, "knnbuildersetdatasetcls: last column of xy contains invalid class number", _state);
    }

    /*
     * Set dataset
     */
    s->iscls = ae_true;
    s->dstype = 0;
    s->npoints = npoints;
    s->nvars = nvars;
    s->nout = nclasses;
    rmatrixsetlengthatleast(&s->dsdata, npoints, nvars, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            s->dsdata.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
        }
    }
    ivectorsetlengthatleast(&s->dsival, npoints, _state);
    for(i=0; i<=npoints-1; i++)
    {
        s->dsival.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
    }
}
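
/*************************************************************************
Example of the dense layout expected by knnbuildersetdatasetcls() above:
for NVars=2 and NClasses=2 each XY row is [x0, x1, class], e.g.

    0.0  0.1  0
    0.2  0.3  0
    5.0  5.1  1
    5.2  5.3  1

The class column is converted with ae_round() and must contain integers
in [0,NClasses); any other value trips the assertion above.
*************************************************************************/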


/*************************************************************************
This function sets norm type used for neighbor search.

INPUT PARAMETERS:
    S           -   KNN builder object
    NormType    -   norm type:
                    * 0      inf-norm
                    * 1      1-norm
                    * 2      Euclidean norm (default)

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetnorm(knnbuilder* s, ae_int_t nrmtype, ae_state *_state)
{


    ae_assert((nrmtype==0||nrmtype==1)||nrmtype==2, "knnbuildersetnorm: unexpected norm type", _state);
    s->knnnrm = nrmtype;
}


/*************************************************************************
This subroutine builds KNN model according to current settings, using
dataset internally stored in the builder object.

The model being built performs inference using Eps-approximate K nearest
neighbors search algorithm, with:
* K=1,  Eps=0 corresponding to the "nearest neighbor algorithm"
* K>1,  Eps=0 corresponding to the "K nearest neighbors algorithm"
* K>=1, Eps>0 corresponding to "approximate nearest neighbors algorithm"

An approximate KNN is a good option for high-dimensional datasets (exact
KNN works slowly when the dimension count grows).

An ALGLIB implementation of kd-trees is used to perform k-nn searches.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   KNN builder object
    K           -   number of neighbors to search for, K>=1
    Eps         -   approximation factor:
                    * Eps=0 means that exact kNN search is performed
                    * Eps>0 means that (1+Eps)-approximate search is
                      performed

OUTPUT PARAMETERS:
    Model       -   KNN model
    Rep         -   report

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuilderbuildknnmodel(knnbuilder* s,
     ae_int_t k,
     double eps,
     knnmodel* model,
     knnreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t nvars;
    ae_int_t nout;
    ae_int_t npoints;
    ae_bool iscls;
    ae_matrix xy;
    ae_vector tags;

    ae_frame_make(_state, &_frame_block);
    memset(&xy, 0, sizeof(xy));
    memset(&tags, 0, sizeof(tags));
    _knnmodel_clear(model);
    _knnreport_clear(rep);
    ae_matrix_init(&xy, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tags, 0, DT_INT, _state, ae_true);

    npoints = s->npoints;
    nvars = s->nvars;
    nout = s->nout;
    iscls = s->iscls;

    /*
     * Check settings
     */
    ae_assert(k>=1, "knnbuilderbuildknnmodel: k<1", _state);
    ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "knnbuilderbuildknnmodel: eps<0", _state);

    /*
     * Prepare output
     */
    knn_clearreport(rep, _state);
    model->nvars = nvars;
    model->nout = nout;
    model->iscls = iscls;
    model->k = k;
    model->eps = eps;
    model->isdummy = ae_false;

    /*
     * Quick exit for empty dataset
     */
    if( s->dstype==-1 )
    {
        model->isdummy = ae_true;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Build kd-tree
     */
    if( iscls )
    {
        ae_matrix_set_length(&xy, npoints, nvars+1, _state);
        ae_vector_set_length(&tags, npoints, _state);
        for(i=0; i<=npoints-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                xy.ptr.pp_double[i][j] = s->dsdata.ptr.pp_double[i][j];
            }
            xy.ptr.pp_double[i][nvars] = (double)(s->dsival.ptr.p_int[i]);
            tags.ptr.p_int[i] = s->dsival.ptr.p_int[i];
        }
        kdtreebuildtagged(&xy, &tags, npoints, nvars, 0, s->knnnrm, &model->tree, _state);
    }
    else
    {
        ae_matrix_set_length(&xy, npoints, nvars+nout, _state);
        for(i=0; i<=npoints-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                xy.ptr.pp_double[i][j] = s->dsdata.ptr.pp_double[i][j];
            }
            for(j=0; j<=nout-1; j++)
            {
                xy.ptr.pp_double[i][nvars+j] = s->dsrval.ptr.p_double[i*nout+j];
            }
        }
        kdtreebuild(&xy, npoints, nvars, nout, s->knnnrm, &model->tree, _state);
    }

    /*
     * Build buffer
     */
    knncreatebuffer(model, &model->buffer, _state);

    /*
     * Report
     */
    knnallerrors(model, &xy, npoints, rep, _state);
    ae_frame_leave(_state);
}
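
/*************************************************************************
Minimal C-core calling sequence for the builder functions above (a sketch
only; error handling via ae_state_set_break_jump() and cleanup via the
corresponding _destroy functions are omitted for brevity, and production
code may prefer the C++ wrappers from the alglib namespace):

    ae_state _state;
    knnbuilder builder;
    knnmodel model;
    knnreport rep;
    ae_state_init(&_state);
    _knnbuilder_init(&builder, &_state, ae_false);
    _knnmodel_init(&model, &_state, ae_false);
    _knnreport_init(&rep, &_state, ae_false);
    knnbuildercreate(&builder, &_state);
    knnbuildersetdatasetcls(&builder, &xy, npoints, nvars, nclasses, &_state);
    knnbuilderbuildknnmodel(&builder, 3, 0.0, &model, &rep, &_state);

Here xy/npoints/nvars/nclasses stand for a caller-prepared dataset in the
format documented for knnbuildersetdatasetcls().
*************************************************************************/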


/*************************************************************************
Changing search settings of KNN model.

K and EPS parameters of KNN (AKNN) search are specified during model
construction. However, plain KNN algorithm with Euclidean distance allows
you to change them at any moment.

NOTE: future versions of KNN model may support advanced versions of KNN,
      such as NCA or LMNN. It is possible that such algorithms won't allow
      you to change search settings on the fly. If you call this function
      for an algorithm which does not support on-the-fly changes, it will
      throw an exception.

INPUT PARAMETERS:
    Model       -   KNN model
    K           -   K>=1, neighbors count
    EPS         -   accuracy of the EPS-approximate NN search. Set to 0.0,
                    if you want to perform "classic" KNN search. Specify
                    larger values if you need to speed-up high-dimensional
                    KNN queries.

OUTPUT PARAMETERS:
    nothing on success, exception on failure

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnrewritekeps(knnmodel* model,
     ae_int_t k,
     double eps,
     ae_state *_state)
{


    ae_assert(k>=1, "knnrewritekeps: k<1", _state);
    ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "knnrewritekeps: eps<0", _state);
    model->k = k;
    model->eps = eps;
}


/*************************************************************************
Inference using KNN model.

See also knnprocess0(), knnprocessi() and knnclassify() for versions with
a slightly more convenient interface.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use the same model object for
           parallel evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model       -   KNN model
    X           -   input vector, array[0..NVars-1].
    Y           -   possibly preallocated buffer. Reused if long enough.

OUTPUT PARAMETERS:
    Y           -   result. Regression estimate when solving regression
                    task, vector of posterior probabilities for
                    classification task.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocess(knnmodel* model,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{


    knntsprocess(model, &model->buffer, x, y, _state);
}


/*************************************************************************
This function returns the first component of the inferred vector (i.e. the
one with index #0).

It is a convenience wrapper for knnprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems

In the former case this function returns the inference result as a scalar,
which is definitely more convenient than wrapping it as a vector. In the
latter case it returns the probability of the object belonging to class #0.

If you call it for anything different from the two cases above, it will
work as defined, i.e. return y[0], although it is of less use in such
cases.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use the same model object for
           parallel evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model       -   KNN model
    X           -   input vector, array[0..NVars-1].

RESULT:
    Y[0]

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnprocess0(knnmodel* model,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nvars;
    double result;


    nvars = model->nvars;
    for(i=0; i<=nvars-1; i++)
    {
        model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
    }
    knn_processinternal(model, &model->buffer, _state);
    result = model->buffer.y.ptr.p_double[0];
    return result;
}


/*************************************************************************
This function returns the most probable class number for an input X. It is
the same as calling knnprocess(model,x,y), then determining i=argmax(y[i])
and returning i.

A class number in the [0,NOut) range is returned for classification
problems, -1 is returned when this function is called for regression
problems.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use the same model object for
           parallel evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers, if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model       -   KNN model
    X           -   input vector, array[0..NVars-1].

RESULT:
    class number, -1 for regression tasks

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t knnclassify(knnmodel* model,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nvars;
    ae_int_t nout;
    ae_int_t result;


    if( !model->iscls )
    {
        result = -1;
        return result;
    }
    nvars = model->nvars;
    nout = model->nout;
    for(i=0; i<=nvars-1; i++)
    {
        model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
    }
    knn_processinternal(model, &model->buffer, _state);
    result = 0;
    for(i=1; i<=nout-1; i++)
    {
        if( model->buffer.y.ptr.p_double[i]>model->buffer.y.ptr.p_double[result] )
        {
            result = i;
        }
    }
    return result;
}


/*************************************************************************
'interactive' variant of knnprocess() for languages like Python which
support constructs like "y = knnprocessi(model,x)" and interactive mode of
the interpreter.

This function allocates a new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from the command line.

IMPORTANT: this function is thread-unsafe and may modify internal
           structures of the model! You can not use the same model object
           for parallel evaluation from several threads.

           Use knntsprocess() with independent thread-local buffers if
           you need thread-safe evaluation.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocessi(knnmodel* model,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{

    ae_vector_clear(y);

    knnprocess(model, x, y, _state);
}


/*************************************************************************
Thread-safe processing using an external buffer for temporaries.

This function is thread-safe (i.e. you can use the same KNN model from
multiple threads) as long as you use different buffer objects for
different threads.

INPUT PARAMETERS:
    Model       -   KNN model
    Buf         -   buffer object, must be allocated specifically for this
                    model with knncreatebuffer().
    X           -   input vector, array[NVars]

OUTPUT PARAMETERS:
    Y           -   result, array[NOut]. Regression estimate when solving
                    a regression task, vector of posterior probabilities
                    for a classification task.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knntsprocess(knnmodel* model,
     knnbuffer* buf,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t nvars;
    ae_int_t nout;


    nvars = model->nvars;
    nout = model->nout;
    for(i=0; i<=nvars-1; i++)
    {
        buf->x.ptr.p_double[i] = x->ptr.p_double[i];
    }
    knn_processinternal(model, buf, _state);
    if( y->cnt<nout )
    {
        ae_vector_set_length(y, nout, _state);
    }
    for(i=0; i<=nout-1; i++)
    {
        y->ptr.p_double[i] = buf->y.ptr.p_double[i];
    }
}
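
/*************************************************************************
Sketch of the intended multi-threaded usage of knntsprocess() (thread
creation is application-specific and omitted): each worker owns a private
knnbuffer, so concurrent requests against one shared model never touch
common state.

    knnbuffer buf;                             (one buffer per worker)
    _knnbuffer_init(&buf, &_state, ae_false);
    knncreatebuffer(&model, &buf, &_state);
    ...
    knntsprocess(&model, &buf, &x, &y, &_state);   (safe in parallel)
*************************************************************************/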


/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set
    NPoints     -   test set size

RESULT:
    percent of incorrectly classified cases.
    Zero if model solves regression task.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over the dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrelclserror(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnreport rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _knnreport_init(&rep, _state, ae_true);

    knnallerrors(model, xy, npoints, &rep, _state);
    result = rep.relclserror;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set
    NPoints     -   test set size

RESULT:
    CrossEntropy/NPoints.
    Zero if model solves regression task.

NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
      models (such models can report exactly zero probabilities), so we
      do not recommend using it.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over the dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgce(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnreport rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _knnreport_init(&rep, _state, ae_true);

    knnallerrors(model, xy, npoints, &rep, _state);
    result = rep.avgce;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
RMS error on the test set.

Its meaning for regression task is obvious. As for classification
problems, RMS error means error when estimating posterior probabilities.

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set
    NPoints     -   test set size

RESULT:
    root mean square error.

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over the dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrmserror(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnreport rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _knnreport_init(&rep, _state, ae_true);

    knnallerrors(model, xy, npoints, &rep, _state);
    result = rep.rmserror;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Average error on the test set

Its meaning for regression task is obvious. As for classification
problems, average error means error when estimating posterior
probabilities.

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set
    NPoints     -   test set size

RESULT:
    average error

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over the dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgerror(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnreport rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _knnreport_init(&rep, _state, ae_true);

    knnallerrors(model, xy, npoints, &rep, _state);
    result = rep.avgerror;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Average relative error on the test set

Its meaning for regression task is obvious. As for classification
problems, average relative error means error when estimating posterior
probabilities.

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set
    NPoints     -   test set size

RESULT:
    average relative error

NOTE: if you need several different kinds of error metrics, it is better
      to use knnallerrors() which computes all error metrics with just one
      pass over the dataset.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgrelerror(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnreport rep;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&rep, 0, sizeof(rep));
    _knnreport_init(&rep, _state, ae_true);

    knnallerrors(model, xy, npoints, &rep, _state);
    result = rep.avgrelerror;
    ae_frame_leave(_state);
    return result;
}


/*************************************************************************
Calculates all kinds of errors for the model in one call.

INPUT PARAMETERS:
    Model       -   KNN model
    XY          -   test set:
                    * one row per point
                    * first NVars columns store independent variables
                    * depending on problem type:
                      * next column stores class number in [0,NClasses) -
                        for classification problems
                      * next NOut columns store dependent variables - for
                        regression problems
    NPoints     -   test set size, NPoints>=0

OUTPUT PARAMETERS:
    Rep         -   following fields are loaded with errors for both
                    regression and classification models:
                    * rep.rmserror - RMS error for the output
                    * rep.avgerror - average error
                    * rep.avgrelerror - average relative error
                    following fields are set only for classification
                    models, zero for regression ones:
                    * relclserror - relative classification error, in [0,1]
                    * avgce - average cross-entropy in bits per dataset
                      entry

NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
      models (such models can report exactly zero probabilities), so we
      do not recommend using it.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnallerrors(knnmodel* model,
     /* Real */ ae_matrix* xy,
     ae_int_t npoints,
     knnreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    knnbuffer buf;
    ae_vector desiredy;
    ae_vector errbuf;
    ae_int_t nvars;
    ae_int_t nout;
    ae_int_t ny;
    ae_bool iscls;
    ae_int_t i;
    ae_int_t j;

    ae_frame_make(_state, &_frame_block);
    memset(&buf, 0, sizeof(buf));
    memset(&desiredy, 0, sizeof(desiredy));
    memset(&errbuf, 0, sizeof(errbuf));
    _knnreport_clear(rep);
    _knnbuffer_init(&buf, _state, ae_true);
    ae_vector_init(&desiredy, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&errbuf, 0, DT_REAL, _state, ae_true);

    nvars = model->nvars;
    nout = model->nout;
    iscls = model->iscls;
    if( iscls )
    {
        ny = 1;
    }
    else
    {
        ny = nout;
    }

    /*
     * Check input
     */
    ae_assert(npoints>=0, "knnallerrors: npoints<0", _state);
    ae_assert(xy->rows>=npoints, "knnallerrors: rows(xy)<npoints", _state);
    ae_assert(xy->cols>=nvars+ny, "knnallerrors: cols(xy)<nvars+nout", _state);
    ae_assert(apservisfinitematrix(xy, npoints, nvars+ny, _state), "knnallerrors: xy parameter contains INFs or NANs", _state);

    /*
     * Clean up report
     */
    knn_clearreport(rep, _state);

    /*
     * Quick exit if needed
     */
    if( model->isdummy||npoints==0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Process using local buffer
     */
    knncreatebuffer(model, &buf, _state);
    if( iscls )
    {
        dserrallocate(nout, &errbuf, _state);
    }
    else
    {
        dserrallocate(-nout, &errbuf, _state);
    }
    ae_vector_set_length(&desiredy, ny, _state);
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            buf.x.ptr.p_double[j] = xy->ptr.pp_double[i][j];
        }
        if( iscls )
        {
            j = ae_round(xy->ptr.pp_double[i][nvars], _state);
            ae_assert(j>=0&&j<nout, "knnallerrors: one of the class labels is not in [0,NClasses)", _state);
            desiredy.ptr.p_double[0] = (double)(j);
        }
        else
        {
            for(j=0; j<=nout-1; j++)
            {
                desiredy.ptr.p_double[j] = xy->ptr.pp_double[i][nvars+j];
            }
        }
        knn_processinternal(model, &buf, _state);
        dserraccumulate(&errbuf, &buf.y, &desiredy, _state);
    }
    dserrfinish(&errbuf, _state);

    /*
     * Extract results
     */
    if( iscls )
    {
        rep->relclserror = errbuf.ptr.p_double[0];
        rep->avgce = errbuf.ptr.p_double[1];
    }
    rep->rmserror = errbuf.ptr.p_double[2];
    rep->avgerror = errbuf.ptr.p_double[3];
    rep->avgrelerror = errbuf.ptr.p_double[4];
    ae_frame_leave(_state);
}
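
/* Usage sketch (illustrative, not part of the library): one pass over the
   dataset fills all metric fields of knnreport at once, instead of calling
   single-metric helpers such as knnavgerror() or knnavgrelerror() above.
   Assumes a trained model and a test matrix xy with npoints rows, plus
   <stdio.h> for the printout:

       knnreport rep;
       memset(&rep, 0, sizeof(rep));
       _knnreport_init(&rep, _state, ae_true);
       knnallerrors(model, &xy, npoints, &rep, _state);
       printf("rms=%f avg=%f avgrel=%f\n",
           rep.rmserror, rep.avgerror, rep.avgrelerror);
*/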


/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnalloc(ae_serializer* s, knnmodel* model, ae_state *_state)
{
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    ae_serializer_alloc_entry(s);
    if( !model->isdummy )
    {
        kdtreealloc(s, &model->tree, _state);
    }
}


/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnserialize(ae_serializer* s, knnmodel* model, ae_state *_state)
{
    ae_serializer_serialize_int(s, getknnserializationcode(_state), _state);
    ae_serializer_serialize_int(s, knn_knnfirstversion, _state);
    ae_serializer_serialize_int(s, model->nvars, _state);
    ae_serializer_serialize_int(s, model->nout, _state);
    ae_serializer_serialize_int(s, model->k, _state);
    ae_serializer_serialize_double(s, model->eps, _state);
    ae_serializer_serialize_bool(s, model->iscls, _state);
    ae_serializer_serialize_bool(s, model->isdummy, _state);
    if( !model->isdummy )
    {
        kdtreeserialize(s, &model->tree, _state);
    }
}


/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnunserialize(ae_serializer* s, knnmodel* model, ae_state *_state)
{
    ae_int_t i0;
    ae_int_t i1;

    _knnmodel_clear(model);

    /*
     * check correctness of header
     */
    ae_serializer_unserialize_int(s, &i0, _state);
    ae_assert(i0==getknnserializationcode(_state), "KNNUnserialize: stream header corrupted", _state);
    ae_serializer_unserialize_int(s, &i1, _state);
    ae_assert(i1==knn_knnfirstversion, "KNNUnserialize: stream header corrupted", _state);

    /*
     * Unserialize data
     */
    ae_serializer_unserialize_int(s, &model->nvars, _state);
    ae_serializer_unserialize_int(s, &model->nout, _state);
    ae_serializer_unserialize_int(s, &model->k, _state);
    ae_serializer_unserialize_double(s, &model->eps, _state);
    ae_serializer_unserialize_bool(s, &model->iscls, _state);
    ae_serializer_unserialize_bool(s, &model->isdummy, _state);
    if( !model->isdummy )
    {
        kdtreeunserialize(s, &model->tree, _state);
    }

    /*
     * Prepare local buffer
     */
    knncreatebuffer(model, &model->buffer, _state);
}
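
/* Note on the serializer protocol (illustrative sketch, assuming the usual
   two-pass ALGLIB serialization scheme): knnalloc() is the sizing pass and
   must allocate exactly one entry per value later written by knnserialize()
   -- here, 8 scalar entries (serialization code, format version, nvars,
   nout, k, eps, iscls, isdummy) plus the optional kd-tree. knnunserialize()
   must read the same fields in the same order; any mismatch corrupts the
   stream. A round trip at this level looks roughly like:

       ae_serializer_alloc_start(&s);
       knnalloc(&s, &model, _state);          // pass 1: compute stream size
       ...                                    // bind serializer to output
       knnserialize(&s, &model, _state);      // pass 2: write fields
       ...                                    // later, on the reader side:
       knnunserialize(&s, &model2, _state);   // read fields back
*/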


/*************************************************************************
Sets report fields to their default values

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
static void knn_clearreport(knnreport* rep, ae_state *_state)
{
    rep->relclserror = (double)(0);
    rep->avgce = (double)(0);
    rep->rmserror = (double)(0);
    rep->avgerror = (double)(0);
    rep->avgrelerror = (double)(0);
}


/*************************************************************************
This function processes buf.X and stores the result to buf.Y

INPUT PARAMETERS:
    Model       -   KNN model
    Buf         -   processing buffer

IMPORTANT: the buffer object should be used only with the model which was
           used to initialize it. Any attempt to use the buffer with a
           different model is dangerous - you may get an integrity check
           failure (exception) because the sizes of the internal arrays
           do not fit the dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
static void knn_processinternal(knnmodel* model,
     knnbuffer* buf,
     ae_state *_state)
{
    ae_int_t nvars;
    ae_int_t nout;
    ae_bool iscls;
    ae_int_t nncnt;
    ae_int_t i;
    ae_int_t j;
    double v;

    nvars = model->nvars;
    nout = model->nout;
    iscls = model->iscls;

    /*
     * Quick exit if needed
     */
    if( model->isdummy )
    {
        for(i=0; i<=nout-1; i++)
        {
            buf->y.ptr.p_double[i] = (double)(0);
        }
        return;
    }

    /*
     * Perform request, average results
     */
    for(i=0; i<=nout-1; i++)
    {
        buf->y.ptr.p_double[i] = (double)(0);
    }
    nncnt = kdtreetsqueryaknn(&model->tree, &buf->treebuf, &buf->x, model->k, ae_true, model->eps, _state);
    v = 1/coalesce((double)(nncnt), (double)(1), _state);
    if( iscls )
    {
        kdtreetsqueryresultstags(&model->tree, &buf->treebuf, &buf->tags, _state);
        for(i=0; i<=nncnt-1; i++)
        {
            j = buf->tags.ptr.p_int[i];
            buf->y.ptr.p_double[j] = buf->y.ptr.p_double[j]+v;
        }
    }
    else
    {
        kdtreetsqueryresultsxy(&model->tree, &buf->treebuf, &buf->xy, _state);
        for(i=0; i<=nncnt-1; i++)
        {
            for(j=0; j<=nout-1; j++)
            {
                buf->y.ptr.p_double[j] = buf->y.ptr.p_double[j]+v*buf->xy.ptr.pp_double[i][nvars+j];
            }
        }
    }
}
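
/* Note on the averaging above (sketch of the formula implemented by the
   loops): with nncnt approximate nearest neighbors returned by the kd-tree
   and v = 1/nncnt,

       classification:  y[j] = (number of neighbors with class j) * v
       regression:      y[j] = v * sum_i xy[i][nvars+j]

   i.e. for classification buf.y holds neighbor class frequencies (an
   empirical estimate of posterior probabilities), and for regression it
   holds the plain average of the neighbors' dependent variables. */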


void _knnbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    knnbuffer *p = (knnbuffer*)_p;
    ae_touch_ptr((void*)p);
    _kdtreerequestbuffer_init(&p->treebuf, _state, make_automatic);
    ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->tags, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
}


void _knnbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    knnbuffer *dst = (knnbuffer*)_dst;
    knnbuffer *src = (knnbuffer*)_src;
    _kdtreerequestbuffer_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
    ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
    ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
    ae_vector_init_copy(&dst->tags, &src->tags, _state, make_automatic);
    ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
}


void _knnbuffer_clear(void* _p)
{
    knnbuffer *p = (knnbuffer*)_p;
    ae_touch_ptr((void*)p);
    _kdtreerequestbuffer_clear(&p->treebuf);
    ae_vector_clear(&p->x);
    ae_vector_clear(&p->y);
    ae_vector_clear(&p->tags);
    ae_matrix_clear(&p->xy);
}


void _knnbuffer_destroy(void* _p)
{
    knnbuffer *p = (knnbuffer*)_p;
    ae_touch_ptr((void*)p);
    _kdtreerequestbuffer_destroy(&p->treebuf);
    ae_vector_destroy(&p->x);
    ae_vector_destroy(&p->y);
    ae_vector_destroy(&p->tags);
    ae_matrix_destroy(&p->xy);
}
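
/* Note (assumption based on the usual ALGLIB object lifecycle): every
   generated structure comes with the same quartet of functions --
   _init (fresh object), _init_copy (deep copy), _clear (release contents,
   object stays usable) and _destroy (final release). Passing
   make_automatic=ae_true registers the object in the current dynamic
   frame, so a later ae_frame_leave() releases it automatically, as in:

       ae_frame _frame_block;
       knnbuffer buf;
       ae_frame_make(_state, &_frame_block);
       _knnbuffer_init(&buf, _state, ae_true);
       ...
       ae_frame_leave(_state);   // buf is freed here
*/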


void _knnbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    knnbuilder *p = (knnbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_init(&p->dsdata, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsrval, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->dsival, 0, DT_INT, _state, make_automatic);
}


void _knnbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    knnbuilder *dst = (knnbuilder*)_dst;
    knnbuilder *src = (knnbuilder*)_src;
    dst->dstype = src->dstype;
    dst->npoints = src->npoints;
    dst->nvars = src->nvars;
    dst->iscls = src->iscls;
    dst->nout = src->nout;
    ae_matrix_init_copy(&dst->dsdata, &src->dsdata, _state, make_automatic);
    ae_vector_init_copy(&dst->dsrval, &src->dsrval, _state, make_automatic);
    ae_vector_init_copy(&dst->dsival, &src->dsival, _state, make_automatic);
    dst->knnnrm = src->knnnrm;
}


void _knnbuilder_clear(void* _p)
{
    knnbuilder *p = (knnbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_clear(&p->dsdata);
    ae_vector_clear(&p->dsrval);
    ae_vector_clear(&p->dsival);
}


void _knnbuilder_destroy(void* _p)
{
    knnbuilder *p = (knnbuilder*)_p;
    ae_touch_ptr((void*)p);
    ae_matrix_destroy(&p->dsdata);
    ae_vector_destroy(&p->dsrval);
    ae_vector_destroy(&p->dsival);
}


void _knnmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    knnmodel *p = (knnmodel*)_p;
    ae_touch_ptr((void*)p);
    _kdtree_init(&p->tree, _state, make_automatic);
    _knnbuffer_init(&p->buffer, _state, make_automatic);
}


void _knnmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    knnmodel *dst = (knnmodel*)_dst;
    knnmodel *src = (knnmodel*)_src;
    dst->nvars = src->nvars;
    dst->nout = src->nout;
    dst->k = src->k;
    dst->eps = src->eps;
    dst->iscls = src->iscls;
    dst->isdummy = src->isdummy;
    _kdtree_init_copy(&dst->tree, &src->tree, _state, make_automatic);
    _knnbuffer_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
}


void _knnmodel_clear(void* _p)
{
    knnmodel *p = (knnmodel*)_p;
    ae_touch_ptr((void*)p);
    _kdtree_clear(&p->tree);
    _knnbuffer_clear(&p->buffer);
}


void _knnmodel_destroy(void* _p)
{
    knnmodel *p = (knnmodel*)_p;
    ae_touch_ptr((void*)p);
    _kdtree_destroy(&p->tree);
    _knnbuffer_destroy(&p->buffer);
}


void _knnreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    knnreport *p = (knnreport*)_p;
    ae_touch_ptr((void*)p);
}


void _knnreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    knnreport *dst = (knnreport*)_dst;
    knnreport *src = (knnreport*)_src;
    dst->relclserror = src->relclserror;
    dst->avgce = src->avgce;
    dst->rmserror = src->rmserror;
    dst->avgerror = src->avgerror;
    dst->avgrelerror = src->avgrelerror;
}


void _knnreport_clear(void* _p)
{
    knnreport *p = (knnreport*)_p;
    ae_touch_ptr((void*)p);
}


void _knnreport_destroy(void* _p)
{
    knnreport *p = (knnreport*)_p;
    ae_touch_ptr((void*)p);
}


#endif
#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)


/*************************************************************************
k-means++ clustering.

This is a backward-compatibility function; we recommend using the
CLUSTERING subpackage as a better replacement.

  -- ALGLIB --
     Copyright 21.03.2009 by Bochkanov Sergey
*************************************************************************/
void kmeansgenerate(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t k,
     ae_int_t restarts,
     ae_int_t* info,
     /* Real */ ae_matrix* c,
     /* Integer */ ae_vector* xyc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix dummy;
    ae_int_t itscnt;
    double e;
    kmeansbuffers buf;

    ae_frame_make(_state, &_frame_block);
    memset(&dummy, 0, sizeof(dummy));
    memset(&buf, 0, sizeof(buf));
    *info = 0;
    ae_matrix_clear(c);
    ae_vector_clear(xyc);
    ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
    _kmeansbuffers_init(&buf, _state, ae_true);

    kmeansinitbuf(&buf, _state);
    kmeansgenerateinternal(xy, npoints, nvars, k, 0, 1, 0, restarts, ae_false, info, &itscnt, c, ae_true, &dummy, ae_false, xyc, &e, &buf, _state);
    ae_frame_leave(_state);
}
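
/* Migration sketch (illustrative; assumes the clusterizer API from the
   CLUSTERING subpackage, which the comment above recommends as the
   replacement). A rough equivalent of kmeansgenerate() would be:

       clusterizerstate s;
       kmeansreport rep;
       clusterizercreate(&s, _state);
       clusterizersetpoints(&s, &xy, npoints, nvars, 2, _state);   // 2 = Euclidean
       clusterizersetkmeanslimits(&s, restarts, 0, _state);        // 0 = no iteration limit
       clusterizerrunkmeans(&s, k, &rep, _state);
       // rep.c and rep.cidx correspond to the C and XYC outputs above
*/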


#endif

}