initial push
|
@ -0,0 +1,9 @@
|
||||||
|
Instead of trying to find a model that is perfect at finding anomalies, ensembles try to combine multiple (maybe bad) models into one.
|
||||||
|
To do this, we need an algorithm to combine the predictions of different models. One way (that I commonly use) is to just average them in some way (e.g. score=sqrt(score_1**2+score_2**2)). This only works well if you have a huge number of mostly uncorrelated models.
|
||||||
|
If you have only a few models, or correlated ones, you can introduce bias this way. Assume we have three models: an isolation forest (iforest), an svm and a kNN algorithm. The iforest has low correlation to the other models (it finds different things anomalous compared to the svm and kNN), but the svm and the kNN find basically the same anomalies. If we just average the models, the svm and kNN together have a much bigger influence on the result than the iforest. There is no real reason why this should be the case.
|
||||||
|
To solve this, you can add models depending on the correlations between them. But instead of relying on the correlation existing between the models themselves, this repository uses a special kind of neural network to find uncorrelated parts of the model predictions.
|
||||||
|
|
||||||
|
n2ulayer.py and mu.py define this special kind of neural network. loss.py defines the correlation we want to minimize for use in tensorflow.
|
||||||
|
onemodel.py generates a quick (and quite random) anomaly detection model for use on the data defined in data.py (just a 2d gaussian). 20 models are generated and their predictions (sorted from most normal (green) to most anomalous (red)) are drawn in the numbered images in imgs.
|
||||||
|
If you use all 20 models and simply average them this results in imgs/recombine.png. Notice how the green points are much more centered.
|
||||||
|
choosenext
|
After Width: | Height: | Size: 115 KiB |
|
@ -0,0 +1,54 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from tensorflow import keras
|
||||||
|
from mu import *
|
||||||
|
from n2ulayer import ulayer
|
||||||
|
|
||||||
|
from loss import loss
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def choosenext(given, possble, *, epochs=100, batch_size=32, verbose=1):
    """Find a rotation of the candidate scores that is uncorrelated to `given`.

    `given` is a list of scores (one per sample). `possble` is a list of
    score lists (one list per candidate model, each with one score per
    sample). A chain of trainable rotation layers (built by `partr` from
    `ulayer`) is fitted so that the first output column has minimal
    covariance with `given`, as measured by `loss`.

    Args:
        given: 1-d sequence of reference scores.
        possble: 2-d sequence, shape (number of candidates, number of samples).
            It is not modified; the random reordering of the candidates is
            done on a private copy.
        epochs: maximum number of training epochs.
        batch_size: batch size used for fitting.
        verbose: verbosity passed on to `model.fit`.

    Returns:
        The model prediction for all samples, one row per sample and one
        column per candidate; column 0 is the one the loss decorrelates
        from `given`.
    """
    # Shuffle a copy: np.random.shuffle works in place and `possble` is often
    # a view into the caller's array (e.g. `x[1:5]`).
    candidates = np.array(possble)
    np.random.shuffle(candidates)
    opt = len(candidates)

    # Keras wants samples on the first axis and the target as a column.
    features = np.transpose(candidates)
    target = np.expand_dims(given, axis=1)

    inp = keras.layers.Input(shape=features.shape[1:])
    # Only rotations involving output index 0 are needed (pdim=1).
    q = partr(inp, 1, opt, ulayer)

    model = keras.models.Model(inputs=inp, outputs=q)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(loss=loss,
                  optimizer=keras.optimizers.Adam(learning_rate=0.001))
    model.summary()

    early_stop = keras.callbacks.EarlyStopping(monitor='loss',
                                               patience=10,
                                               restore_best_weights=True)
    model.fit(features, target,
              batch_size=batch_size,
              epochs=epochs,
              verbose=verbose,
              validation_split=0.0,  # a pure rotation cannot overfit
              shuffle=True,
              callbacks=[early_stop])

    return model.predict(features)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Small smoke run: decorrelate models 1..4 from model 0.
    # The npz archive keeps a file handle open, so close it once the
    # prediction matrix has been read into memory.
    with np.load("merged.npz") as f:
        x = f["ps"]
    given = x[0]
    possble = x[1:5]
    choosenext(given, possble)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def data(n=1000):
    """Draw `n` points from a 2d gaussian (mean 1.0, standard deviation 0.5).

    Every call returns a new random sample, so each model can be given
    slightly different data (subsampling instead of feature bagging), while
    a larger sample can serve as the common comparison set.

    Returns an array of shape (n, 2).
    """
    shape = (n, 2)
    return np.random.normal(loc=1.0, scale=0.5, size=shape)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Quick visual check of the generated point cloud.
    x = data()

    from plt import plt

    plt.plot(x[:, 0], x[:, 1], '.')
    # The original called `plt.how()`, which is not a pyplot function, so the
    # figure was never displayed.
    plt.show()
|
||||||
|
#print(x,y,z)
|
||||||
|
|
After Width: | Height: | Size: 115 KiB |
After Width: | Height: | Size: 107 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 97 KiB |
After Width: | Height: | Size: 104 KiB |
After Width: | Height: | Size: 115 KiB |
After Width: | Height: | Size: 105 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 107 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 101 KiB |
After Width: | Height: | Size: 107 KiB |
After Width: | Height: | Size: 114 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 107 KiB |
After Width: | Height: | Size: 121 KiB |
After Width: | Height: | Size: 104 KiB |
After Width: | Height: | Size: 104 KiB |
After Width: | Height: | Size: 113 KiB |
After Width: | Height: | Size: 110 KiB |
|
@ -0,0 +1,23 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
from tensorflow.keras import backend as K
|
||||||
|
|
||||||
|
|
||||||
|
def loss(a,b,K=K):
    """Absolute covariance between `a` and `b`.

    If an argument has more than one axis, only its first column is used.
    The correlation between the two should become zero; since correlations
    are hard to optimize, the covariance is minimized instead.

    `K` is the numeric backend providing `mean` and `abs` (the Keras
    backend by default).
    """
    first_a = a[:, 0] if len(a.shape) > 1 else a
    first_b = b[:, 0] if len(b.shape) > 1 else b

    centered_a = first_a - K.mean(first_a)
    centered_b = first_b - K.mean(first_b)
    covariance = K.mean(centered_a * centered_b)
    return K.abs(covariance)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import numpy as np

    # Two independent uniform samples: the printed covariance should be
    # close to zero.
    x = np.random.uniform(-1, 1, size=(1000, 2))
    # The original called `numpyloss2d`, which is not defined in this module.
    # `loss` accepts numpy as backend since numpy provides `mean` and `abs`.
    print(loss(x[:, 0], x[:, 1], K=np))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from choosenext import choosenext
|
||||||
|
|
||||||
|
from plt import plt
|
||||||
|
|
||||||
|
|
||||||
|
def draw(p,dat):
    """Scatter the points in `dat`, coloured by how unusual their score is.

    The squared distance of each score in `p` from the mean score is used
    to rank the points. The colour fades linearly from green (smallest
    distance) to red (largest distance) along that ranking.
    """
    deviation = np.square(p - np.mean(p))
    ranked = sorted(zip(dat, deviation), key=lambda pair: pair[1])
    points = np.array([point for point, _ in ranked])

    red = np.array([1.0, 0.0, 0.0])
    green = np.array([0.0, 1.0, 0.0])
    count = len(points)
    colours = [red * (rank / count) + green * (1 - rank / count)
               for rank in range(count)]

    plt.scatter(points[:, 0], points[:, 1], c=colours)
|
||||||
|
|
||||||
|
|
||||||
|
def combine(a,b):
    """Merge two score vectors into one.

    Each input is standardized (zero mean, unit standard deviation) and
    shifted by +1; the result is the root mean square of the two shifted
    vectors, element by element.
    """
    columns = [
        np.expand_dims(1 + (scores - np.mean(scores)) / np.std(scores), axis=1)
        for scores in (a, b)
    ]
    joined = np.concatenate(columns, axis=1)
    return np.sqrt(np.mean(np.square(joined), axis=1))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Read both arrays, then release the archive's file handle.
    with np.load("merged.npz") as f:
        dat = f["x"]
        x = f["ps"]

    # Pick a random reference model and four random candidates.
    np.random.shuffle(x)
    given = x[0]
    possble = x[1:5]

    ac = choosenext(given, possble)
    nextbest = ac[:, 0]

    for row in np.transpose(ac):
        print(np.corrcoef(given, row)[0, 1])
    # as you see: the correlation is the lowest for the first row.
    # so lets combine it
    updated = combine(given, nextbest)

    # One picture each for the reference, the suggestion and their combination.
    for scores, filename in ((given, "before.png"),
                             (nextbest, "suggestion.png"),
                             (updated, "updated.png")):
        draw(scores, dat)
        plt.savefig(filename)
        plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def determu(q,dim,ulayer):
    """Chain `ulayer(dim, i, j)` onto `q` for every index pair i < j < dim.

    The pairs are applied in ascending order of (i, j). Returns the output
    of the last layer, or `q` itself when there is no pair.
    """
    pairs = [(i, j) for i in range(dim) for j in range(i + 1, dim)]
    for first, second in pairs:
        q = ulayer(dim, first, second)(q)
    return q
|
||||||
|
def determr(q,dim,ulayer):
    """Chain `ulayer(dim, i, j)` onto `q` for every pair i < j < dim, in random order.

    The list of pairs is shuffled with `np.random.shuffle` before the
    layers are applied. Returns the output of the last layer, or `q`
    itself when there is no pair.
    """
    pairs = [[i, j] for i in range(dim) for j in range(i + 1, dim)]
    np.random.shuffle(pairs)
    for first, second in pairs:
        q = ulayer(dim, first, second)(q)
    return q
|
||||||
|
def partu(q,pdim,dim,ulayer):
    """Chain `ulayer(dim, i, j)` onto `q` for i < pdim and i < j < dim.

    Same as the full construction, but restricted to pairs whose first
    index is below `pdim`. Pairs are applied in ascending order of (i, j).
    """
    pairs = [(i, j) for i in range(pdim) for j in range(i + 1, dim)]
    for first, second in pairs:
        q = ulayer(dim, first, second)(q)
    return q
|
||||||
|
def partr(q,pdim,dim,ulayer):
    """Chain `ulayer(dim, i, j)` onto `q` for i < pdim and i < j < dim, in random order.

    The restricted list of pairs is shuffled with `np.random.shuffle`
    before the layers are applied.
    """
    # this is often just an approximation. But for sqrt runtime....
    pairs = [[i, j] for i in range(pdim) for j in range(i + 1, dim)]
    np.random.shuffle(pairs)
    for first, second in pairs:
        q = ulayer(dim, first, second)(q)
    return q
|
||||||
|
def cutdown(q,pdim):
    """Return only the first `pdim` entries along the second axis of `q`."""
    leading = slice(None, pdim)
    return q[:, leading]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
#yes I know, not the best way to do this...
import subprocess
import sys

# Train the 20 models one after another, each in its own interpreter process.
for i in range(20):
    # sys.executable keeps the child in the same interpreter/environment as
    # this script (a bare "python3" may be a different one or not exist at
    # all), and the argument list avoids going through a shell.
    # The exit status is deliberately ignored, as it was with os.system.
    subprocess.run([sys.executable, "onemodel.py", str(i)], check=False)
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
#use sin cos to get better gradients (than nulayer)
|
||||||
|
#might have better gradients? (seems that way but not sure yet)
|
||||||
|
|
||||||
|
#should rename it, but who cares
|
||||||
|
#now also able to export the given matrix
|
||||||
|
from tensorflow.keras.layers import Layer
|
||||||
|
from tensorflow.keras import backend as K
|
||||||
|
from tensorflow import keras
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ulayer(Layer):
    """Trainable plane rotation between two feature columns.

    The layer owns a single weight, the rotation angle. Columns `dex1` and
    `dex2` of the input are rotated into each other by that angle; all other
    columns pass through unchanged, so the output has the input's shape.
    """

    def __init__(self,siz,dex1,dex2, **kwargs):
        """Store the number of columns (`siz`) and the two rotated indices."""
        super(ulayer, self).__init__(**kwargs)
        self.siz = siz
        self.dex1 = dex1
        self.dex2 = dex2

    def build(self, input_shape):
        """Create the single trainable angle, initialized in [-0.5, 0.5]."""
        self.kernel = self.add_weight(name='kernel',
                                      shape=(1,),
                                      initializer=keras.initializers.RandomUniform(-0.5, 0.5),
                                      trainable=True)
        super(ulayer, self).build(input_shape)  # Be sure to call this at the end

    def numpify(self):
        """Export the current rotation as a (siz, siz) numpy matrix.

        The matrix is the identity except for the four entries of the
        rotated plane. It acts on column vectors: for one input row `v`,
        `mat @ v` equals what `call` computes for that row.
        """
        mat=np.eye(self.siz)
        val=self.weights[0].numpy()[0]
        sin,cos=np.sin(val),np.cos(val)
        mat[self.dex1,self.dex2]=sin
        mat[self.dex2,self.dex1]=-sin
        mat[self.dex1,self.dex1]=cos
        mat[self.dex2,self.dex2]=cos
        return mat

    def call(self, x):
        """Rotate columns `dex1` and `dex2` of `x` by the learned angle."""
        sin=K.sin(self.kernel)
        cos=K.cos(self.kernel)

        # One (batch, 1) tensor per input column.
        cols=[tf.expand_dims(x[:,i],1) for i in range(self.siz)]
        first,second=cols[self.dex1],cols[self.dex2]

        # Rotation ((cos, sin), (-sin, cos)) applied directly. The earlier
        # form computed ((1, tan), (-tan, 1)) * cos, which is the same
        # mathematically but divides by cos and therefore breaks down when
        # the angle reaches a point where cos is zero.
        cols[self.dex1]=first*cos+second*sin
        cols[self.dex2]=second*cos-first*sin

        return K.concatenate(cols,axis=1)

    def compute_output_shape(self, input_shape):
        """The rotation does not change the shape."""
        return input_shape
|
||||||
|
|
||||||
|
|
After Width: | Height: | Size: 110 KiB |
After Width: | Height: | Size: 115 KiB |
After Width: | Height: | Size: 121 KiB |
After Width: | Height: | Size: 100 KiB |
After Width: | Height: | Size: 98 KiB |
After Width: | Height: | Size: 104 KiB |
After Width: | Height: | Size: 108 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 104 KiB |
|
@ -0,0 +1,105 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
from data import data
|
||||||
|
|
||||||
|
# Output folders for the per-run arrays and pictures.
for folder in ('./runs', './imgs'):
    os.makedirs(folder, exist_ok=True)

# Run index: first command line argument, 0 when none is given.
dex = int(sys.argv[1]) if len(sys.argv) > 1 else 0

# Draw a seed and the training data from the current random state, build the
# evaluation set with the fixed seed 12, then re-seed with the drawn seed for
# everything numpy does afterwards.
seed = np.random.randint(100000)
x = data(1000)
np.random.seed(12)
X = data(10000)
np.random.seed(seed)

# Small dense network with a single linear output.
inp = keras.layers.Input(shape=x.shape[1:])
q = inp
for units, activation in ((5, 'relu'), (5, 'relu'), (1, 'linear')):
    q = keras.layers.Dense(units, activation=activation)(q)

model = keras.models.Model(inputs=inp, outputs=q)
model.compile(optimizer='adam', loss='mse')

# The target is the constant 1 for every training point.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
model.fit(x, np.ones(len(x)),
          epochs=500,
          batch_size=25,
          validation_split=0.2,
          verbose=1,
          shuffle=True,
          callbacks=[early_stop])

#Evaluation phase

x = X

p = model.predict(x)
mp = np.mean(p)
# Root of the mean squared deviation from the mean prediction, taken over
# the last axis of the prediction.
d = np.sqrt(np.mean((p - mp) ** 2, axis=-1))

np.savez_compressed(f"runs/{dex}", d=d, x=x, p=p, mp=mp)

# Order the evaluation points from smallest to largest deviation.
ranked = sorted(zip(x, d), key=lambda pair: pair[1])
print(ranked[0], ranked[-1])
sx = np.array([point for point, _ in ranked])

from plt import plt

# Colour fades from green (smallest deviation) to red (largest deviation).
col1 = np.array([1.0, 0.0, 0.0])
col2 = np.array([0.0, 1.0, 0.0])
ln = len(sx)
cols = [col1 * (rank / ln) + col2 * (1 - rank / ln) for rank in range(ln)]

plt.scatter(sx[:, 0], sx[:, 1], c=cols)
plt.savefig(f"imgs/{dex}.png")

# NOTE(review): `how` is not a matplotlib.pyplot function, so this line
# presumably raises after the picture has been saved; it looks like a typo
# for `show()`. Left unchanged because `show()` could block every run when
# this script is started repeatedly by a batch driver - confirm the intent.
plt.how()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
from data import data
|
||||||
|
|
||||||
|
# Collect the saved runs. os.listdir returns names in arbitrary order, so
# sort them to give the rows of `ds`/`ps` a reproducible order.
fns = [f"runs/{zw}" for zw in sorted(os.listdir("runs"))]
fns = [fn for fn in fns if os.path.isfile(fn)]
if not fns:
    raise SystemExit("no run files found in runs/ - run onemodel.py first")

x = None
ds = []
ps = []
for fn in fns:
    # Close each archive as soon as its arrays have been read.
    with np.load(fn) as run:
        if x is None:
            # The evaluation points are taken from the first run only.
            x = run["x"]
        ds.append(run["d"])
        ps.append(run["p"][:, 0])

ds = np.array(ds)
ps = np.array(ps)

# Ensemble score: root mean square of the per-model deviations.
d = np.sqrt(np.mean(np.square(ds), axis=0))

np.savez_compressed("merged", x=x, ds=ds, ps=ps, d=d)

print(np.corrcoef(ps))

# Order the points from smallest to largest ensemble score.
ranked = sorted(zip(x, d), key=lambda pair: pair[1])
sx = np.array([point for point, _ in ranked])

from plt import plt

# Colour fades from green (smallest score) to red (largest score).
col1 = np.array([1.0, 0.0, 0.0])
col2 = np.array([0.0, 1.0, 0.0])
ln = len(sx)
cols = [col1 * (rank / ln) + col2 * (1 - rank / ln) for rank in range(ln)]

plt.scatter(sx[:, 0], sx[:, 1], c=cols)
plt.savefig("imgs/recombine.png")

# The original called `plt.how()`, which is not a pyplot function.
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
numpy
|
||||||
|
tensorflow
|
||||||
|
keras
|
||||||
|
matplotlib
|
After Width: | Height: | Size: 114 KiB |
After Width: | Height: | Size: 114 KiB |