initial push

This commit is contained in:
Simon Klüttermann 2021-12-30 09:23:31 +01:00
commit 97b93cd0ef
13 changed files with 336 additions and 0 deletions

15
README Normal file
View File

@ -0,0 +1,15 @@
Usual feature selection tries to find the most important features of a given dataset. This is often done to make any downstream machine learning task easier.
This repository (and the connected thesis) tries to extend the space of selectable features from just the given features to linear combinations of features. This could help every existing machine learning pipeline that relies on feature selection to work better and make lots of data more interpretable.
To do this, it uses a special TensorFlow layer (implemented in n2ulayer.py and applied in mu.py), which only allows for special linear combinations (rotations in n-dimensional space, as they don't allow the network to lose information or to create artificial patterns that could be detected by the loss).
This loss (defined in loss.py) is minimal for a high contrast between features. It currently only works with 2-dimensional output data, but it is (mostly) differentiable, which makes it possible to use it together with this kind of rotation network (in main.py) to combine the features of a toy dataset (defined in data.py) in a high-contrast manner.
data.py contains a hidden feature that should become visible by comparing x to y-z. But as you can see from multiple runs of main.py, there are also other relations (for example between y and z, as y contains some z) and combinations of features to be found. In show.py you find the three toy features x, y, z plotted against each other, and the designated feature plotted too for reference.
As you might see, the basic idea works, but there are some caveats:
- The loss is not monotonic during training. Often the final loss is higher than 1 (the expectation for random data). Currently we can only work around this by restoring the best weights; a proper solution might require a better loss function or a different optimizer
- This is important as it is not clear if the achieved value is actually the minimum
- This is implemented only for 2 dimensional output data. For a good feature selection algorithm this would be important to extend.
- It is also not clear how well this scales to higher dimensions. The number of layers is proportional to input_dim*output_dim, but it is unclear how well the algorithm converges with a high number of layers
- You might be able to solve a restricted version of this algorithm analytically and extend this to a greedy algorithm (but only if this is something you're interested in)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

62
data.py Normal file
View File

@ -0,0 +1,62 @@
import numpy as np
#obvious solution
#(nicer version of:)
#[[ 0.95911082 0.27747321 0.0558132 ]
# [-0.14426784 0.64893967 -0.74703693]]
#(maybe)
#[[ 0.97660659 0.20377678 0.06866321]
# [-0.09744323 0.70403038 -0.70345294]]
#saw one (but did not copy) that looked like
#[[ 1.0, 0.0, 0.0],
# [ 0.0, 0.7,-0.7]]
#but there is a secondary solution
#[[ 0.34055104 0.35392664 -0.87106881]
# [-0.64936382 0.75853476 0.0543288 ]]
#x,x**2+y+z,z
#translated into
#a=x+x**2+y+z-2.5*z
#b=x**2+y+z-(13/15)*x
#find a(b)
#a-const*b=const
#x-(13/15)*x
#+(x**2+y+z)-(x**2+y+z)
#-2.5*z
#=2x/15-2.5z
#not great, but...
#tertiary solution
#[[ 0.86908816 0.3797459 -0.31698411]
# [-0.49261647 0.60629139 -0.62429142]]
#which looks like a mirrored version of
#[[ 0.93229881 -0.19299482 0.30589537]
# [ 0.3481528 0.70805987 -0.61436217]]
#which of course makes some sense
def data(n=1000):
    """
    Draw n samples of a 3d toy dataset that hides a quadratic relation.

    Two latent quantities are related by b = a**2 + noise, with a uniform in
    [-1, 1] and noise uniform in [-0.2, 0.2]. A third quantity c, uniform in
    [-1, 1], is drawn independently. The relation is hidden by returning
    x = a, y = b + c and z = c, so it only becomes visible when x is compared
    against y - z.

    Returns the tuple (x, y, z) of three arrays with n entries each.
    """
    # The three draws happen in the same order as the quantities are
    # described above, so a seeded run is reproducible.
    latent = np.random.uniform(-1.0, 1.0, n)
    jitter = np.random.uniform(-0.2, 0.2, n)
    independent = np.random.uniform(-1.0, 1.0, n)

    related = latent**2 + jitter
    return latent, related + independent, independent
if __name__ == '__main__':
    # Quick visual check of the raw toy data: plot x against y.
    # matplotlib is imported here (and not at module level) so that importing
    # `data` from other scripts does not pull in a plotting backend.
    import matplotlib.pyplot as plt

    x, y, z = data()
    plt.plot(x, y, '.')
    plt.show()

50
loss.py Normal file
View File

@ -0,0 +1,50 @@
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
def running_mean(x, n=100, K=K, tf=tf):
    """
    Calculate the running mean of an array over windows of n entries.

    The window sums are taken as differences of the cumulative sum, so the
    result has len(x) - n entries and entry i is the mean of x[i+1 : i+n+1]
    (the window starting at index 0 is not part of the output).

    `tf` is the module providing `cumsum` (tensorflow by default, numpy also
    works); `K` is accepted for a uniform signature but not used here.
    """
    totals = tf.cumsum(x)
    window_sums = totals[n:] - totals[:-n]
    return window_sums / float(n)
def running_variance(x, n=100, K=K, tf=tf):
    """
    Calculate the running variance of an array over windows of n entries.

    Uses the identity Var[x] = E[x**2] - E[x]**2, with both expectations
    taken as running means over the same windows.
    """
    mean_of_squares = running_mean(x**2, n=n, K=K, tf=tf)
    squared_mean = running_mean(x, n=n, K=K, tf=tf) ** 2
    return mean_of_squares - squared_mean
def running_std(x, n=100, K=K, tf=tf):
    """
    Calculate the running standard deviation of an array over windows of n
    entries, as the square root of the running variance.

    `K` must provide `sqrt` and `maximum` (the Keras backend by default,
    numpy also works); `tf` is forwarded to the running-mean computation.
    """
    variance = running_variance(x, n=n, K=K, tf=tf)
    # E[x**2] - E[x]**2 can come out marginally below zero through rounding
    # when a window is (almost) constant; clamp so sqrt never sees a negative
    # value (which would be NaN with the numpy backend).
    return K.sqrt(K.maximum(variance, 0.0))
def loss2d(a, b, n=25, K=K, tf=tf):
    """
    Contrast loss between the two columns of a 2d output.

    The samples are sorted by the first column, then the standard deviation
    of the second column is measured inside sliding windows of n consecutive
    samples. The loss is the mean of these local deviations divided by the
    overall deviation of the second column: it is small when the second
    column is locally well determined by the first one.

    a -- ignored; present so the function can be passed as a Keras loss,
         which is called with two arguments.
    b -- array/tensor indexed as b[:, 0] and b[:, 1].
    n -- window length for the local standard deviation.
    K, tf -- modules providing the math ops (`std`, `mean`, `sqrt` on K;
             `argsort`, `gather`, `cumsum` on tf).
    """
    first, second = b[:, 0], b[:, 1]
    # sort by the first column, evaluate the deviations on the second one
    order = tf.argsort(first)
    sorted_second = tf.gather(second, order)
    # Forward K/tf so the caller's backend is used for the running
    # statistics too, instead of silently falling back to the defaults.
    local_std = running_std(sorted_second, n=n, K=K, tf=tf)
    global_std = K.std(sorted_second)
    return K.mean(local_std) / global_std
def numpyloss2d(a, b, n=25):
    """
    Evaluate loss2d on two plain 1d numpy arrays a and b.

    a and b are paired up as the two columns of one array, which is then
    passed to loss2d with numpy standing in for the Keras backend.
    """
    import numpy as np
    from types import SimpleNamespace

    points = np.concatenate((np.expand_dims(a, 1), np.expand_dims(b, 1)), axis=1)
    # numpy has no `gather`; provide the few tensor ops loss2d may ask for
    # through a local namespace instead of patching the global numpy module.
    tensor_ops = SimpleNamespace(
        argsort=np.argsort,
        gather=np.take,
        cumsum=np.cumsum,
    )
    return loss2d(points, points, n=n, K=np, tf=tensor_ops)
if __name__=='__main__':
    # Smoke test: print the loss of two independent uniform columns.
    import numpy as np
    sample = np.random.uniform(-1, 1, size=(1000, 2))
    first_col, second_col = sample[:, 0], sample[:, 1]
    print(numpyloss2d(first_col, second_col, n=25))

75
main.py Normal file
View File

@ -0,0 +1,75 @@
"""Train a rotation network on the toy data and plot the two kept features."""
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras

from data import data
from loss import loss2d
from mu import cutdown, partr
from n2ulayer import ulayer

# Put the three toy features next to each other: one row per sample.
x, y, z = data()
x = np.concatenate([np.expand_dims(feature, 1) for feature in (x, y, z)], axis=1)

dim = int(x.shape[1])
pdim = 2  # number of features kept after the rotations (loss2d reads two columns)

# Chain of pairwise rotation layers (in random order), then keep the first
# pdim columns of the rotated data.
inp = keras.layers.Input(x.shape[1:])
q = partr(inp, pdim, dim, ulayer)
q = cutdown(q, pdim)
model = keras.models.Model(inp, q)
model.summary()

# `learning_rate` is the current name of the argument formerly spelled `lr`.
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(opt, loss=loss2d)
# The target passed to fit is a placeholder: loss2d ignores its first argument.
model.fit(
    x,
    x,
    epochs=10000,
    shuffle=False,
    validation_split=0.2,
    callbacks=[
        keras.callbacks.EarlyStopping(
            patience=250,
            monitor="loss",
            restore_best_weights=True,
        )
    ],
)

# Multiply the rotation matrices of all ulayers, in the order the layers are
# applied, to get the overall linear map; its first pdim rows are the
# feature combinations the model outputs.
mat = np.eye(dim)
for lay in model.layers[1:]:
    if isinstance(lay, ulayer):
        mat = np.dot(lay.numpify(), mat)
mat = mat[:pdim]
print(mat)

# First 800 of the 1000 samples, i.e. everything but the last 20% that
# validation_split=0.2 sets aside.
loss = model.evaluate(x[:800], x[:800])
print(loss)
p = model.predict(x[:800])

plt.plot(p[:, 0], p[:, 1], ".", alpha=0.75)
plt.title(str(loss))
plt.show()

35
mu.py Normal file
View File

@ -0,0 +1,35 @@
import numpy as np
def determu(q, dim, ulayer):
    """
    Apply one `ulayer(dim, i, j)` to q for every index pair i < j < dim.

    The pairs are visited in a fixed order: i ascending, and for each i all
    j > i ascending. `ulayer` is called as a factory and its result is
    called on the running value of q; the final value is returned.
    """
    index_pairs = [
        (first, second)
        for first in range(dim)
        for second in range(first + 1, dim)
    ]
    for first, second in index_pairs:
        q = ulayer(dim, first, second)(q)
    return q
def determr(q, dim, ulayer):
    """
    Apply one `ulayer(dim, i, j)` to q for every index pair i < j < dim,
    in random order.

    The list of pairs is shuffled in place with `np.random.shuffle` before
    the layers are applied one after the other; the final value of q is
    returned.
    """
    index_pairs = [
        [first, second]
        for first in range(dim)
        for second in range(first + 1, dim)
    ]
    np.random.shuffle(index_pairs)
    for first, second in index_pairs:
        q = ulayer(dim, first, second)(q)
    return q
def partu(q, pdim, dim, ulayer):
    """
    Apply one `ulayer(dim, i, j)` to q for every pair with i < pdim and
    i < j < dim.

    The pairs are visited in a fixed order: i ascending, and for each i all
    j > i ascending. The final value of q is returned.
    """
    index_pairs = [
        (first, second)
        for first in range(pdim)
        for second in range(first + 1, dim)
    ]
    for first, second in index_pairs:
        q = ulayer(dim, first, second)(q)
    return q
def partr(q, pdim, dim, ulayer):
    """
    Apply one `ulayer(dim, i, j)` to q for every pair with i < pdim and
    i < j < dim, in random order.

    The list of pairs is shuffled in place with `np.random.shuffle` before
    the layers are applied one after the other; the final value of q is
    returned.
    """
    index_pairs = [
        [first, second]
        for first in range(pdim)
        for second in range(first + 1, dim)
    ]
    np.random.shuffle(index_pairs)
    for first, second in index_pairs:
        q = ulayer(dim, first, second)(q)
    return q
def cutdown(q, pdim):
    """Return q restricted to its first pdim columns (all rows are kept)."""
    kept_columns = slice(None, pdim)
    return q[:, kept_columns]

72
n2ulayer.py Normal file
View File

@ -0,0 +1,72 @@
#use sin cos to get better gradients (than nulayer)
#might have better gradients? (seems that way, but not sure yet)
#should rename it, but who cares
#now also able to export the given matrix
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow import keras
import tensorflow as tf
import numpy as np
class ulayer(Layer):
    """
    Keras layer rotating two columns of its input by one trainable angle.

    With s = sin(angle) and c = cos(angle), columns dex1 and dex2 of the
    input are replaced by

        out[dex1] =  c * in[dex1] + s * in[dex2]
        out[dex2] = -s * in[dex1] + c * in[dex2]

    while all other columns pass through unchanged. The angle is the single
    weight of the layer.

    siz  -- number of input columns.
    dex1 -- index of the first rotated column.
    dex2 -- index of the second rotated column.
    """

    def __init__(self, siz, dex1, dex2, **kwargs):
        super().__init__(**kwargs)
        self.siz = siz
        self.dex1 = dex1
        self.dex2 = dex2

    def build(self, input_shape):
        """Create the trainable rotation angle, drawn uniformly from [-0.5, 0.5]."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=(1,),
            initializer=keras.initializers.RandomUniform(-0.5, 0.5),
            trainable=True,
        )
        super().build(input_shape)  # Be sure to call this at the end

    def numpify(self):
        """
        Export the current rotation as a (siz, siz) numpy matrix M such that
        the layer maps each input row x to M @ x.
        """
        mat = np.eye(self.siz)
        val = self.weights[0].numpy()[0]
        sin, cos = np.sin(val), np.cos(val)
        mat[self.dex1, self.dex2] = sin
        mat[self.dex2, self.dex1] = -sin
        mat[self.dex1, self.dex1] = cos
        mat[self.dex2, self.dex2] = cos
        return mat

    def call(self, x):
        """Rotate columns dex1 and dex2 of x; x is indexed as x[:, i]."""
        sin = K.sin(self.kernel)
        cos = K.cos(self.kernel)
        columns = [tf.expand_dims(x[:, i], 1) for i in range(self.siz)]
        first, second = columns[self.dex1], columns[self.dex2]
        # Written directly with sin and cos. Going through tan = sin / cos
        # and multiplying by cos afterwards gives the same result, but
        # evaluates inf * 0 = NaN whenever cos reaches zero.
        columns[self.dex1] = first * cos + second * sin
        columns[self.dex2] = second * cos - first * sin
        return K.concatenate(columns, axis=1)

    def compute_output_shape(self, input_shape):
        """The rotation keeps the shape of its input."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"siz": self.siz, "dex1": self.dex1, "dex2": self.dex2})
        return config

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
numpy
tensorflow
keras
matplotlib

23
show.py Normal file
View File

@ -0,0 +1,23 @@
import matplotlib.pyplot as plt

from data import data
from loss import numpyloss2d

x, y, z = data()


def plotone(x, y, show=True):
    """
    Scatter-plot x against y.

    With show=True (the default) the figure is displayed immediately; pass
    show=False to keep drawing on the same figure.
    """
    # Showing the loss as the title is switched off; the original note marks
    # this call as buggy:
    # plt.title(str(numpyloss2d(x, y)))
    plt.plot(x, y, "o", alpha=0.75)
    if show:
        plt.show()


# The three toy features plotted against each other ...
plotone(x, y)
plotone(x, z)
plotone(y, z)
# ... and the combination that exposes the hidden relation.
print("desired")
plotone(x, y - z)