initial push
This commit is contained in:
commit
97b93cd0ef
|
@ -0,0 +1,15 @@
|
||||||
|
Usual feature selection tries to find the most important features of a given dataset. This is often done to make any downstream machine learning task easier.
|
||||||
|
This repository (and the connected thesis) tries to extend the space of possibly selected features from just the given features to linear combinations of features. This could help every existing machine learning pipeline that relies on feature selection to work better, and make lots of data more interpretable.
|
||||||
|
|
||||||
|
To do this, it uses a special tensorflow layer (implemented in n2ulayer.py and applied in mu.py), which only allows for special linear combinations (rotations in n-dimensional space, as they don't allow the network to lose information or to create artificial patterns that could be detected by the loss).
|
||||||
|
This loss (defined in loss.py) is minimal for a high contrast between features. It is currently only able to work with 2-dimensional output data, but it is (mostly) differentiable, making it possible to use both it and this kind of rotation network (in main.py) to combine the features of some toy dataset (defined in data.py) in a highly contrastful manner.
|
||||||
|
data.py contains a hidden feature that should become visible by comparing x to y-z. But as you can see from multiple runs of main.py, there are also other relations (for example between y and z, as y contains some z) and combinations of features to be found. In show.py you find the three toy features x, y, z plotted against each other, and the designated feature plotted too for reference.
|
||||||
|
|
||||||
|
As you might see, the basic idea works, but there are some caveats:
|
||||||
|
- The loss during training is not monotonic. Often the final loss is higher than 1 (the expectation for random data). Currently we can only work around this by restoring the best weights. It might only be solved properly by a better loss function or a different optimizer.
|
||||||
|
- This is important, as it is not clear whether the achieved value is actually the minimum.
|
||||||
|
- This is implemented only for 2 dimensional output data. For a good feature selection algorithm this would be important to extend.
|
||||||
|
- It is also not clear how well this scales to higher dimensions. The number of layers is proportional to input_dim*output_dim, but it is unclear how well the algorithm converges with a high number of layers
|
||||||
|
- You might be able to solve a restricted version of this algorithm analytically and extend this to a greedy algorithm (but only if this is something you're interested in).
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,62 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
#obvious solution
|
||||||
|
#(nicer version of:)
|
||||||
|
#[[ 0.95911082 0.27747321 0.0558132 ]
|
||||||
|
# [-0.14426784 0.64893967 -0.74703693]]
|
||||||
|
#(maybe)
|
||||||
|
#[[ 0.97660659 0.20377678 0.06866321]
|
||||||
|
# [-0.09744323 0.70403038 -0.70345294]]
|
||||||
|
#saw one (but did not copy) that looked like
|
||||||
|
#[[ 1.0, 0.0, 0.0],
|
||||||
|
# [ 0.0, 0.7,-0.7]]
|
||||||
|
#but there is a secondary solution
|
||||||
|
#[[ 0.34055104 0.35392664 -0.87106881]
|
||||||
|
# [-0.64936382 0.75853476 0.0543288 ]]
|
||||||
|
#x,x**2+y+z,z
|
||||||
|
#translated into
|
||||||
|
#a=x+x**2+y+z-2.5*z
|
||||||
|
#b=x**2+y+z-(13/15)*x
|
||||||
|
#find a(b)
|
||||||
|
#a-const*b=const
|
||||||
|
#x-(13/15)*x
|
||||||
|
#+(x**2+y+z)-(x**2+y+z)
|
||||||
|
#-2.5*z
|
||||||
|
#=2x/15-2.5z
|
||||||
|
#not great, but...
|
||||||
|
|
||||||
|
#tertiary solution
|
||||||
|
#[[ 0.86908816 0.3797459 -0.31698411]
|
||||||
|
# [-0.49261647 0.60629139 -0.62429142]]
|
||||||
|
#which looks like a mirrored version of
|
||||||
|
#[[ 0.93229881 -0.19299482 0.30589537]
|
||||||
|
# [ 0.3481528 0.70805987 -0.61436217]]
|
||||||
|
#which of course makes some sense
|
||||||
|
|
||||||
|
|
||||||
|
def data(n=1000):
    """
    Generate a 3d toy dataset that hides a relation between two latent variables.

    Three latent variables are drawn per sample:
      a ~ U(-1, 1)
      b = a**2 + U(-0.2, 0.2)   (noisy parabola in a)
      c ~ U(-1, 1)              (independent of a and b)
    Only mixtures of them are returned: x = a, y = b + c, z = c.
    The relation between a and b is therefore visible in x versus y - z.

    n: number of samples to draw.

    Returns a tuple (x, y, z) of 1d numpy arrays, each of length n.
    Uses the global numpy random state, so call np.random.seed first for
    reproducible output.
    """
    a = np.random.uniform(-1.0, 1.0, n)
    b = a**2 + np.random.uniform(-0.2, 0.2, n)
    c = np.random.uniform(-1.0, 1.0, n)

    x = a
    y = b + c  # y carries the hidden feature b, masked by c
    z = c

    return x, y, z
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Quick visual check of the toy data: scatter x against y.
    # The original `from plt import plt` referenced a module that does not
    # exist; matplotlib.pyplot is the plotting module listed in requirements.
    import matplotlib.pyplot as plt

    x, y, z = data()

    plt.plot(x, y, '.')
    plt.show()
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
from tensorflow.keras import backend as K
|
||||||
|
|
||||||
|
|
||||||
|
def running_mean(x,n=100,K=K,tf=tf):
    """
    Calculate the running mean of a 1d array over windows of n entries.

    x:  1d array or tensor.
    n:  window length.
    K:  unused here; accepted so that all running_* helpers share one signature.
    tf: module providing cumsum (tensorflow by default; numpy works as well).

    Returns len(x) - n values. Entry i is the mean of x[i+1 : i+n+1], so the
    window that starts at x[0] is not part of the result.
    """
    cumsum = tf.cumsum(x)
    # The difference of two cumulative sums n apart is the sum of one window.
    return (cumsum[n:] - cumsum[:-n]) / float(n)
|
||||||
|
def running_variance(x,n=100,K=K,tf=tf):
    """
    Calculate the running variance of a 1d array over windows of n entries.

    Computed per window as mean(x**2) - mean(x)**2, using running_mean for
    both terms. K and tf are forwarded unchanged to running_mean.

    Returns one value per window produced by running_mean.
    """
    mean_of_squares = running_mean(x**2, n=n, K=K, tf=tf)
    squared_mean = running_mean(x, n=n, K=K, tf=tf)**2
    return mean_of_squares - squared_mean
|
||||||
|
def running_std(x,n=100,K=K,tf=tf):
    """
    Calculate the running standard deviation of a 1d array.

    This is the square root (K.sqrt) of running_variance over windows of n
    entries. K must provide sqrt; tf is forwarded to the running sums.
    """
    variance = running_variance(x, n=n, K=K, tf=tf)
    return K.sqrt(variance)
|
||||||
|
|
||||||
|
def loss2d(a,b,n=25,K=K,tf=tf):
    """
    Contrast loss for 2d outputs: local spread of y relative to its global spread.

    The samples are sorted by their first output column; the running standard
    deviation of the second column (window n) is averaged and divided by the
    standard deviation of the whole second column. A small value means that
    the second feature is well determined by the first one.

    a:  ignored (the first argument exists so the function fits the
        loss(y_true, y_pred) call signature used when compiling a model).
    b:  array/tensor indexed as b[:, 0] and b[:, 1].
    n:  window length of the running standard deviation.
    K:  module providing sqrt, std and mean.
    tf: module providing argsort, gather and cumsum.
    """
    x, y = b[:, 0], b[:, 1]

    # sort by x, evaluate stds on y
    order = tf.argsort(x)
    y_sorted = tf.gather(y, order)

    # Forward K and tf: previously the defaults were used here, so a call with
    # K=np, tf=np still ran the running std through tensorflow.
    local_std = running_std(y_sorted, n=n, K=K, tf=tf)
    global_std = K.std(y_sorted)

    return K.mean(local_std) / global_std
|
||||||
|
|
||||||
|
def numpyloss2d(a,b,n=25):
    """
    Evaluate loss2d on two 1d numpy arrays instead of a model output.

    a, b: 1d arrays of equal length; they become the two columns handed to
          loss2d.
    n:    window length of the running standard deviation.

    Returns whatever loss2d returns for the stacked (len, 2) array.
    """
    import numpy as np
    from types import SimpleNamespace

    q = np.concatenate((np.expand_dims(a, 1), np.expand_dims(b, 1)), axis=1)

    # loss2d expects a tensorflow-like module with argsort/gather/cumsum.
    # Provide a local adapter rather than patching `gather` onto numpy itself,
    # which would modify the numpy module for every other user in the process.
    ops = SimpleNamespace(argsort=np.argsort, gather=np.take, cumsum=np.cumsum)

    return loss2d(q, q, n=n, K=np, tf=ops)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__=='__main__':
    # Smoke test: two independent uniform columns carry no relation, so the
    # printed loss is the reference value for "random data".
    import numpy as np

    x = np.random.uniform(-1, 1, size=(1000, 2))
    print(numpyloss2d(x[:, 0], x[:, 1], n=25))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
"""Train a rotation network on the toy data and plot the two learned features."""
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras

from data import data
from loss import loss2d
from mu import cutdown, partr
from n2ulayer import ulayer

# Fraction of the samples Keras holds out for validation during fit().
VALIDATION_SPLIT = 0.2

# Stack the three toy features into one (samples, 3) input matrix.
x, y, z = data()
x = np.concatenate([np.expand_dims(col, 1) for col in (x, y, z)], axis=1)

dim = int(x.shape[1])  # number of input features
pdim = 2               # number of output features (loss2d needs exactly 2)

# Chain of plane rotations, then keep only the first pdim rotated features.
inp = keras.layers.Input(x.shape[1:])
q = inp
q = partr(q, pdim, dim, ulayer)
q = cutdown(q, pdim)

model = keras.models.Model(inp, q)
model.summary()

# Learning rates 0.0001 and 0.001 were tried as well.
# `learning_rate` is the current name of the deprecated `lr` argument.
opt = keras.optimizers.Adam(learning_rate=0.01)

model.compile(opt, loss=loss2d)

# The target passed to fit() is a placeholder: loss2d ignores its first argument.
# NOTE(review): no batch_size is given, so the loss is evaluated per default
# Keras batch - confirm that this is intended for a window-based loss.
# Training loss is not monotonic, so the best weights are restored at the end.
model.fit(x, x,
          epochs=10000,
          shuffle=False,
          validation_split=VALIDATION_SPLIT,
          callbacks=[keras.callbacks.EarlyStopping(patience=250,
                                                   monitor="loss",
                                                   restore_best_weights=True)])

# Compose the learned rotations into one matrix. Layers act in model order,
# so each later rotation is multiplied from the left.
mat = np.eye(dim)
for lay in model.layers[1:]:
    if not isinstance(lay, ulayer):
        continue
    mat = np.dot(lay.numpify(), mat)

# Only the first pdim rows survive the cutdown.
mat = mat[:pdim]

print(mat)

# Evaluate and plot on the leading part of the data (800 of 1000 samples for
# a split of 0.2), i.e. everything before the validation tail.
n_train = int(len(x) * (1.0 - VALIDATION_SPLIT))
train = x[:n_train]

loss = model.evaluate(train, train)
print(loss)
p = model.predict(train)

plt.plot(p[:, 0], p[:, 1], ".", alpha=0.75)
plt.title(str(loss))
plt.show()  # was `plt.how()`, which raises AttributeError
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def _index_pairs(rows, dim):
    """Return all pairs (i, j) with 0 <= i < rows and i < j < dim, ordered by i, then j."""
    return [(i, j) for i in range(rows) for j in range(i + 1, dim)]


def _apply_rotations(q, pairs, dim, ulayer):
    """Feed q through one ulayer(dim, i, j) per pair, in the given order."""
    for i, j in pairs:
        q = ulayer(dim, i, j)(q)
    return q


def determu(q,dim,ulayer):
    """
    Apply one ulayer for every index pair i < j < dim, in fixed order.

    q:      input passed to the first layer.
    dim:    number of features; also the first constructor argument of ulayer.
    ulayer: callable ulayer(dim, i, j) returning a callable layer.

    Returns the output of the last layer.
    """
    return _apply_rotations(q, _index_pairs(dim, dim), dim, ulayer)


def determr(q,dim,ulayer):
    """
    Like determu, but the pairs are applied in random order.

    The order is drawn with np.random.shuffle (global numpy random state).
    """
    pairs = _index_pairs(dim, dim)
    np.random.shuffle(pairs)
    return _apply_rotations(q, pairs, dim, ulayer)


def partu(q,pdim,dim,ulayer):
    """
    Apply one ulayer for every pair (i, j) with i < pdim and i < j < dim, in fixed order.

    Only pairs whose first index is one of the pdim leading features are used.
    """
    return _apply_rotations(q, _index_pairs(pdim, dim), dim, ulayer)


def partr(q,pdim,dim,ulayer):
    """
    Like partu, but the pairs are applied in random order.

    The order is drawn with np.random.shuffle (global numpy random state).
    """
    pairs = _index_pairs(pdim, dim)
    np.random.shuffle(pairs)
    return _apply_rotations(q, pairs, dim, ulayer)


def cutdown(q,pdim):
    """Keep only the first pdim columns of q (q is indexed as q[:, :pdim])."""
    return q[:, :pdim]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
#use sin cos to get better gradients (than nulayer)
|
||||||
|
#might have better gradients? (seems that way but not sure yet)
|
||||||
|
|
||||||
|
#should rename it, but who cares
|
||||||
|
#now also able to export the given matrix
|
||||||
|
from tensorflow.keras.layers import Layer
|
||||||
|
from tensorflow.keras import backend as K
|
||||||
|
from tensorflow import keras
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ulayer(Layer):
    """
    Keras layer that rotates two feature columns by one trainable angle.

    The layer holds a single weight (the angle). Columns dex1 and dex2 of the
    input are mixed as
        new[dex1] =  cos(angle) * old[dex1] + sin(angle) * old[dex2]
        new[dex2] = -sin(angle) * old[dex1] + cos(angle) * old[dex2]
    and all other columns pass through unchanged. numpify() exports the same
    transformation as a (siz, siz) matrix.

    siz:  number of feature columns of the input.
    dex1: index of the first column of the rotation plane.
    dex2: index of the second column of the rotation plane.
    """

    def __init__(self, siz, dex1, dex2, **kwargs):
        super().__init__(**kwargs)
        self.siz = siz
        self.dex1 = dex1
        self.dex2 = dex2

    def build(self, input_shape):
        # Single trainable rotation angle, initialised in [-0.5, 0.5].
        self.kernel = self.add_weight(name='kernel',
                                      shape=(1,),
                                      initializer=keras.initializers.RandomUniform(-0.5, 0.5),
                                      trainable=True)
        super().build(input_shape)  # Be sure to call this at the end

    def numpify(self):
        """Return the current rotation as a (siz, siz) numpy matrix."""
        mat = np.eye(self.siz)
        val = self.kernel.numpy()[0]
        sin, cos = np.sin(val), np.cos(val)
        mat[self.dex1, self.dex2] = sin
        mat[self.dex2, self.dex1] = -sin
        mat[self.dex1, self.dex1] = cos
        mat[self.dex2, self.dex2] = cos
        return mat

    def call(self, x):
        """Rotate columns dex1 and dex2 of x; x is indexed as x[:, i] for i < siz."""
        sin = K.sin(self.kernel)
        cos = K.cos(self.kernel)

        cols = [tf.expand_dims(x[:, i], 1) for i in range(self.siz)]
        first, second = cols[self.dex1], cols[self.dex2]

        # Equivalent to the normalised shear ((1, a), (-a, 1)) / sqrt(1 + a**2)
        # with a = tan(angle). Written with sin/cos directly, because forming
        # tan = sin / cos and multiplying by cos afterwards yields inf * 0 = NaN
        # where cos(angle) is 0.
        cols[self.dex1] = cos * first + sin * second
        cols[self.dex2] = cos * second - sin * first

        return K.concatenate(cols, axis=1)

    def compute_output_shape(self, input_shape):
        # A rotation does not change the shape.
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"siz": self.siz, "dex1": self.dex1, "dex2": self.dex2})
        return config
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
numpy
|
||||||
|
tensorflow
|
||||||
|
keras
|
||||||
|
matplotlib
|
|
@ -0,0 +1,23 @@
|
||||||
|
"""Plot the toy features against each other, followed by the designated feature pair."""
import matplotlib.pyplot as plt

# NOTE(review): the original imported `data` from `data2`, which is not part of
# this commit; the README names data.py as the toy dataset - confirm.
from data import data
from loss import numpyloss2d

x, y, z = data()


def plotone(x, y, show=True):
    """
    Scatter-plot y against x.

    x, y: sequences of equal length.
    show: when true, display the figure immediately with plt.show().
    """
    # plt.title(str(numpyloss2d(x,y)))  # disabled: marked as buggy by the author
    plt.plot(x, y, "o", alpha=0.75)
    if show:
        plt.show()


plotone(x, y)
plotone(x, z)
plotone(y, z)
print("desired")
# x against y - z exposes the hidden relation built into the toy data.
plotone(x, y - z)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue