initial push

2021-12-30 09:23:31 +01:00 · 2021-12-30 09:23:31 +01:00 · 97b93cd0ef
commit 97b93cd0ef
13 changed files with 336 additions and 0 deletions
--- a/15
+++ b/15
@ -0,0 +1,15 @@
 Usual feature selection tries to find the most important features of a given dataset. This is often done to make any downstream machine learning task easier.
 This repository (and the connected thesis) tries to extend the space of possibly selected features from just the given features to linear combinations of features. This could help every existing machine learning pipeline that relies on feature selection to work better, and could make lots of data more interpretable.
 To do this, it uses a special TensorFlow layer (implemented in n2ulayer.py and applied in mu.py), which only allows for special linear combinations (rotations in n-dimensional space, as they don't allow the network to lose information or to create artificial patterns that could be detected by the loss).
 This loss (defined in loss.py) is minimal for a high contrast between features. It is currently only able to work with 2-dimensional output data, but it is (mostly) differentiable, making it possible to use both it and this kind of rotation network (in main.py) to combine the features of a toy dataset (defined in data.py) in a high-contrast manner.
 data.py contains a hidden feature that should become visible by comparing x to y-z. But as you can see from multiple runs of main.py, there are also other relations (for example between y and z, as y contains some z) and combinations of features to be found. In show.py you find the three toy features x, y, z plotted against each other, and the designated feature plotted too, for reference.
 As you might see, the basic idea works, but there are some caveats:
 - The loss during training is not monotonic. Often the final loss is higher than 1 (the expectation for random data). Currently we can only work around this by restoring the best weights. It might only be properly solved by a better loss function or a different optimizer.
 - This is important as it is not clear if the achieved value is actually the minimum.
 - This is implemented only for 2 dimensional output data. For a good feature selection algorithm this would be important to extend.
 - It is also not clear how well this scales to higher dimensions. The number of layers is proportional to input_dim*output_dim, but it is unclear how well the algorithm converges with a high number of layers
 - You might be able to solve a restricted version of this algorithm analytically and extend this to a greedy algorithm (but only if this is something you're interested in).
--- a/pycache/data.cpython-39.pyc
+++ b/pycache/data.cpython-39.pyc
--- a/pycache/data2.cpython-39.pyc
+++ b/pycache/data2.cpython-39.pyc
--- a/pycache/loss.cpython-39.pyc
+++ b/pycache/loss.cpython-39.pyc
--- a/pycache/mu.cpython-39.pyc
+++ b/pycache/mu.cpython-39.pyc
--- a/pycache/n2ulayer.cpython-39.pyc
+++ b/pycache/n2ulayer.cpython-39.pyc
--- a/data.py
+++ b/data.py
@ -0,0 +1,62 @@
 import numpy as np
 #obvious solution
 #(nicer version of:)
 #[[ 0.95911082  0.27747321  0.0558132 ]
 # [-0.14426784  0.64893967 -0.74703693]]
 #(maybe)
 #[[ 0.97660659  0.20377678  0.06866321]
 # [-0.09744323  0.70403038 -0.70345294]]
 #saw one (but did not copy) that looked like
 #[[ 1.0, 0.0, 0.0],
 # [ 0.0, 0.7,-0.7]]
 #but there is a secondary solution
 #[[ 0.34055104  0.35392664 -0.87106881]
 # [-0.64936382  0.75853476  0.0543288 ]]
 #x,x**2+y+z,z
 #translated into
 #a=x+x**2+y+z-2.5*z
 #b=x**2+y+z-(13/15)*x
 #find a(b)
 #a-const*b=const
 #x-(13/15)*x
 #+(x**2+y+z)-(x**2+y+z)
 #-2.5*z
 #=2x/15-2.5z
 #not great, but...
 #tertiary solution
 #[[ 0.86908816  0.3797459  -0.31698411]
 # [-0.49261647  0.60629139 -0.62429142]]
 #which looks like a mirrored version of
 #[[ 0.93229881 -0.19299482  0.30589537]
 # [ 0.3481528   0.70805987 -0.61436217]]
 #which of course makes some sense
 def data(n=1000):
     """
     Draw a 3d toy dataset that hides a noisy parabola.

     Three arrays of length n are sampled: a and c uniformly from [-1, 1],
     and b = a**2 plus uniform noise from [-0.2, 0.2]. The relation between
     a and b is then obscured by adding c to the second feature.

     Returns the tuple (x, y, z) with x = a, y = b + c and z = c, so that
     y - z recovers b.
     """
     # The order of the three draws matters for reproducibility under a seed.
     hidden_a = np.random.uniform(-1.0, 1.0, n)
     noise = np.random.uniform(-0.2, 0.2, n)
     hidden_b = hidden_a**2 + noise
     mixer = np.random.uniform(-1.0, 1.0, n)
     return hidden_a, hidden_b + mixer, mixer
 if __name__ == '__main__':
     # Quick visual check when the module is run directly: scatter the
     # first two returned features against each other.
     # pyplot is imported from matplotlib (listed in requirements.txt);
     # the previous "from plt import plt" pointed at a module that is not
     # shipped with this project.
     import matplotlib.pyplot as plt
     x, y, z = data()
     plt.plot(x, y, '.')
     plt.show()
--- a/loss.py
+++ b/loss.py
@ -0,0 +1,50 @@
 import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras import backend as K
 def running_mean(x,n=100,K=K,tf=tf):
     """
     Mean over sliding windows of length n along x.

     Built from a cumulative sum: entry i of the result is
     (cumsum[i+n] - cumsum[i]) / n, i.e. the mean of x[i+1] ... x[i+n].
     The result therefore has len(x) - n entries, and the window that
     starts at x[0] is not part of it.

     K is not used in this function; tf only has to provide cumsum.
     """
     totals = tf.cumsum(x)
     upper = totals[n:]
     lower = totals[:-n]
     window_sums = upper - lower
     return window_sums / float(n)
 def running_variance(x,n=100,K=K,tf=tf):
     """
     Variance over sliding windows of length n along x.

     Uses Var = E[x^2] - E[x]^2, with both expectations taken by
     running_mean over the same windows. K and tf are passed through
     to running_mean unchanged.
     """
     mean_of_squares = running_mean(x**2, n=n, K=K, tf=tf)
     mean = running_mean(x, n=n, K=K, tf=tf)
     return mean_of_squares - mean**2
 def running_std(x,n=100,K=K,tf=tf):
     """
     Standard deviation over sliding windows of length n along x.

     Takes the element-wise square root (K.sqrt) of running_variance.
     """
     # NOTE(review): the variance is a difference of two means and may
     # round slightly below zero, in which case sqrt would give NaN -
     # confirm whether clamping is wanted here.
     variance = running_variance(x, n=n, K=K, tf=tf)
     return K.sqrt(variance)
 def loss2d(a,b,n=25,K=K,tf=tf):
     """
     Contrast loss for 2d outputs: local spread of the second column
     relative to its global spread.

     The second column of b is reordered by ascending first column, the
     running standard deviation over windows of n consecutive points is
     averaged, and that average is divided by the standard deviation of
     the whole column.

     a is ignored; it only fills the first slot of the (a, b) signature
     under which main.py passes this function to model.compile(loss=...).
     b is indexed as b[:,0] and b[:,1], so it needs at least two columns.
     K and tf select the backend and are forwarded to running_std, so a
     caller that passes replacements (as numpyloss2d does) gets them used
     for the whole computation instead of only for the sort and the final
     ratio.
     """
     first, second = b[:,0], b[:,1]
     # Sort by the first feature, then measure how tightly the second
     # feature follows it locally.
     order = tf.argsort(first)
     sorted_second = tf.gather(second, order)
     local_std = running_std(sorted_second, n=n, K=K, tf=tf)
     global_std = K.std(sorted_second)
     return K.mean(local_std) / global_std
 def numpyloss2d(a,b,n=25):
     """
     Evaluate loss2d on two numpy arrays a and b using numpy as backend.

     a and b are stacked as the two columns of one array, which is handed
     to loss2d with K=np. For the tf argument a small local namespace
     supplies the function names loss2d and its helpers look up on it
     (argsort, gather, cumsum); gather maps to np.take. This replaces the
     earlier approach of assigning np.gather on the numpy module itself,
     which changed numpy globally for the whole process.
     """
     import types
     import numpy as np
     points = np.stack((a, b), axis=1)
     numpy_as_tf = types.SimpleNamespace(
         argsort=np.argsort,
         gather=np.take,
         cumsum=np.cumsum,
     )
     return loss2d(points, points, n=n, K=np, tf=numpy_as_tf)
 if __name__=='__main__':
     # Smoke test: print the loss for two independent uniform features.
     import numpy as np
     samples = np.random.uniform(-1, 1, size=(1000, 2))
     first, second = samples[:, 0], samples[:, 1]
     print(numpyloss2d(first, second, n=25))
--- a/main.py
+++ b/main.py
@ -0,0 +1,75 @@
 # Train a chain of rotation layers on the toy data so that the first
 # pdim rotated features minimise loss2d, then print the combined
 # rotation matrix and plot the resulting two features.
 import matplotlib.pyplot as plt
 import numpy as np
 from tensorflow import keras

 from data import data
 from loss import loss2d
 from mu import cutdown, partr
 from n2ulayer import ulayer

 # Stack the three toy features as the columns of one array.
 x = np.stack(data(), axis=1)
 dim = int(x.shape[1])
 pdim = 2  # number of rotated features kept as model output

 inp = keras.layers.Input(x.shape[1:])
 q = partr(inp, pdim, dim, ulayer)
 q = cutdown(q, pdim)
 model = keras.models.Model(inp, q)
 model.summary()

 # Learning rates 0.0001 and 0.001 were tried before settling on 0.01.
 opt = keras.optimizers.Adam(learning_rate=0.01)
 model.compile(opt, loss=loss2d)
 # The targets are never read by loss2d, so x is passed only to satisfy fit().
 model.fit(x, x,
           epochs=10000,
           shuffle=False,
           validation_split=0.2,
           callbacks=[keras.callbacks.EarlyStopping(patience=250, monitor="loss", restore_best_weights=True)])

 # Collect the matrix of every rotation layer in application order.
 mats = [lay.numpify() for lay in model.layers[1:] if isinstance(lay, ulayer)]
 # Later layers act on the output of earlier ones, so each new matrix is
 # multiplied from the left.
 mat = None
 for m in mats:
     mat = m if mat is None else np.dot(m, mat)
 mat = mat[:pdim]
 print(mat)

 # NOTE(review): the first 800 rows are presumably the training part of the
 # 1000 samples under validation_split=0.2 - confirm against Keras' split rule.
 loss = model.evaluate(x[:800], x[:800])
 print(loss)
 p = model.predict(x[:800])
 plt.plot(p[:, 0], p[:, 1], ".", alpha=0.75)
 plt.title(str(loss))
 plt.show()
--- a/mu.py
+++ b/mu.py
@ -0,0 +1,35 @@
 import numpy as np
 def determu(q,dim,ulayer):
     """
     Chain one layer per index pair (i, j) with i < j < dim onto q.

     Pairs are visited in ascending (i, j) order. For each pair,
     ulayer(dim, i, j) is built and applied to the running value.
     Returns the output of the last layer, or q itself when there is
     no pair.
     """
     pairs = [(i, j) for i in range(dim) for j in range(i + 1, dim)]
     for first, second in pairs:
         layer = ulayer(dim, first, second)
         q = layer(q)
     return q
 def determr(q,dim,ulayer):
     """
     Like determu, but the index pairs are applied in random order.

     All pairs [i, j] with i < j < dim are listed, shuffled in place with
     np.random.shuffle (so the order follows numpy's global random state),
     and then one ulayer(dim, i, j) per pair is applied to the running
     value. Returns the output of the last layer, or q when there is no
     pair.
     """
     pairs = [[i, j] for i in range(dim) for j in range(i + 1, dim)]
     np.random.shuffle(pairs)
     for first, second in pairs:
         layer = ulayer(dim, first, second)
         q = layer(q)
     return q
 def partu(q,pdim,dim,ulayer):
     """
     Chain layers for the index pairs (i, j) with i < pdim and i < j < dim.

     Only the first pdim indices are used as the first partner of a pair.
     Pairs are visited in ascending (i, j) order, and for each one
     ulayer(dim, i, j) is built and applied to the running value.
     Returns the output of the last layer, or q when there is no pair.
     """
     pairs = [(i, j) for i in range(pdim) for j in range(i + 1, dim)]
     for first, second in pairs:
         layer = ulayer(dim, first, second)
         q = layer(q)
     return q
 def partr(q,pdim,dim,ulayer):
     """
     Like partu, but the index pairs are applied in random order.

     All pairs [i, j] with i < pdim and i < j < dim are listed, shuffled
     in place with np.random.shuffle (global numpy random state), and one
     ulayer(dim, i, j) per pair is applied to the running value. Returns
     the output of the last layer, or q when there is no pair.
     """
     pairs = [[i, j] for i in range(pdim) for j in range(i + 1, dim)]
     np.random.shuffle(pairs)
     for first, second in pairs:
         layer = ulayer(dim, first, second)
         q = layer(q)
     return q
 def cutdown(q,pdim):
     """Keep only the first pdim entries along the second axis of q."""
     leading = slice(None, pdim)
     return q[:, leading]
--- a/n2ulayer.py
+++ b/n2ulayer.py
@ -0,0 +1,72 @@
 #use sin cos to get better gradients (than nulayer)
 #might have better gradients? (seems that way but not sure yet)
 #should rename it, but who cares
 #now also able to export the given matrix
 from tensorflow.keras.layers import Layer
 from tensorflow.keras import backend as K
 from tensorflow import keras
 import tensorflow as tf
 import numpy as np
 class ulayer(Layer):
     """
     Keras layer that rotates two coordinates of its input by one
     trainable angle and leaves every other coordinate untouched.

     siz is the number of columns the input is split into; dex1 and dex2
     are the indices of the two columns that are mixed (they are expected
     to differ). The single weight "kernel" holds the rotation angle.
     """
     def __init__(self,siz,dex1,dex2, **kwargs):
         self.siz = siz
         self.dex1 = dex1
         self.dex2 = dex2
         super(ulayer, self).__init__(**kwargs)
     def build(self, input_shape):
         # One trainable scalar: the rotation angle, started in [-0.5, 0.5].
         self.kernel = self.add_weight(name='kernel',
                                       shape=(1,),
                                       initializer=keras.initializers.RandomUniform(-0.5, 0.5),
                                       trainable=True)
         super(ulayer, self).build(input_shape)  # Be sure to call this at the end
     def numpify(self):
         """
         Return the current rotation as a (siz, siz) numpy matrix.

         The matrix is the identity except for the four entries at the
         dex1/dex2 rows and columns, and acts on a sample as a column
         vector: output = matrix @ sample, matching what call computes.
         """
         mat=np.eye(self.siz)
         val=self.kernel.numpy()[0]
         sin,cos=np.sin(val),np.cos(val)
         mat[self.dex1,self.dex2]=sin
         mat[self.dex2,self.dex1]=-sin
         mat[self.dex1,self.dex1]=cos
         mat[self.dex2,self.dex2]=cos
         return mat
     def call(self, x):
         """
         Apply the rotation to x, which is indexed as x[:, i] for i < siz.

         With a = x[:, dex1] and b = x[:, dex2], the outputs are
         a*cos + b*sin at dex1 and b*cos - a*sin at dex2. This is the same
         result as the earlier (a + b*tan)*cos form, but without dividing
         by cos, so it stays finite when cos(kernel) is zero.
         """
         sin=K.sin(self.kernel)
         cos=K.cos(self.kernel)
         cols=[tf.expand_dims(x[:,i],1) for i in range(self.siz)]
         # Read both partners before overwriting either of them.
         first,second=cols[self.dex1],cols[self.dex2]
         cols[self.dex1]=first*cos+second*sin
         cols[self.dex2]=second*cos-first*sin
         return K.concatenate(cols,axis=1)
     def compute_output_shape(self, input_shape):
         return input_shape
     def get_config(self):
         """Return the constructor arguments so the layer can be re-created from its config."""
         config=super(ulayer, self).get_config()
         config.update({"siz": self.siz, "dex1": self.dex1, "dex2": self.dex2})
         return config
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,4 @@
 numpy
 tensorflow
 keras
 matplotlib
--- a/show.py
+++ b/show.py
@ -0,0 +1,23 @@
 import matplotlib.pyplot as plt
 # Prefer data2 when it is available; otherwise fall back to data.py,
 # whose data() also returns the three features x, y, z used below.
 # NOTE(review): no data2.py source is visible next to this script -
 # confirm whether data2 is still meant to exist.
 try:
     from data2 import data
 except ImportError:
     from data import data
 from loss import numpyloss2d
 x,y,z=data()
 def plotone(x,y,show=True):
     """
     Scatter y against x as semi-transparent circles via pyplot.

     When show is true the figure is displayed immediately with
     plt.show(); otherwise only the plot call is made.
     """
     # Titling the plot with numpyloss2d(x, y) is disabled; it was flagged as buggy.
     #plt.title(str(numpyloss2d(x,y)))
     marker_options = {"alpha": 0.75}
     plt.plot(x, y, "o", **marker_options)
     if not show:
         return
     plt.show()
 # Pairwise views of the three raw features.
 for horizontal, vertical in ((x, y), (x, z), (y, z)):
     plotone(horizontal, vertical)
 print("desired")
 # Reference view: x against the difference y - z.
 plotone(x, y - z)