54 lines
1.2 KiB
Python
54 lines
1.2 KiB
Python
import os
|
|
import scipy.io as sio
|
|
import numpy as np
|
|
import h5py
|
|
|
|
|
|
def remending(q):
    """Return the file name ``q`` without a trailing ``.mat`` or ``.npz``.

    Only an extension at the very end of the name is removed. The previous
    substring replacement also deleted ".mat"/".npz" from the middle of a
    name (for example "my.matrix.npz" became "myrix").

    Args:
        q: File name (or path) as a string.

    Returns:
        ``q`` with one trailing ``.mat`` or ``.npz`` removed, or ``q``
        unchanged when it ends with neither.
    """
    for ext in (".mat", ".npz"):
        if q.endswith(ext):
            return q[:-len(ext)]
    return q
|
|
|
|
def listfiles(folder="adata"):
    """Yield ``(name, path)`` for every entry of a data directory.

    Args:
        folder: Directory to list. Defaults to ``"adata"``, the location
            that used to be hard-coded.

    Yields:
        Tuples ``(name, path)`` where ``name`` is the entry name passed
        through ``remending`` and ``path`` is ``folder + "/" + entry``.
        Entries come in whatever order ``os.listdir`` returns them.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    for entry in os.listdir(folder):
        # Plain "/" concatenation is kept so the produced path strings are
        # identical to the ones callers have always received.
        yield remending(entry), folder + "/" + entry
|
|
|
|
def loadfile73(fn):
    """Read the ``X`` and ``y`` datasets from an HDF5 file via h5py.

    ``loadfile`` uses this as its fallback when ``scipy.io.loadmat`` fails.
    The "73" presumably refers to MATLAB v7.3 files, which are HDF5-based
    (TODO confirm).

    Args:
        fn: Path of the file to open read-only.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays, each transposed relative to how
        it is stored in the file.
    """
    # The context manager guarantees the file handle is released; the
    # original left it open. Data is copied into NumPy arrays while the
    # file is still open, so the results stay valid after it closes.
    with h5py.File(fn, 'r') as f:
        x = np.array(f.get('X'))
        y = np.array(f.get('y'))
    # NOTE(review): the transpose presumably undoes MATLAB's column-major
    # storage order as seen through HDF5 - confirm against a known file.
    return np.transpose(x), np.transpose(y)
|
|
def loadfile(fn="thyroid.mat"):
    """Load a dataset's feature matrix and label array from ``fn``.

    Three formats are handled:

    * ``.npz`` archives: arrays stored under the keys ``"x"`` and ``"y"``.
    * MATLAB files readable by ``scipy.io.loadmat``: variables ``"X"``
      and ``"y"``.
    * Anything ``loadmat`` cannot handle: retried through ``loadfile73``
      (h5py).

    Args:
        fn: Path of the file to load.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays.
    """
    # Test the extension, not a substring, so a ".npz" inside a directory
    # name does not misroute a .mat file.
    if fn.endswith(".npz"):
        # Close the archive once both arrays have been read into memory.
        with np.load(fn) as archive:
            return archive["x"], archive["y"]
    try:
        mat = sio.loadmat(fn)
        return mat["X"], mat["y"]
    except Exception:
        # scipy raises NotImplementedError for MATLAB v7.3 files; any other
        # read failure is also retried via HDF5, as before. Unlike the old
        # bare ``except:``, KeyboardInterrupt/SystemExit now propagate.
        return loadfile73(fn)
|
|
|
|
def loadfiles():
    """Yield ``(name, x, y)`` for every file reported by ``listfiles``.

    Each path is loaded with ``loadfile``; the two arrays it returns are
    emitted together with the file's cleaned-up name.
    """
    for name, path in listfiles():
        features, labels = loadfile(path)
        yield name, features, labels
|
|
|
|
def filterfiles():
    """Yield ``(name, x, y)`` from ``loadfiles``, skipping small label sums.

    A dataset is dropped when ``np.sum(y)`` is below 10. Presumably ``y``
    is a 0/1 label array, making this a minimum count of positive samples
    (TODO confirm).
    """
    for name, features, labels in loadfiles():
        label_sum = np.sum(labels)
        # Written as "not below 10" rather than ">= 10" so that a NaN sum
        # is still passed through, exactly as the skip-if-smaller test did.
        if not (label_sum < 10):
            yield name, features, labels
|
|
if __name__ == '__main__':
    # Disabled diagnostic: for each kept dataset, print its name, half the
    # number of columns plus one, and the mean/std over columns of the
    # ratio "distinct values / number of rows". Flip the condition to run.
    if False:
        for f, x, y in filterfiles():
            les = [len(set(column)) / len(x) for column in x.T]
            lestr = f"{np.mean(les)}+-{np.std(les)}"
            print(f'"{f}":{int(x.shape[1]/2)+1},{lestr}')

    # Walk the filtered datasets and stop at the first one whose name
    # contains "wbc"; f, x and y remain bound to that dataset (or to the
    # last dataset seen if none matches).
    for f, x, y in filterfiles():
        if "wbc" in f:
            break
|
|
|