"""Discover and load the datasets stored in the ``adata`` folder.

Each dataset is either a NumPy ``.npz`` archive (arrays ``x`` and ``y``) or a
MATLAB ``.mat`` file (variables ``X`` and ``y``).  MATLAB v7.3 files, which
``scipy.io.loadmat`` cannot read, are read through ``h5py`` instead.
"""
import os

import numpy as np
import scipy.io as sio

try:
    import h5py
except ImportError:
    # h5py is only needed for the MATLAB v7.3 fallback in ``loadfile73``;
    # .npz and older .mat files can still be loaded without it.
    h5py = None


def remending(q: str) -> str:
    """Return *q* with every ``.mat`` and ``.npz`` occurrence removed."""
    return q.replace(".mat", "").replace(".npz", "")


def listfiles(folder: str = "adata"):
    """Yield ``(name, path)`` for every entry of *folder*.

    ``name`` is the entry's file name with the ``.mat``/``.npz`` ending
    stripped; ``path`` is ``folder + "/" + entry``.  Entries come in the
    (arbitrary) order returned by ``os.listdir``.
    """
    for zw in os.listdir(folder):
        yield remending(zw), f"{folder}/{zw}"


def loadfile73(fn):
    """Load ``X`` and ``y`` from an HDF5-based (MATLAB v7.3) file.

    Returns:
        ``(x, y)`` as NumPy arrays, each transposed relative to how h5py
        presents it.  NOTE(review): presumably this undoes the reversed axis
        order of MATLAB arrays stored in HDF5 -- confirm against the data.

    Raises:
        ImportError: if h5py is not installed.
    """
    if h5py is None:
        raise ImportError("h5py is required to read MATLAB v7.3 (HDF5) files")
    # The context manager guarantees the file handle is released; the data is
    # copied into in-memory arrays before the file is closed.
    with h5py.File(fn, 'r') as f:
        x = np.array(f.get('X'))
        y = np.array(f.get('y'))
    return np.transpose(x), np.transpose(y)


def loadfile(fn="thyroid.mat"):
    """Load the ``(x, y)`` arrays stored in *fn*.

    * A path containing ``.npz`` is opened with ``np.load`` and must hold the
      arrays ``x`` and ``y``.
    * Anything else is first tried with ``scipy.io.loadmat`` (variables ``X``
      and ``y``); if that fails for any reason, ``loadfile73`` is used.
    """
    if ".npz" in fn:
        # NpzFile keeps the archive open until closed; indexing reads the
        # arrays fully into memory, so they stay valid after the ``with``.
        with np.load(fn) as f:
            return f["x"], f["y"]
    try:
        mat = sio.loadmat(fn)
        return mat["X"], mat["y"]
    except Exception:
        # Best-effort fallback (loadmat rejects v7.3 files).  ``Exception``
        # rather than a bare ``except`` so Ctrl-C / SystemExit still propagate.
        return loadfile73(fn)


def loadfiles(folder: str = "adata"):
    """Yield ``(name, x, y)`` for every file found by ``listfiles(folder)``."""
    for f, fn in listfiles(folder):
        yield (f, *loadfile(fn))


def filterfiles(folder: str = "adata", min_count=10):
    """Yield ``(name, x, y)`` for datasets whose ``y`` sums to >= *min_count*.

    NOTE(review): ``np.sum(y)`` is presumably the number of positive labels
    (assuming ``y`` is a 0/1 vector) -- confirm against the data.
    """
    for f, x, y in loadfiles(folder):
        cou = np.sum(y)
        if cou < min_count:
            continue
        yield f, x, y


if __name__ == '__main__':
    if False:  # disabled diagnostic: per-dataset feature statistics
        for f, x, y in filterfiles():
            # print(f, x.shape[1], y.shape[0], np.mean(y))
            # Fraction of distinct values in each column of x.
            les = [len(set(x[:, i])) / len(x) for i in range(x.shape[1])]
            lestr = f"{np.mean(les)}+-{np.std(les)}"
            print(f'"{f}":{int(x.shape[1]/2)+1},{lestr}')

    # Stop at the first dataset whose name contains "wbc"; f, x and y stay
    # bound to it (or to the last dataset seen if none matches).
    for f, x, y in filterfiles():
        if "wbc" in f:
            break