thestraf/data.py

54 lines
1.2 KiB
Python

import os
import scipy.io as sio
import numpy as np
import h5py
def remending(q):
    """Return ``q`` with any trailing ``.mat`` / ``.npz`` extension removed.

    Only extensions at the very end of the name are stripped, so a name that
    merely contains ``.mat`` or ``.npz`` in the middle (for example
    ``"my.matrix.npz"``) keeps that part intact.  Stacked extensions such as
    ``"a.mat.npz"`` are removed one after another.

    Args:
        q: File name (or any string) to clean up.

    Returns:
        The name without its trailing data-file extension(s).
    """
    extensions = (".mat", ".npz")
    # A matching name ends in ".xxx", so the last dot is where the extension
    # starts; cutting there drops exactly one extension per iteration.
    while q.endswith(extensions):
        q = q[:q.rfind(".")]
    return q
def listfiles():
    """Yield a ``(name, path)`` pair for every entry of the ``adata`` directory.

    ``name`` is the entry name passed through ``remending``; ``path`` is the
    entry name prefixed with ``adata/``.  Entries are produced in the order
    ``os.listdir`` reports them.
    """
    entries = os.listdir("adata")
    yield from ((remending(entry), "adata/" + entry) for entry in entries)
def loadfile73(fn):
    """Read the ``X`` and ``y`` datasets from an HDF5 file and transpose them.

    NOTE(review): judging by the name and by its use as the fallback of
    ``loadfile``, this is presumably meant for MATLAB v7.3 ``.mat`` files
    (which are HDF5 containers whose arrays appear transposed when read
    through h5py) -- confirm.

    Args:
        fn: Path of the HDF5 file to open read-only.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: the transposed contents of the
        ``X`` and ``y`` datasets.
    """
    # The context manager closes the file handle even when reading fails.
    # Both datasets are copied into in-memory arrays before the file closes.
    with h5py.File(fn, 'r') as f:
        # ``get`` is kept from the original: a missing dataset is not
        # reported as an error at this point.
        x = np.array(f.get('X'))
        y = np.array(f.get('y'))
    return np.transpose(x), np.transpose(y)
def loadfile(fn="thyroid.mat"):
    """Load the ``(x, y)`` pair of arrays stored in ``fn``.

    Files whose name ends in ``.npz`` are read with ``numpy.load`` using the
    keys ``x`` and ``y``.  Every other file is first tried with
    ``scipy.io.loadmat`` using the keys ``X`` and ``y``; if that fails for
    any reason the file is handed to ``loadfile73`` instead.

    Args:
        fn: Path of the data file.

    Returns:
        Tuple ``(x, y)`` of arrays read from the file.
    """
    # Decide by the file's extension, not by ".npz" appearing anywhere in the
    # path (e.g. in a directory name).
    if fn.endswith(".npz"):
        # The archive keeps a file handle open; read both arrays and close it.
        with np.load(fn) as f:
            return f["x"], f["y"]
    try:
        mat = sio.loadmat(fn)
        return mat["X"], mat["y"]
    except Exception:
        # Deliberately broad best-effort fallback, but unlike a bare
        # ``except`` it lets KeyboardInterrupt / SystemExit propagate.
        return loadfile73(fn)
def loadfiles():
    """Yield ``(name, x, y)`` for every file reported by ``listfiles``.

    Each file is loaded lazily with ``loadfile`` only when the generator
    reaches it.
    """
    for name, path in listfiles():
        x, y = loadfile(path)
        yield name, x, y
def filterfiles():
    """Yield the ``(name, x, y)`` triples of ``loadfiles`` worth keeping.

    A dataset is dropped when the sum over its ``y`` array is below 10.
    """
    for name, features, labels in loadfiles():
        too_few = np.sum(labels) < 10
        if not too_few:
            yield name, features, labels
# Script entry point: ad-hoc inspection of the datasets produced by
# filterfiles().
# NOTE(review): the indentation of this listing has been lost, so the nesting
# below is reconstructed from reading the statements only -- confirm against
# the original file before relying on it.
if __name__ == '__main__':
# Disabled diagnostic (`if False` never runs): for every dataset it would
# print `"<name>":<int(n_columns/2)+1>,<mean>+-<std>`.
if False:
for f,x,y in filterfiles():
#print(f,x.shape[1],y.shape[0],np.mean(y))
# Per column: number of distinct values divided by the number of rows.
les=[len(set(x[:,i]))/len(x) for i in range(x.shape[1])]
# Mean and standard deviation of those per-column ratios.
lestr=f"{np.mean(les)}+-{np.std(les)}"
print(f'"{f}":{int(x.shape[1]/2)+1},{lestr}')
# Walk the filtered datasets and stop at the first one whose name contains
# "wbc"; after the break, f, x and y refer to that dataset.
# NOTE(review): it cannot be determined from this listing whether this loop
# sits under `if False:` or directly under the `__main__` guard -- confirm.
for f,x,y in filterfiles():
if "wbc" in f:break