import pandas as pd
import numpy as np

# Scalar types whose values are modelled with a Gaussian density instead of a
# frequency table. NumPy integer scalars are listed explicitly because they
# are not instances of the built-in ``int``.
_NUMERIC_TYPES = (int, float, np.integer, np.floating)

# Floor applied to a per-class standard deviation so that a feature that is
# constant within a class does not cause a division by zero in the density.
_MIN_STD = 1e-9


class Bayes:
    """Naive Bayes classifier for categorical and/or numeric features.

    After ``fit`` the instance holds:

    * ``klasy``     -- sorted array of the distinct class labels,
    * ``prob_Y``    -- ``{label: prior probability}``,
    * ``prob_con``  -- ``{label: [ {value: P(value | label)} per feature ]}``,
    * ``gaussians`` -- ``{label: [ (mean, std) or None per feature ]}``;
      ``None`` marks a feature that is not numeric in the training data.
    """

    def __init__(self):
        self.klasy = np.array([])
        self.prob_Y = {}
        self.prob_con = {}
        self.gaussians = {}

    @staticmethod
    def _is_numeric_column(column):
        """Return True when every value of *column* is an int/float scalar."""
        if np.issubdtype(column.dtype, np.number):
            return True
        return column.dtype == object and all(
            isinstance(value, _NUMERIC_TYPES) for value in column
        )

    def fit(self, X, Y):
        """Estimate priors and per-feature likelihoods from training data.

        Args:
            X: 2-D array-like of shape (samples, features).
            Y: 1-D array-like with one class label per row of ``X``.

        Raises:
            ValueError: if ``X`` is not 2-D, is empty, or its length differs
                from that of ``Y``.
        """
        if not isinstance(X, np.ndarray):
            # dtype=object keeps mixed str/number rows from being coerced
            # to strings.
            X = np.array(X, dtype=object)
        Y = np.asarray(Y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (samples, features)")
        if len(X) == 0 or len(X) != len(Y):
            raise ValueError("X and Y must be non-empty and of equal length")

        n_samples, n_features = X.shape
        numeric = [self._is_numeric_column(X[:, j]) for j in range(n_features)]

        # Reset everything so that a refit never keeps entries of old classes.
        self.klasy = np.unique(Y)
        self.prob_Y = {}
        self.prob_con = {}
        self.gaussians = {}

        for kl in self.klasy:
            rows = X[Y == kl]  # selected once per class, reused below
            n_rows = len(rows)

            # Prior: fraction of the training samples belonging to the class.
            self.prob_Y[kl] = n_rows / n_samples

            tables = []
            params = []
            for j in range(n_features):
                column = rows[:, j]

                # Conditional probability P(value | class) as relative
                # frequency of each distinct value within the class.
                values, counts = np.unique(column, return_counts=True)
                tables.append(
                    {v: float(c) / n_rows for v, c in zip(values, counts)}
                )

                # Gaussian parameters only make sense for numeric features.
                if numeric[j]:
                    as_float = column.astype(float)
                    params.append((as_float.mean(), as_float.std()))
                else:
                    params.append(None)

            self.prob_con[kl] = tables
            self.gaussians[kl] = params

    def predict(self, tab_X):
        """Return the most probable class label for every row of *tab_X*.

        Numeric values of features that were numeric during ``fit`` are scored
        with the class Gaussian density; every other value is scored with the
        class frequency table, where a value never seen for the class
        contributes probability 0. When all classes score 0 the first label
        of ``klasy`` is returned (``np.argmax`` tie-breaking).

        Args:
            tab_X: iterable of rows, each an iterable of feature values.

        Returns:
            list with one predicted label per row.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if not self.prob_Y:
            raise ValueError("Bayes.predict called before fit")

        priors = np.array([self.prob_Y[kl] for kl in self.klasy], dtype=float)

        y_hat = []
        for x in tab_X:
            scores = priors.copy()
            for k, kl in enumerate(self.klasy):
                gauss = self.gaussians[kl]
                tables = self.prob_con[kl]
                for j, value in enumerate(x):
                    if isinstance(value, _NUMERIC_TYPES) and gauss[j] is not None:
                        mean, std = gauss[j]
                        std = max(std, _MIN_STD)
                        scores[k] *= (
                            np.exp(-((value - mean) ** 2) / (2 * std ** 2))
                            / (np.sqrt(2 * np.pi) * std)
                        )
                    else:
                        scores[k] *= tables[j].get(value, 0.0)
            y_hat.append(self.klasy[np.argmax(scores)])
        return y_hat

    def evaluate(self, X, Y):
        """Return the accuracy (fraction of correct predictions) on ``X``/``Y``.

        Raises:
            ValueError: if ``Y`` is empty.
        """
        if len(Y) == 0:
            raise ValueError("cannot evaluate on an empty data set")
        predictions = self.predict(X)
        hits = sum(1 for pred, true in zip(predictions, Y) if pred == true)
        return hits / len(Y)


# Load the data set only when the file is executed as a script, so importing
# the module for the classifier does not require 'car.data' to be present.
# The last column holds the class label, all preceding columns are features.
if __name__ == "__main__":
    data = pd.read_csv('car.data', header=None)
    X = data.iloc[:, :-1].values
    Y = data.iloc[:, -1].values