#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import pandas as pd
import numpy as np

# The file is read without a header row; below, columns 0-5 are used as the
# attributes and column 6 as the class label.
data = pd.read_csv('car.data', header=None, sep=',')


# In[ ]:


X = data.iloc[:, :6].values
Y = data.iloc[:, 6].values


# In[ ]:


# List the distinct values each attribute takes.
for id_atr in range(6):
    print("Atrybut: {}; wartości: {}".format(id_atr, np.unique(X[:, id_atr])))


# In[ ]:


klasy = np.unique(Y)
klasy


# In[ ]:


# Prior probability of each class: its relative frequency among the labels.
prob = {klasa: np.mean(Y == klasa) for klasa in klasy}
np.array(list(prob.values()), dtype=float)


# In[ ]:


wartosci_X0 = np.unique(X[:, 0])
wartosci_X0


# In[ ]:


# Fraction of the 'acc' rows whose attribute 0 equals 'high'.
np.mean(X[Y == 'acc', 0] == 'high')


# In[ ]:


pr_war_X0_high_pw_klasa_acc = np.mean(X[Y == 'acc', 0] == 'high')


# In[ ]:


# Conditional probability tables built by hand: for every class, one dict per
# attribute mapping each value that occurs within that class to its relative
# frequency among the rows of that class.
prob_con = {}
for kl in klasy:
    X_kl = X[Y == kl]  # rows of this class, selected once per class
    lista_dla_klasy = []
    for at in range(X.shape[1]):
        kolumna = X_kl[:, at]
        lista_dla_klasy.append(
            {u: np.mean(kolumna == u) for u in np.unique(kolumna)}
        )
    prob_con[kl] = lista_dla_klasy


# In[ ]:


class NB:
    """Naive Bayes classifier for categorical attributes.

    Probabilities are plain relative frequencies (no smoothing), so a value
    that never occurs together with a class gets probability 0 for that class.

    Attributes:
        klasy: sorted array of the distinct class labels seen by ``fit``.
        prob_Y: dict mapping class label -> prior probability.
        prob_con: dict mapping class label -> list with one dict per
            attribute, each mapping attribute value -> P(value | class).
    """

    def __init__(self):
        self.klasy = np.array([])
        self.prob_Y = {}
        self.prob_con = {}

    def fit(self, X, Y):
        """Estimate class priors and per-attribute conditional probabilities.

        Args:
            X: 2-D array-like of attribute values, one row per sample.
            Y: 1-D array-like of class labels, one per row of ``X``.
        """
        # Coerce to arrays so that plain lists work too: ``list == label``
        # would be a single bool rather than an element-wise mask.
        X = np.asarray(X)
        Y = np.asarray(Y)

        self.klasy = np.unique(Y)
        # Start from empty tables so a refit does not keep stale classes.
        self.prob_Y = {}
        self.prob_con = {}

        # Values are collected over the whole training set (not per class),
        # so every class has an entry - possibly 0.0 - for every value seen.
        wartosci_atr = [np.unique(X[:, at]) for at in range(X.shape[1])]

        for kl in self.klasy:
            maska = Y == kl
            X_kl = X[maska]  # selected once per class, reused for all columns

            # class prior
            self.prob_Y[kl] = np.mean(maska)

            # conditional probabilities P(attribute == value | class)
            self.prob_con[kl] = [
                {u: np.mean(X_kl[:, at] == u) for u in uniq}
                for at, uniq in enumerate(wartosci_atr)
            ]

    def predict(self, tab_X):
        """Return the most probable class label for every row of ``tab_X``.

        Args:
            tab_X: iterable of samples, each an iterable of attribute values
                in the same order as the columns used in ``fit``.

        Returns:
            A list with one predicted label per sample. Ties are resolved in
            favour of the class that comes first in ``self.klasy``.

        Raises:
            KeyError: if a sample contains an attribute value that did not
                occur in that column of the training data.
        """
        # Priors in the order of self.klasy; identical for every sample.
        priory = np.array(
            [self.prob_Y[kl] for kl in self.klasy], dtype=float
        )

        y_hat = []
        for x in tab_X:
            wskazniki_dla_klas = priory.copy()
            for idk, kl in enumerate(self.klasy):
                tablice = self.prob_con[kl]
                for idw, wartosc in enumerate(x):
                    wskazniki_dla_klas[idk] *= tablice[idw][wartosc]
            y_hat.append(self.klasy[np.argmax(wskazniki_dla_klas)])
        return y_hat

    def evaluate(self, X, Y):
        """Return the fraction of rows of ``X`` whose prediction equals ``Y``.

        Args:
            X: samples accepted by ``predict``.
            Y: 1-D array-like with the expected label of each sample.

        Raises:
            ZeroDivisionError: if ``Y`` is empty.
        """
        Y = np.asarray(Y)
        y_hat = np.asarray(self.predict(X))
        return np.count_nonzero(y_hat == Y) / len(Y)


# In[ ]:


model = NB()
model.fit(X, Y)


# In[ ]:


model.prob_con['acc'], model.prob_con['good']


# In[ ]:
# Accuracy of the fitted model, measured on the same X and Y passed to fit.
model.evaluate(X, Y)


# In[ ]:


# Fraction of labels equal to 'unacc'; presumably a reference value to
# compare the accuracy above against (a constant 'unacc' answer would
# score exactly this).
sum(Y == 'unacc') / len(Y)