#!/usr/bin/env python
# coding: utf-8
# Untitled - PYTHON 2.71 KB (paste-site header)

# In[ ]:


import pandas as pd
import numpy as np
# Read the comma-separated file 'car.data'; header=None means the first line
# is treated as data, not as column names.
data = pd.read_csv('car.data', header=None, sep=',')


# In[ ]:


data
# The first six columns are used as the attributes (X) and the column at
# position 6 as the class label (Y); both are taken as raw value arrays.
X = data.iloc[:, :6].values
Y = data.iloc[:, 6].values


# In[ ]:


# Print the distinct values each attribute takes. The number of attributes is
# read from X instead of being hard-coded, and the label text is restored
# from its mis-encoded form.
for id_atr in range(X.shape[1]):
    print("Atrybut: {}; wartości: {}".format(id_atr, np.unique(X[:, id_atr])))


# In[ ]:


# Distinct class labels present in Y.
klasy = np.unique(Y)
klasy


# In[ ]:


# Prior probability of every class: the fraction of rows carrying that label.
prob = {klasa: (Y == klasa).sum() / len(Y) for klasa in klasy}
prob

# The same priors as a float vector, in the order of the dict entries.
np.array(list(prob.values()), dtype=float)


# In[ ]:


# Distinct values taken by the first attribute.
wartosci_X0 = np.unique(X[:, 0])
wartosci_X0


# In[ ]:


# Fraction of the rows labelled 'acc' whose first attribute equals 'high'.
(X[Y == 'acc', 0] == 'high').sum() / (Y == 'acc').sum()


# In[ ]:


# The same conditional frequency, kept under a descriptive name.
pr_war_X0_high_pw_klasa_acc = (X[Y == 'acc', 0] == 'high').sum() / (Y == 'acc').sum()


# In[ ]:


# Conditional frequency tables:
# prob_con[class][attribute index][value] is the fraction of that class's rows
# in which the attribute takes the value. Only values that actually occur
# within the class get an entry.
prob_con = {}
for kl in klasy:
    wiersze_klasy = X[Y == kl]
    tabele = []
    # Transposing lets us walk the class subset one attribute column at a time.
    for kolumna in wiersze_klasy.T:
        tabele.append(
            {u: (kolumna == u).sum() / len(wiersze_klasy) for u in np.unique(kolumna)}
        )
    prob_con[kl] = tabele


# In[ ]:


class NB:
    """Naive Bayes classifier for categorical attributes.

    All probabilities are plain relative frequencies (no smoothing), so a
    value that never occurs together with a class gets probability 0 for
    that class.

    Attributes set by ``fit``:
        klasy: array of the distinct class labels, as returned by ``np.unique``.
        prob_Y: dict mapping class label -> prior probability of the class.
        prob_con: dict mapping class label -> list with one dict per
            attribute; each dict maps attribute value -> P(value | class).
    """

    def __init__(self):
        self.klasy = np.array([])
        self.prob_Y = {}
        self.prob_con = {}

    def fit(self, X, Y):
        """Estimate class priors and conditional value frequencies.

        Args:
            X: 2-D array-like, one row per sample and one column per attribute.
            Y: 1-D array-like with the class label of every row of X.
        """
        # Non-array inputs are wrapped as object arrays so element types are
        # preserved; arrays passed by the caller are used untouched.
        if not isinstance(X, np.ndarray):
            X = np.array(X, dtype=object)
        if not isinstance(Y, np.ndarray):
            Y = np.array(Y, dtype=object)

        # Class probabilities (priors).
        self.klasy = np.unique(Y)
        self.prob_Y = {}
        for klasa in self.klasy:
            self.prob_Y[klasa] = np.sum(Y == klasa) / len(Y)

        # Conditional probabilities. The value sets are taken from the whole
        # of X, so every class has an entry (possibly 0) for every value seen
        # in training.
        wartosci = [np.unique(X[:, at]) for at in range(X.shape[1])]

        # Start from an empty table so a refit does not keep stale classes.
        self.prob_con = {}
        for kl in self.klasy:
            X_kl = X[Y == kl]
            n_kl = len(X_kl)
            lista_dla_klasy = []
            for at, uniq in enumerate(wartosci):
                kolumna = X_kl[:, at]
                lista_dla_klasy.append(
                    {u: np.sum(kolumna == u) / n_kl for u in uniq}
                )
            self.prob_con[kl] = lista_dla_klasy

    def predict(self, tab_X):
        """Return the list of predicted class labels for the rows of tab_X.

        For every row the score of a class is its prior multiplied by the
        conditional probability of each attribute value; the class with the
        highest score wins (ties go to the first class in ``klasy``).

        Raises:
            KeyError: if a row contains a value that was not seen for that
                attribute during ``fit``.
        """
        # The priors do not depend on the sample, so build the vector once.
        priors = np.array([self.prob_Y[kl] for kl in self.klasy], dtype=float)

        y_hat = []
        for x in tab_X:
            wskazniki_dla_klas = priors.copy()
            for idk, kl in enumerate(self.klasy):
                tabele = self.prob_con[kl]
                for idw, wartosc in enumerate(x):
                    wskazniki_dla_klas[idk] *= tabele[idw][wartosc]
            y_hat.append(self.klasy[np.argmax(wskazniki_dla_klas)])
        return y_hat

    def evaluate(self, X, Y):
        """Return the accuracy: the fraction of rows of X predicted as in Y.

        Y may be any sequence of labels. Both sides are converted to arrays
        before comparing, because comparing two plain lists with ``==`` would
        give a single bool instead of an element-wise result.
        """
        trafienia = np.asarray(self.predict(X)) == np.asarray(Y)
        return np.count_nonzero(trafienia) / len(Y)
        


# In[ ]:


# Train the classifier on the complete data set.
model = NB()
model.fit(X, Y)


# In[ ]:


# Conditional probability tables learned for the 'acc' and 'good' classes.
tuple(model.prob_con[etykieta] for etykieta in ('acc', 'good'))


# In[ ]:


# Accuracy measured on the same data the model was trained on.
model.evaluate(X, Y)


# In[ ]:


# Fraction of rows labelled 'unacc'.
(Y == 'unacc').sum() / len(Y)
                                
                            

# Paste hosted by Wklejamy.pl