Task 2 : Prediction using Unsupervised ML

  • From the given ‘Iris’ dataset, predict the optimum number of clusters
  • and represent it visually.
!pip install human-learn
In [63]:
import pandas as pd
import numpy as np
import seaborn as sn
import plotly.express as px
import plotly.graph_objects as go
from hulearn.experimental.interactive import InteractiveCharts
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
iris = pd.read_csv('Iris.csv')
iris.head(2)
Out[2]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
In [3]:
iris.columns = ['Id','SepalLength','SepalWidth','PetalLength','PetalWidth','Class']
In [4]:
def splitname(string):
    string = string.split('-')
    return string[1]

iris['Class'] = iris['Class'].apply(lambda x:splitname(x))
species = list(iris['Class'].unique())
# mapper = {species[i]:i for i in range(len(species))}
# iris['Class'] = iris['Species'].map(mapper)
iris.head()
Out[4]:
Id SepalLength SepalWidth PetalLength PetalWidth Class
0 1 5.1 3.5 1.4 0.2 setosa
1 2 4.9 3.0 1.4 0.2 setosa
2 3 4.7 3.2 1.3 0.2 setosa
3 4 4.6 3.1 1.5 0.2 setosa
4 5 5.0 3.6 1.4 0.2 setosa
In [ ]:
def plotchart(clf,x,y):
    
In [70]:
clf = InteractiveCharts(dataf = iris,labels='Class')
clf.add_chart("SepalLength","SepalWidth",legend=False)
Loading BokehJS ...
In [71]:
clf.add_chart("PetalLength","PetalWidth",legend=False)
In [5]:
fig = px.scatter_3d(iris,x = 'SepalLength',y = 'SepalWidth',z = 'PetalWidth',color='Class')
fig.update_layout(title='The Iris Dataset',
                  titlefont=dict({'size':28, 'family': 'Courier New'}),
                  template='plotly',
                  paper_bgcolor='lightgray',
                  width=750, height=550,
                 )
fig.update_layout(scene = dict(xaxis = dict(backgroundcolor = "rgba(200,100,140)",
                              gridcolor = "black",
                              showbackground = True,
                              zerolinecolor = "black",),
                 yaxis = dict(backgroundcolor = "rgba(100,200,120)",
                 gridcolor = "black",
                 showbackground = True,
                 zerolinecolor = "black"),
                 zaxis = dict(backgroundcolor = "rgba(200,90,50)",
                             gridcolor = "black",
                             showbackground = True,
                             zerolinecolor = "black"),
                              ),
                 )
fig.show()

Now as we're gonna apply the KMeans Algorithm on this,the Algorithm is very sensitive to outliers as well as Distributions,so lets have a look at the statistical plots of the features,and then make some changes in the Distribution of the Features!

In [6]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
iris['Class']=le.fit_transform(iris['Class'])
iris['Class'].value_counts()
Out[6]:
2    50
1    50
0    50
Name: Class, dtype: int64
In [7]:
iris.head()
Out[7]:
Id SepalLength SepalWidth PetalLength PetalWidth Class
0 1 5.1 3.5 1.4 0.2 0
1 2 4.9 3.0 1.4 0.2 0
2 3 4.7 3.2 1.3 0.2 0
3 4 4.6 3.1 1.5 0.2 0
4 5 5.0 3.6 1.4 0.2 0
In [8]:
train = iris.drop(['Class'],axis = 1)
In [9]:
import scipy.stats as stats
In [10]:
def makeplots(col):
    
    plt.figure(figsize=(15,7))
    plt.subplot(1,3,1)
    plt.hist(iris[col],color='salmon')
    
    plt.subplot(1,3,2)
    stats.probplot(iris[col],dist='norm',plot=plt)
    
    
    plt.subplot(1,3,3)
    sn.boxplot(iris[col],color='pink')
    plt.show()
    
    
In [11]:
for i in train.columns:
    print(f' Statistical Plots for the Feature : {i} are shown ↓')
    makeplots(i)
    print("-"*75)
 Statistical Plots for the Feature : Id are shown ↓
---------------------------------------------------------------------------
 Statistical Plots for the Feature : SepalLength are shown ↓
---------------------------------------------------------------------------
 Statistical Plots for the Feature : SepalWidth are shown ↓
---------------------------------------------------------------------------
 Statistical Plots for the Feature : PetalLength are shown ↓
---------------------------------------------------------------------------
 Statistical Plots for the Feature : PetalWidth are shown ↓
---------------------------------------------------------------------------
In [12]:
temp = train.copy()
temp['PetalLength'],params = stats.boxcox(temp['PetalLength']+1)
stats.probplot(temp['PetalLength'],dist = 'norm',plot=plt)
Out[12]:
((array([-2.60376328, -2.283875  , -2.1005573 , -1.96875864, -1.86428437,
         -1.77691182, -1.70131573, -1.63435332, -1.57400778, -1.51890417,
         -1.46806125, -1.42075308, -1.37642684, -1.33465133, -1.29508341,
         -1.25744533, -1.22150891, -1.18708433, -1.15401181, -1.12215558,
         -1.0913992 , -1.06164202, -1.03279638, -1.00478546, -0.97754152,
         -0.95100448, -0.92512081, -0.89984257, -0.87512664, -0.85093408,
         -0.8272296 , -0.80398107, -0.78115919, -0.75873709, -0.73669013,
         -0.71499557, -0.69363244, -0.67258128, -0.65182406, -0.63134396,
         -0.61112532, -0.59115349, -0.57141472, -0.55189613, -0.53258558,
         -0.51347162, -0.49454346, -0.47579085, -0.45720409, -0.43877397,
         -0.4204917 , -0.40234892, -0.38433762, -0.36645016, -0.3486792 ,
         -0.33101768, -0.31345882, -0.29599609, -0.27862316, -0.26133393,
         -0.24412247, -0.22698303, -0.20991002, -0.19289797, -0.17594158,
         -0.15903562, -0.142175  , -0.12535471, -0.10856981, -0.09181544,
         -0.07508681, -0.05837916, -0.0416878 , -0.02500804, -0.00833524,
          0.00833524,  0.02500804,  0.0416878 ,  0.05837916,  0.07508681,
          0.09181544,  0.10856981,  0.12535471,  0.142175  ,  0.15903562,
          0.17594158,  0.19289797,  0.20991002,  0.22698303,  0.24412247,
          0.26133393,  0.27862316,  0.29599609,  0.31345882,  0.33101768,
          0.3486792 ,  0.36645016,  0.38433762,  0.40234892,  0.4204917 ,
          0.43877397,  0.45720409,  0.47579085,  0.49454346,  0.51347162,
          0.53258558,  0.55189613,  0.57141472,  0.59115349,  0.61112532,
          0.63134396,  0.65182406,  0.67258128,  0.69363244,  0.71499557,
          0.73669013,  0.75873709,  0.78115919,  0.80398107,  0.8272296 ,
          0.85093408,  0.87512664,  0.89984257,  0.92512081,  0.95100448,
          0.97754152,  1.00478546,  1.03279638,  1.06164202,  1.0913992 ,
          1.12215558,  1.15401181,  1.18708433,  1.22150891,  1.25744533,
          1.29508341,  1.33465133,  1.37642684,  1.42075308,  1.46806125,
          1.51890417,  1.57400778,  1.63435332,  1.70131573,  1.77691182,
          1.86428437,  1.96875864,  2.1005573 ,  2.283875  ,  2.60376328]),
  array([1.03673932, 1.14363953, 1.2510142 , 1.2510142 , 1.3588437 ,
         1.3588437 , 1.3588437 , 1.3588437 , 1.3588437 , 1.3588437 ,
         1.3588437 , 1.46711007, 1.46711007, 1.46711007, 1.46711007,
         1.46711007, 1.46711007, 1.46711007, 1.46711007, 1.46711007,
         1.46711007, 1.46711007, 1.46711007, 1.57579673, 1.57579673,
         1.57579673, 1.57579673, 1.57579673, 1.57579673, 1.57579673,
         1.57579673, 1.57579673, 1.57579673, 1.57579673, 1.57579673,
         1.57579673, 1.57579673, 1.68488841, 1.68488841, 1.68488841,
         1.68488841, 1.68488841, 1.68488841, 1.68488841, 1.79437095,
         1.79437095, 1.79437095, 1.79437095, 2.01445686, 2.01445686,
         3.24822844, 3.59065709, 3.59065709, 3.82019023, 3.82019023,
         3.93531566, 4.05067401, 4.16626078, 4.28207164, 4.28207164,
         4.28207164, 4.39810245, 4.39810245, 4.39810245, 4.39810245,
         4.39810245, 4.51434921, 4.51434921, 4.51434921, 4.63080807,
         4.63080807, 4.63080807, 4.63080807, 4.74747534, 4.74747534,
         4.86434744, 4.86434744, 4.86434744, 4.86434744, 4.98142093,
         4.98142093, 4.98142093, 4.98142093, 4.98142093, 4.98142093,
         4.98142093, 4.98142093, 5.09869248, 5.09869248, 5.09869248,
         5.21615888, 5.21615888, 5.21615888, 5.21615888, 5.21615888,
         5.33381704, 5.33381704, 5.33381704, 5.33381704, 5.45166394,
         5.45166394, 5.45166394, 5.45166394, 5.45166394, 5.56969668,
         5.56969668, 5.56969668, 5.56969668, 5.68791246, 5.68791246,
         5.68791246, 5.68791246, 5.68791246, 5.68791246, 5.68791246,
         5.68791246, 5.80630854, 5.80630854, 5.9248823 , 5.9248823 ,
         6.04363116, 6.04363116, 6.16255264, 6.16255264, 6.16255264,
         6.28164434, 6.28164434, 6.28164434, 6.28164434, 6.28164434,
         6.28164434, 6.4009039 , 6.4009039 , 6.4009039 , 6.52032907,
         6.52032907, 6.52032907, 6.63991763, 6.63991763, 6.75966742,
         6.75966742, 6.87957636, 6.87957636, 6.87957636, 7.11986359,
         7.24023797, 7.48143889, 7.6022618 , 7.6022618 , 7.84434381])),
 (1.9264805519628319, 4.155746754443966, 0.9417459585504473))
In [13]:
data = train.copy()
data['PetalLength'] = data.PetalLength**(1/1.2)
stats.probplot(data['PetalLength'],dist='norm',plot=plt)
Out[13]:
((array([-2.60376328, -2.283875  , -2.1005573 , -1.96875864, -1.86428437,
         -1.77691182, -1.70131573, -1.63435332, -1.57400778, -1.51890417,
         -1.46806125, -1.42075308, -1.37642684, -1.33465133, -1.29508341,
         -1.25744533, -1.22150891, -1.18708433, -1.15401181, -1.12215558,
         -1.0913992 , -1.06164202, -1.03279638, -1.00478546, -0.97754152,
         -0.95100448, -0.92512081, -0.89984257, -0.87512664, -0.85093408,
         -0.8272296 , -0.80398107, -0.78115919, -0.75873709, -0.73669013,
         -0.71499557, -0.69363244, -0.67258128, -0.65182406, -0.63134396,
         -0.61112532, -0.59115349, -0.57141472, -0.55189613, -0.53258558,
         -0.51347162, -0.49454346, -0.47579085, -0.45720409, -0.43877397,
         -0.4204917 , -0.40234892, -0.38433762, -0.36645016, -0.3486792 ,
         -0.33101768, -0.31345882, -0.29599609, -0.27862316, -0.26133393,
         -0.24412247, -0.22698303, -0.20991002, -0.19289797, -0.17594158,
         -0.15903562, -0.142175  , -0.12535471, -0.10856981, -0.09181544,
         -0.07508681, -0.05837916, -0.0416878 , -0.02500804, -0.00833524,
          0.00833524,  0.02500804,  0.0416878 ,  0.05837916,  0.07508681,
          0.09181544,  0.10856981,  0.12535471,  0.142175  ,  0.15903562,
          0.17594158,  0.19289797,  0.20991002,  0.22698303,  0.24412247,
          0.26133393,  0.27862316,  0.29599609,  0.31345882,  0.33101768,
          0.3486792 ,  0.36645016,  0.38433762,  0.40234892,  0.4204917 ,
          0.43877397,  0.45720409,  0.47579085,  0.49454346,  0.51347162,
          0.53258558,  0.55189613,  0.57141472,  0.59115349,  0.61112532,
          0.63134396,  0.65182406,  0.67258128,  0.69363244,  0.71499557,
          0.73669013,  0.75873709,  0.78115919,  0.80398107,  0.8272296 ,
          0.85093408,  0.87512664,  0.89984257,  0.92512081,  0.95100448,
          0.97754152,  1.00478546,  1.03279638,  1.06164202,  1.0913992 ,
          1.12215558,  1.15401181,  1.18708433,  1.22150891,  1.25744533,
          1.29508341,  1.33465133,  1.37642684,  1.42075308,  1.46806125,
          1.51890417,  1.57400778,  1.63435332,  1.70131573,  1.77691182,
          1.86428437,  1.96875864,  2.1005573 ,  2.283875  ,  2.60376328]),
  array([1.        , 1.08266452, 1.16408414, 1.16408414, 1.24437934,
         1.24437934, 1.24437934, 1.24437934, 1.24437934, 1.24437934,
         1.24437934, 1.32365061, 1.32365061, 1.32365061, 1.32365061,
         1.32365061, 1.32365061, 1.32365061, 1.32365061, 1.32365061,
         1.32365061, 1.32365061, 1.32365061, 1.4019829 , 1.4019829 ,
         1.4019829 , 1.4019829 , 1.4019829 , 1.4019829 , 1.4019829 ,
         1.4019829 , 1.4019829 , 1.4019829 , 1.4019829 , 1.4019829 ,
         1.4019829 , 1.4019829 , 1.47944896, 1.47944896, 1.47944896,
         1.47944896, 1.47944896, 1.47944896, 1.47944896, 1.5561117 ,
         1.5561117 , 1.5561117 , 1.5561117 , 1.70724035, 1.70724035,
         2.49804953, 2.7045496 , 2.7045496 , 2.84046889, 2.84046889,
         2.90793984, 2.9750991 , 3.04195648, 3.10852124, 3.10852124,
         3.10852124, 3.1748021 , 3.1748021 , 3.1748021 , 3.1748021 ,
         3.1748021 , 3.24080734, 3.24080734, 3.24080734, 3.30654478,
         3.30654478, 3.30654478, 3.30654478, 3.37202185, 3.37202185,
         3.43724559, 3.43724559, 3.43724559, 3.43724559, 3.50222272,
         3.50222272, 3.50222272, 3.50222272, 3.50222272, 3.50222272,
         3.50222272, 3.50222272, 3.56695962, 3.56695962, 3.56695962,
         3.63146237, 3.63146237, 3.63146237, 3.63146237, 3.63146237,
         3.69573677, 3.69573677, 3.69573677, 3.69573677, 3.75978837,
         3.75978837, 3.75978837, 3.75978837, 3.75978837, 3.82362246,
         3.82362246, 3.82362246, 3.82362246, 3.8872441 , 3.8872441 ,
         3.8872441 , 3.8872441 , 3.8872441 , 3.8872441 , 3.8872441 ,
         3.8872441 , 3.95065816, 3.95065816, 4.01386928, 4.01386928,
         4.07688192, 4.07688192, 4.13970037, 4.13970037, 4.13970037,
         4.20232873, 4.20232873, 4.20232873, 4.20232873, 4.20232873,
         4.20232873, 4.26477097, 4.26477097, 4.26477097, 4.32703088,
         4.32703088, 4.32703088, 4.38911213, 4.38911213, 4.45101825,
         4.45101825, 4.51275264, 4.51275264, 4.51275264, 4.63571923,
         4.69695766, 4.81895954, 4.87972861, 4.87972861, 5.00081639])),
 (1.143021220622972, 2.961696267943777, 0.9348612521968866))
In [14]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
In [15]:
sc = StandardScaler()
train = iris.iloc[:,[0,1,2,3]].values
train_scaled = sc.fit_transform(train)
In [16]:
inertia_list = []
for i in range(1,11):
    clf = KMeans(n_clusters=i)
    clf.fit(train)
    inertia_list.append(clf.inertia_)
    
In [17]:
plt.plot(range(1,11),inertia_list,c = 'red')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()
In [18]:
scaled_inertia = []
for i in range(1,11):
    clf = KMeans(n_clusters=i)
    clf.fit(train_scaled)
    scaled_inertia.append(clf.inertia_)
In [19]:
scaled_inertia
Out[19]:
[599.9999999999999,
 241.87865013553858,
 162.4270470434845,
 135.5910288396513,
 111.36748873360543,
 92.75115175473049,
 83.86205378257608,
 74.14289087985286,
 65.86083035974785,
 57.97874390485215]
In [20]:
plt.plot(range(1,11),scaled_inertia,c = 'red',marker = 'X')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

From the above Elbow method we can observe that the optimal value for the number of clusters is 3,well even 4 can be possible,lets check what hyperparameter tuning is giving the result

In [21]:
test = iris['Class'].values
params = {
    'n_clusters':[int(x) for x in range(1,11)],
    'init':['k-means++', 'random'],
    'max_iter':[int(x) for x in np.linspace(100,500,100)],
    'tol':[1e-4,1e-3,1e-2,0.025,0.05,0.25,0.5],
    'precompute_distances':['auto', True, False],
    'algorithm':['auto',"full", "elkan"]
}
clf = RandomizedSearchCV(estimator=KMeans(),param_distributions=params,cv = 5,n_iter=10,n_jobs=-1,
                        return_train_score=False,scoring = 'accuracy')
clf.fit(train,test)
C:\Users\meet\anaconda3\envs\tensorflow--new\lib\site-packages\sklearn\cluster\_kmeans.py:934: FutureWarning:

'precompute_distances' was deprecated in version 0.23 and will be removed in 0.25. It has no effect

Out[21]:
RandomizedSearchCV(cv=5, estimator=KMeans(), n_jobs=-1,
                   param_distributions={'algorithm': ['auto', 'full', 'elkan'],
                                        'init': ['k-means++', 'random'],
                                        'max_iter': [100, 104, 108, 112, 116,
                                                     120, 124, 128, 132, 136,
                                                     140, 144, 148, 152, 156,
                                                     160, 164, 168, 172, 176,
                                                     180, 184, 188, 192, 196,
                                                     201, 205, 209, 213, 217, ...],
                                        'n_clusters': [1, 2, 3, 4, 5, 6, 7, 8,
                                                       9, 10],
                                        'precompute_distances': ['auto', True,
                                                                 False],
                                        'tol': [0.0001, 0.001, 0.01, 0.025,
                                                0.05, 0.25, 0.5]},
                   scoring='accuracy')
In [22]:
clf.best_estimator_
Out[22]:
KMeans(algorithm='full', init='random', max_iter=487, precompute_distances=True,
       tol=0.25)

So after the hyperparameter tuning step we can observe that the max clusters selected are 3,now lets make the predictions

In [23]:
inertia_list = []
for i in range(1,11):
    clf = KMeans(n_clusters=i)
    clf.fit(train)
    inertia_list.append(clf.inertia_)
    
In [24]:
kmeans = KMeans(n_clusters = 3, init = 'k-means++',max_iter = 300, n_init = 10, random_state = 0)
predictions = kmeans.fit_predict(train)
predictions
Out[24]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [25]:
kmeans.cluster_centers_
Out[25]:
array([[125.5  ,   6.588,   2.974,   5.552],
       [ 25.5  ,   5.006,   3.418,   1.464],
       [ 75.5  ,   5.936,   2.77 ,   4.26 ]])
In [52]:
kmeans.cluster_centers_[0][:3][0]
Out[52]:
125.5
In [44]:
kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1]
Out[44]:
(array([125.5,  25.5,  75.5]), array([6.588, 5.006, 5.936]))
In [40]:
kmeans.cluster_centers_[:,0][0]
Out[40]:
125.5

Cluster Visualisation ~ 2D

In [26]:
fig = plt.figure(figsize=(10, 8))
plt.scatter(train[predictions == 0, 0], train[predictions == 0, 1], s = 100, c = 'red', label = 'Iris-setosa')
plt.scatter(train[predictions == 1, 0], train[predictions == 1, 1], s = 100, c = 'blue', label = 'Iris-versicolour')
plt.scatter(train[predictions == 2, 0], train[predictions == 2, 1],s = 100, c = 'green', label = 'Iris-virginica')

# Plotting the centroids of the clusters
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:,1], 
            s = 300, c = 'yellow', label = 'Centroids', marker='*')

plt.legend()
Out[26]:
<matplotlib.legend.Legend at 0x23b95ab1748>

Cluster Visualisation ~ 3D

In [60]:
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111,projection = '3d')
ax.scatter(train[predictions==0,0],train[predictions==0,1],train[predictions==0,2],s = 50,color = 'blue',label = 'cluster1')
ax.scatter(train[predictions==1,0],train[predictions==1,1],train[predictions==1,2],s = 50,color = 'red',label = 'cluster2')
ax.scatter(train[predictions==2,0],train[predictions==2,1],train[predictions==2,2],s = 50,color = 'green',label = 'cluster3')

ax.scatter(kmeans.cluster_centers_[0][:3][0],kmeans.cluster_centers_[0][:3][1],kmeans.cluster_centers_[0][:3][2]
           ,s = 300,color = 'yellow',label = 'centroid1',marker = 'X')
ax.scatter(kmeans.cluster_centers_[1][:3][0],kmeans.cluster_centers_[1][:3][1],kmeans.cluster_centers_[1][:3][2]
           ,s = 300,color = 'violet',label = 'centroid2',marker = 'X')
ax.scatter(kmeans.cluster_centers_[2][:3][0],kmeans.cluster_centers_[2][:3][1],kmeans.cluster_centers_[2][:3][2]
           ,s = 300,color = 'red',label = 'centroid3',marker = 'X')

# ax.scatter(kmeans.cluster_centers_[:,0],0,0,color = 'yellow',s = 300,label = 'Centriod1')
# ax.scatter(kmeans.cluster_centers_[:,1],0,0,color = 'yellow',s = 300,label = 'Centriod1')
# ax.scatter(kmeans.cluster_centers_[:,2],0,0,color = 'yellow',s = 300,label = 'Centriod1')

# ax.scatter(kmeans.cluster_centers_[:,0][0],kmeans.cluster_centers_[:,0][1],kmeans.cluster_centers_[:,0][2]
#            ,s = 300,color = 'yellow',label = 'centroid')
# ax.scatter(kmeans.cluster_centers_[:,1][0],kmeans.cluster_centers_[:,1][1],kmeans.cluster_centers_[:,1][2]
#            ,s = 300,color = 'black',label = 'centroid')
# ax.scatter(kmeans.cluster_centers_[:,2][0],kmeans.cluster_centers_[:,2][1],kmeans.cluster_centers_[:,2][2]
#            ,s = 300,color = 'brown',label = 'centroid')
ax.set_xlabel('Setosa')
ax.set_ylabel('Versicolor')
ax.set_zlabel('Virginca')
ax.legend()
plt.show()