# k_means.py — K-means clustering example (TensorFlow 1.x)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
# Generate 2000 sample points drawn from two Gaussian blobs:
# roughly half around (0.0, 0.0) with std 0.9, the rest around (3.0, 1.0)
# with std 0.5 — two well-separated clusters for k-means to recover.
num_puntos = 2000
conjunto_puntos = []
for i in range(num_puntos):
    if np.random.random() > 0.5:
        conjunto_puntos.append([np.random.normal(0.0, 0.9),
                                np.random.normal(0.0, 0.9)])
    else:
        conjunto_puntos.append([np.random.normal(3.0, 0.5),
                                np.random.normal(1.0, 0.5)])

# TF1-style session; the graph below is run through this handle.
sess = tf.Session()

# Number of clusters to fit (the original comment: "a 4-way classification problem").
k = 4
''' Create a Tensor holding all sample points '''
vectors = tf.constant(conjunto_puntos)  # shape (2000, 2)

''' Create k initial means of shape (k, 2) '''
# Shuffle the points and slice off the first k rows as the starting centroids.
# tf.slice: extracts a slice from a tensor; [k, -1] keeps all columns.
centroides = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))

''' Increase the dimensions so point-centroid distances broadcast '''
# vectors: (2000, 2) -> expanded_vectors: (1, 2000, 2)
expanded_vectors = tf.expand_dims(vectors, 0)
# centroides: (k, 2) -> expanded_centroides: (k, 1, 2)
expanded_centroides = tf.expand_dims(centroides, 1)

''' Assignment step: label each point with its nearest centroid '''
# tf.square(tf.subtract(...)) broadcasts to shape (k, 2000, 2);
# tf.reduce_sum(..., 2) collapses the coordinate axis -> squared
# Euclidean distances of shape (k, 2000); tf.argmin(..., 0) picks, for
# each point, the index of the nearest of the k centroids -> shape (2000,).
assignments = tf.argmin(tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2), 0)
''' Update step: recompute the k cluster means '''
# For each cluster c:
#   tf.where(tf.equal(assignments, c)) -> indices of points assigned to c
#   tf.reshape(..., [1, -1])           -> shape (1, n_c)
#   tf.gather(vectors, ...)            -> those points, shape (1, n_c, 2)
#   tf.reduce_mean(..., axis=[1])      -> cluster mean, shape (1, 2)
# Concatenating the k per-cluster means along axis 0 gives a (k, 2) tensor.
# NOTE: `axis` replaces the deprecated `reduction_indices` keyword.
means = tf.concat(
    [tf.reduce_mean(
        tf.gather(vectors,
                  tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
        axis=[1])
     for c in range(k)],
    axis=0)

''' Move the centroids to the freshly computed means '''
# tf.assign updates the variable in place and returns the assigned value.
update_centroides = tf.assign(centroides, means)
''' Initialize the graph variables '''
# tf.initialize_all_variables() was deprecated in TF 0.12 and later removed;
# tf.global_variables_initializer() is the supported replacement.
init_op = tf.global_variables_initializer()
sess.run(init_op)
''' Run 100 assignment/update iterations '''
for step in range(100):
    _, centroid_values, assignment_values = sess.run(
        [update_centroides, centroides, assignments])

''' Build a tidy DataFrame of the points plus their final cluster label '''
# Construct the columns directly instead of appending element by element
# inside an index loop.
df = pd.DataFrame({
    "x": [p[0] for p in conjunto_puntos],
    "y": [p[1] for p in conjunto_puntos],
    "cluster": assignment_values,
})

# Final centroid coordinates, for overlay on the scatter plot.
m = sess.run(means)

''' Plot with seaborn: points colored by cluster, centroids as black diamonds '''
sns.lmplot("x", "y", data=df, fit_reg=False, size=6, hue="cluster", legend=False)
plt.plot(m[:, 0], m[:, 1], 'kd')
plt.show()  # was missing: a plain script never displays the figure without it

sess.close()  # release the session's resources