sklearn查看数据分布
2017-02-10 19:57
337 查看
# Visualise the distribution of the training data: standardise the feature
# columns of train.csv, project them onto two principal components with both
# IncrementalPCA and PCA, and draw one scatter plot per method in which the
# points are coloured by their encoded species.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.decomposition import PCA, IncrementalPCA

train_data = pd.read_csv("train.csv")
# Raw per-row species column (one entry per sample, NOT one per class).
LABELS = train_data['species']

# Pop the 'id' column out of train_data so it is not used as a feature.
ID = train_data.pop('id')
# Pop the 'species' column out of train_data; it is the target.
y = train_data.pop('species')

# Encode the species strings as integer class codes.  The fitted encoder is
# kept because classes_[i] is the species name that was mapped to code i,
# which is what the legend has to show for class i.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
CLASS_NAMES = label_encoder.classes_
print(y)

# Standardize the data by setting the mean to 0 and std to 1.
standardize = True
if standardize:
    X = StandardScaler().fit(train_data).transform(train_data)
else:
    X = train_data.values
print(X[0:1])

n_components = 2
ipca = IncrementalPCA(n_components=n_components, batch_size=10)
X_ipca = ipca.fit_transform(X)

pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X)

# Base palette; it is cycled when more classes than colours are drawn.
colors = ['navy', 'turquoise', 'darkorange', 'blue', 'purple',
          'green', 'yellow', 'red', 'pink', 'palegoldenrod']

# Upper bound on how many classes are drawn per figure (the script has always
# drawn the first 25 class codes).  Clamped to the real class count so that no
# empty series end up in the legend.
MAX_CLASSES_SHOWN = 25
n_shown = min(MAX_CLASSES_SHOWN, len(CLASS_NAMES))

for X_transformed, title in [(X_ipca, "Incremental PCA"), (X_pca, "PCA")]:
    plt.figure(figsize=(8, 8))
    for i in range(n_shown):
        in_class = y == i
        plt.scatter(X_transformed[in_class, 0], X_transformed[in_class, 1],
                    color=colors[i % len(colors)], lw=2,
                    label=CLASS_NAMES[i])

    # NOTE(review): the titles still say "iris dataset"; the data comes from
    # train.csv with a 'species' column -- confirm the intended dataset name.
    if "Incremental" in title:
        # The sign of a principal component is arbitrary, so the two
        # projections are compared on absolute values.
        err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()
        plt.title(title + " of iris dataset\nMean absolute unsigned error "
                  "%.6f" % err)
    else:
        plt.title(title + " of iris dataset")
    plt.legend(loc="best", shadow=False, scatterpoints=1)
    plt.axis([-10, 10, -10, 10])

plt.show()
# Variant of the data-distribution plot without a legend: standardise the
# feature columns of train.csv, reduce them to two components with
# IncrementalPCA and with PCA, and scatter-plot each projection with the
# points coloured by encoded species.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.decomposition import PCA, IncrementalPCA

train_data = pd.read_csv("train.csv")
# Raw per-row species column (one entry per sample, NOT one per class).
LABELS = train_data['species']

# Remove the 'id' column from train_data so it is not treated as a feature.
ID = train_data.pop('id')
# Remove the 'species' column from train_data; it is the target.
y = train_data.pop('species')

# Turn the species strings into integer class codes.  The encoder is kept so
# that each plotted series can be tagged with classes_[i], the species name
# that actually corresponds to code i.
species_encoder = LabelEncoder()
y = species_encoder.fit_transform(y)
CLASS_NAMES = species_encoder.classes_
print(y)

# Standardize the data by setting the mean to 0 and std to 1.
standardize = True
if standardize:
    X = StandardScaler().fit(train_data).transform(train_data)
else:
    X = train_data.values
print(X[0:1])

n_components = 2
ipca = IncrementalPCA(n_components=n_components, batch_size=10)
X_ipca = ipca.fit_transform(X)

pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X)

# Base palette; reused cyclically when there are more classes than colours.
colors = ['navy', 'turquoise', 'darkorange', 'blue', 'purple',
          'green', 'yellow', 'red', 'pink', 'palegoldenrod']

# At most this many class codes are drawn per figure (unchanged from the
# previous hard-coded 0..24 list), clamped to the number of classes present.
MAX_CLASSES_SHOWN = 25
n_shown = min(MAX_CLASSES_SHOWN, len(CLASS_NAMES))

# The legend is switched off in this variant; set to True to draw it.
show_legend = False

for X_transformed, title in [(X_ipca, "Incremental PCA"), (X_pca, "PCA")]:
    plt.figure(figsize=(8, 8))
    for class_code in range(n_shown):
        members = y == class_code
        plt.scatter(X_transformed[members, 0], X_transformed[members, 1],
                    color=colors[class_code % len(colors)], lw=2,
                    label=CLASS_NAMES[class_code])

    # NOTE(review): the titles still say "iris dataset"; the data comes from
    # train.csv with a 'species' column -- confirm the intended dataset name.
    if "Incremental" in title:
        # Component signs are arbitrary, hence the comparison of magnitudes.
        err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()
        plt.title(title + " of iris dataset\nMean absolute unsigned error "
                  "%.6f" % err)
    else:
        plt.title(title + " of iris dataset")
    if show_legend:
        plt.legend(loc="best", shadow=False, scatterpoints=1)
    plt.axis([-10, 10, -10, 10])

plt.show()
如果帮到你了,请赞赏支持:
相关文章推荐
- 学习prefuse
- ios、android 数据可视化图表赏析
- 数据可视化解读阿里、腾讯帝国2015年扩张概况
- 奇虎360-数据可视化
- 搞懂5种数据可视化方法,胜任90%热门信息图设计
- 数据可视化呈现对企业有什么意义?
- 文本数据分析神器—— IBM BigInsights Text Analytics
- 软件即服务和云计算之间有什么区别
- (转)智能+可视化 看商业智能的发展方向
- 《Nevron NET Vision Enterprise 2007 v7.2.Q1》For VS2003 & For VS2005 FTP下载 技术共享
- 基于vue、vuex、vue-router、echarts搭建的数据展示平台
- 学习R网址
- 利用VTK对数据进行可视化的实例
- MATLAB-数据可视化笔记
- 计算机图形实现建模技术的3种方法
- 【斯坦福大学】计算机图形学实验室的概况
- 12306泄露数据可视化分析
- MATLAB与外部程序的数据交互三板斧
- 如何正确理解商业智能(BI)?--上