kNN.py:
# coding:utf-8
from collections import Counter
from math import sqrt

import numpy as np

from metrics import accuracy_score
class kNNClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote."""

    def __init__(self, k):
        """Initialise the kNN classifier.

        Args:
            k: Number of nearest neighbours that vote on each prediction;
                must be at least 1.

        Raises:
            AssertionError: If ``k`` is smaller than 1.
        """
        assert 1 <= k, 'k must be valid'
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Train the classifier by storing the training set.

        Args:
            X_train: 2-D array of training samples, one row per sample.
            y_train: Array of labels with one entry per row of ``X_train``.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            AssertionError: If the sample counts differ or there are fewer
                than ``k`` training samples.
        """
        assert X_train.shape[0] == y_train.shape[0], (
            'the size of X_train must equal to the size of y_train'
        )
        assert self.k <= X_train.shape[0], (
            'the size of X_train must be at least k'
        )
        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Predict a label for every row of ``X_predict``.

        Args:
            X_predict: 2-D array with the same number of columns as the
                training data.

        Returns:
            A NumPy array holding one predicted label per row.

        Raises:
            AssertionError: If called before ``fit`` or if the feature count
                does not match the training data.
        """
        assert self._X_train is not None and self._y_train is not None, (
            'must fit before predict'
        )
        assert X_predict.shape[1] == self._X_train.shape[1], (
            'the feature number of X_predict equal to the feature number of self._X_train'
        )
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Predict the label of a single sample ``x``.

        The ``k`` training samples closest to ``x`` vote; the most common
        label wins. ``Counter.most_common`` keeps first-encountered order for
        equal counts, so a tied vote goes to the label seen first when walking
        the neighbours from nearest to farthest.
        """
        assert x.shape[0] == self._X_train.shape[1], (
            'the feature number of x must be equal to X_train'
        )
        # Euclidean distance from x to every training row in one vectorised
        # pass instead of a Python-level loop over the rows.
        distances = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        nearest = np.argsort(distances)
        topK_y = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Return the prediction accuracy on the given test set.

        Predicts labels for ``X_test`` and passes them, together with
        ``y_test``, to ``accuracy_score``.
        """
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return 'kNN(k=%d)' % self.k


# Usage:
# kNN_clf = kNNClassifier(3)
# kNN_clf.fit(X_train,y_train)
# kNN_clf.predict(x)
# kNN_clf.score(X_test,y_test)
metrics.py:
import numpy as np


def accuracy_score(y_test, y_predict):
    """Return the accuracy of ``y_predict`` measured against ``y_test``.

    Args:
        y_test: Array of true labels.
        y_predict: Array of predicted labels, same length as ``y_test``.

    Returns:
        The fraction of positions at which the two arrays are equal.

    Raises:
        AssertionError: If the two arrays differ in length.
    """
    assert y_test.shape[0] == y_predict.shape[0], (
        'the size of y_test must equal to the size of y_predict'
    )
    # np.sum counts the matches in native code; the builtin sum would iterate
    # the boolean array element by element in Python.
    return np.sum(y_predict == y_test) / len(y_test)
数据归一化处理（均值方差归一化）：
import numpy as np


class StandardScaler:
    """Mean/standard-deviation (z-score) feature scaler."""

    def __init__(self):
        # Per-feature mean and standard deviation; both stay None until fit().
        self.mean_ = None
        self.scale_ = None

    def fit(self, X):
        """Compute the per-feature mean and standard deviation of ``X``.

        Args:
            X: 2-D array, one row per sample and one column per feature.

        Returns:
            The scaler itself, so calls can be chained.

        Raises:
            AssertionError: If ``X`` is not 2-dimensional.
        """
        assert X.ndim == 2, 'the dimension of X must be 2'
        # axis=0 reduces over the rows, giving one statistic per column.
        self.mean_ = np.mean(X, axis=0)
        self.scale_ = np.std(X, axis=0)
        return self

    def transform(self, X):
        """Standardise ``X`` using the statistics learned by ``fit``.

        Each column has its fitted mean subtracted and is divided by its
        fitted standard deviation. A column whose fitted standard deviation
        is 0 is divided by 1 instead, so it is only centred and the result
        contains no NaN/inf from a division by zero.

        Args:
            X: 2-D array with the same number of columns as the fitted data.

        Returns:
            A new float array with the same shape as ``X``.

        Raises:
            AssertionError: If ``X`` is not 2-dimensional, if the scaler has
                not been fitted, or if the feature count does not match.
        """
        assert X.ndim == 2, 'the dimension of X must be 2'
        assert self.mean_ is not None and self.scale_ is not None, (
            'must fit before transform!'
        )
        assert X.shape[1] == len(self.mean_), (
            'the feature number of X must be equal to mean_ and std_'
        )
        # Guard against zero-variance features without altering scale_ itself.
        safe_scale = np.where(self.scale_ == 0, 1.0, self.scale_)
        return np.asarray((X - self.mean_) / safe_scale, dtype=float)