KNeighborsRegressor is scikit-learn's model for K-nearest-neighbors regression. Its constructor prototype and main parameters are as follows:
```python
sklearn.neighbors.KNeighborsRegressor(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    p=2,
    metric='minkowski',
    metric_params=None,
    n_jobs=None,
)
```
- n_neighbors: int. The value of K, i.e. the number of nearest neighbors to use.
- weights: str or callable. The weighting scheme for neighbor contributions. For example:
  - 'uniform': all neighbors are weighted equally.
  - 'distance': each neighbor's weight is inversely proportional to its distance.
- algorithm: str. The algorithm used to compute the nearest neighbors. For example:
  - 'ball_tree': use a Ball Tree.
  - 'kd_tree': use a KD Tree.
  - 'brute': use brute-force search.
  - 'auto': automatically choose the most suitable algorithm.
- leaf_size: int. Leaf size passed to BallTree or KDTree; it affects the tree structure and query speed.
- p: int. The exponent of the Minkowski distance. For example:
  - p=1 gives the Manhattan distance.
  - p=2 gives the Euclidean distance.
- metric: str. The distance metric; defaults to the Minkowski distance.
- n_jobs: int or None. The degree of parallelism; defaults to None (i.e. a single job), while -1 uses all available CPU cores.
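To make these parameters concrete, here is a minimal construction sketch; the specific values below are illustrative choices of ours, not taken from the original text:

```python
from sklearn.neighbors import KNeighborsRegressor

# Illustrative settings (hypothetical): 10 neighbors, inverse-distance
# weighting, Manhattan distance (p=1), all CPU cores for neighbor queries.
regr = KNeighborsRegressor(
    n_neighbors=10,
    weights='distance',
    algorithm='auto',
    p=1,
    n_jobs=-1,
)
```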
The model's main methods:

- fit(X, y): train the model.
- predict(X): predict with the trained model; returns the predicted values.
- score(X, y): return the coefficient of determination R² of the prediction (at most 1; the closer to 1, the better the predictions; it can be negative for poorly fitting models).
- kneighbors(X, n_neighbors, return_distance): return the K nearest neighbors of each sample; if return_distance=True, the distances to those neighbors are returned as well.
- kneighbors_graph(X, n_neighbors, mode): return the connectivity graph of the K nearest neighbors of each sample.
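A short, self-contained sketch of these methods in action (the toy data below is our own illustration, not part of the original example):

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# Toy 1-D regression data, purely for illustration.
X = np.arange(10).reshape(-1, 1).astype(float)
y = np.sin(X).ravel()

regr = KNeighborsRegressor(n_neighbors=3)
regr.fit(X, y)                       # train the model

print(regr.predict([[2.5]]))         # predicted value at x = 2.5
print(regr.score(X, y))              # R^2 on the training data

# Distances to and indices of the 3 nearest training samples.
dist, ind = regr.kneighbors([[2.5]], n_neighbors=3, return_distance=True)
print(dist, ind)

# Sparse connectivity graph linking each sample to its 3 nearest neighbors.
graph = regr.kneighbors_graph(X, n_neighbors=3, mode='connectivity')
print(graph.toarray())
```

The original walkthrough below puts these pieces together on synthetic sine data.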
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
def load_regression_data(n):
    # Noisy samples of y = sin(x) on [0, 5); every 5th target gets extra noise.
    x = 5 * np.random.rand(n, 1)
    y = np.sin(x).ravel()
    y[::5] += 1 * (0.5 - np.random.rand(int(n / 5)))
    return train_test_split(x, y, test_size=0.25, random_state=0)
def test_KNeighborsRegressor(*data):
    # Train a default KNN regressor and report train/test R^2 scores.
    x_train, x_test, y_train, y_test = data
    regr = KNeighborsRegressor()
    regr.fit(x_train, y_train)
    print(f"KNeighbors Regressor Training set score: {regr.score(x_train, y_train):.3f}")
    print(f"KNeighbors Regressor Test set score: {regr.score(x_test, y_test):.3f}")
def test_KNeighborsRegressor_k(*data):
    # Sweep K for both weighting schemes and plot train/test scores.
    x_train, x_test, y_train, y_test = data
    ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for k in ks:
            regr = KNeighborsRegressor(weights=weight, n_neighbors=k)
            regr.fit(x_train, y_train)
            training_scores.append(regr.score(x_train, y_train))
            testing_scores.append(regr.score(x_test, y_test))
        ax.plot(ks, testing_scores, label=f"testing score: weights={weight}")
        ax.plot(ks, training_scores, label=f"training score: weights={weight}")
    ax.legend(loc="best")
    ax.set_xlabel("K")
    ax.set_ylabel("Score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
def test_KNeighborsRegressor_k_p(*data):
    # Sweep K for several Minkowski exponents p and plot train/test scores.
    x_train, x_test, y_train, y_test = data
    ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    ps = [1, 2, 5, 8, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for p in ps:
        training_scores = []
        testing_scores = []
        for k in ks:
            regr = KNeighborsRegressor(p=p, n_neighbors=k)
            regr.fit(x_train, y_train)
            training_scores.append(regr.score(x_train, y_train))
            testing_scores.append(regr.score(x_test, y_test))
        ax.plot(ks, testing_scores, label=f"testing score: p={p}")
        ax.plot(ks, training_scores, label=f"training score: p={p}")
    ax.legend(loc="best")
    ax.set_xlabel("K")
    ax.set_ylabel("Score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
x_train, x_test, y_train, y_test = load_regression_data(100)
test_KNeighborsRegressor(x_train, x_test, y_train, y_test)
test_KNeighborsRegressor_k(x_train, x_test, y_train, y_test)
test_KNeighborsRegressor_k_p(x_train, x_test, y_train, y_test)
```
The example code above investigates how the value of K and the weighting scheme affect the performance of KNeighborsRegressor. The results show that prediction performance drops significantly once K exceeds 30. The value of p has little effect here, which is expected: with a single input feature, every Minkowski exponent yields the same distances. It is the value of K that dominates prediction performance.
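Since K dominates performance, in practice one would typically tune it with cross-validation rather than read it off a plot. The following is a minimal sketch of ours (not part of the original example) that uses scikit-learn's GridSearchCV to select n_neighbors and weights jointly; it assumes the x_train and y_train produced by load_regression_data above:

```python
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

# Search over K = 1..30 and both weighting schemes with 5-fold CV.
param_grid = {
    'n_neighbors': list(range(1, 31)),
    'weights': ['uniform', 'distance'],
}
search = GridSearchCV(KNeighborsRegressor(), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_)   # K/weights pair with the best CV score
print(search.best_score_)    # mean cross-validated R^2 of the best model
```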