【Spark机器学习速成宝典】模型篇08保序回归【IsotonicRegression】(。。。
目录
待续...
# -*- coding: utf-8 -*-
import math

from pyspark import SparkConf, SparkContext
from pyspark.mllib.regression import (
    LabeledPoint,
    IsotonicRegression,
    IsotonicRegressionModel,
)
from pyspark.mllib.util import MLUtils

# Shared Spark entry point for the rest of the script, using the 'local' master.
sc = SparkContext('local')
# Load and parse the data.
def parsePoint(labeledData):
    """Convert a labeled record into a ``(label, feature, weight)`` tuple.

    Args:
        labeledData: An object exposing ``label`` and an indexable
            ``features`` attribute (the script passes the records produced
            by ``MLUtils.loadLibSVMFile``).

    Returns:
        A 3-tuple of the record's label, its first feature value, and a
        constant weight of ``1.0``.
    """
    # Only the first feature is used; the weight is fixed at 1.0 for every
    # record.
    return (labeledData.label, labeledData.features[0], 1.0)
data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_isotonic_regression_libsvm_data.txt")

# Create (label, feature, weight) tuples from the input data, with the weight
# set to the default value 1.0.
parsedData = data.map(parsePoint)

# Split the data into training (60%) and test (40%) sets; 11 is the seed.
training, test = parsedData.randomSplit([0.6, 0.4], 11)

# Create the isotonic regression model from the training data.
# The isotonic parameter is left at its default here, so it is not passed.
model = IsotonicRegression.train(training)

# Create tuples of (predicted label, real label). Each test record is a
# (label, feature, weight) tuple, so p[1] is the feature and p[0] the label.
predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))

# Calculate the mean squared error between predicted and real labels.
meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
# Sample output recorded with the original example:
# Mean Squared Error = 0.00863040529956
print("Mean Squared Error = " + str(meanSquaredError))

# Save and load the model.
model.save(sc, "myIsotonicRegressionModel")
sameModel = IsotonicRegressionModel.load(sc, "myIsotonicRegressionModel")

# Predict for the first input record. first() fetches only that record instead
# of collecting the whole RDD to the driver.
# Sample output recorded with the original example: 0.14987251
print(sameModel.predict(data.first().features))