标签:com not for 分析 value pow code alt model
如果是自己写kmeans的话,会怎么写呢?
def train(
data: RDD[Vector],
k: Int,
maxIterations: Int,
runs: Int,
initializationMode: String): KMeansModel = {
new KMeans().setK(k)
.setMaxIterations(maxIterations)
.setRuns(runs)
.setInitializationMode(initializationMode)
.run(data)
}
def run(data: RDD[Vector]): KMeansModel = {
if (data.getStorageLevel == StorageLevel.NONE) {
logWarning("The input data is not directly cached, which may hurt performance if its"
+ " parent RDDs are also uncached.")
}
// Compute squared norms and cache them.
//求2范数
val norms = data.map(Vectors.norm(_, 2.0))
norms.persist()
//将向量和平方和zip起来
val zippedData = data.zip(norms).map { case (v, norm) =>
new VectorWithNorm(v, norm)
}
- //这个是大头
val model = runAlgorithm(zippedData)
//原来还能主动unpersist的,涨姿势了
norms.unpersist()
// Warn at the end of the run as well, for increased visibility.
if (data.getStorageLevel == StorageLevel.NONE) {
logWarning("The input data was not directly cached, which may hurt performance if its"
+ " parent RDDs are also uncached.")
}
model
}
def norm(vector: Vector, p: Double): Double = {
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
s"You specified p=$p.")
val values = vector match {
case DenseVector(vs) => vs
case SparseVector(n, ids, vs) => vs
case v => throw new IllegalArgumentException("Do not support vector type " + v.getClass)
}
val size = values.length
if (p == 1) {
var sum = 0.0
var i = 0
while (i < size) {
sum += math.abs(values(i))
i += 1
}
sum
} else if (p == 2) {
var sum = 0.0
var i = 0
while (i < size) {
sum += values(i) * values(i)
i += 1
}
math.sqrt(sum)
} else if (p == Double.PositiveInfinity) {
var max = 0.0
var i = 0
while (i < size) {
val value = math.abs(values(i))
if (value > max) max = value
i += 1
}
max
} else {
var sum = 0.0
var i = 0
while (i < size) {
sum += math.pow(math.abs(values(i)), p)
i += 1
}
math.pow(sum, 1.0 / p)
}
}
标签:com not for 分析 value pow code alt model
原文地址:http://www.cnblogs.com/sunrye/p/6504888.html