码迷,mamicode.com
首页 > 其他好文 > 详细

LFW

时间:2016-11-18 22:17:38      阅读:449      评论:0      收藏:0      [点我收藏+]

标签:read   cto   cas   textfile   wim   ref   log   text   numpy   

// Glob over the LFW data directory.
val path = "/usr/data/lfw-a/*"
// Each element of the RDD is a (fileName, content) pair.
val rdd = sc.wholeTextFiles(path)
val first = rdd.first
println(first)
// Only the file names are needed from here on; strip the "file:" scheme
// prefix so that each name is a plain path.
val files = rdd.map { case (fileName, _) => fileName.replace("file:", "") }
println(files.first)

// The original call was missing its closing parenthesis.
println(files.count)
%pyspark
# Display one sample image from the data set with matplotlib.
import matplotlib.pyplot as plt
path = "/usr/data/lfw-a/Aaron_Eckhart/Aaron_Eckhart_0001.jpg"
# Read the JPEG from disk and render it.
ae = plt.imread(path)
plt.imshow(ae)
plt.show()
import java.awt.image.BufferedImage
/**
 * Reads the image stored at `path`.
 *
 * `ImageIO.read` returns `null` instead of throwing when no registered image
 * reader can decode the file. That case is converted into an `IIOException`
 * (a subclass of `IOException`, the type `ImageIO.read` already throws for
 * read failures) so an undecodable file fails at load time rather than being
 * passed on as a null image.
 *
 * @param path location of the image file
 * @return the decoded image, never `null`
 * @throws java.io.IOException if the file cannot be read or is not in a
 *                             format any registered reader supports
 */
def loadImageFromFile(path: String): BufferedImage = {
  import javax.imageio.{IIOException, ImageIO}
  import java.io.File
  Option(ImageIO.read(new File(path))).getOrElse {
    throw new IIOException(s"No image reader could decode: $path")
  }
}
// Load a single sample image from disk.
val aePath = "/usr/data/lfw-a/Aaron_Eckhart/Aaron_Eckhart_0001.jpg"
val aeImage = loadImageFromFile(aePath)
import java.awt.image
/**
 * Renders `image` onto a fresh 8-bit grayscale canvas of the requested size.
 *
 * The source is drawn into the rectangle (0, 0, width, height), so it is
 * scaled to fill the whole canvas.
 *
 * @param image  source image to draw
 * @param width  width of the resulting image in pixels
 * @param height height of the resulting image in pixels
 * @return a new `TYPE_BYTE_GRAY` image of size `width` x `height`
 */
def processImage(image: BufferedImage, width: Int, height: Int): BufferedImage = {
  val canvas = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY)
  val graphics = canvas.getGraphics()
  graphics.drawImage(image, 0, 0, width, height, null)
  // Release the graphics context once drawing is finished.
  graphics.dispose()
  canvas
}
// Convert the sample image to a 100x100 grayscale image.
val grayImage = processImage(aeImage, 100, 100)
import javax.imageio.ImageIO
import java.io.File
// Save the result as a JPEG; the Boolean returned by ImageIO.write is not checked here.
ImageIO.write(grayImage, "jpg", new File("/tmp/aeGray.jpg"))
%pyspark
# Display the grayscale JPEG stored at /tmp/aeGray.jpg.
import matplotlib.pyplot as plt
tmpPath = "/tmp/aeGray.jpg"
aeGary = plt.imread(tmpPath)
# Use the gray colormap so the pixel values are rendered as shades of gray.
plt.imshow(aeGary, cmap=plt.cm.gray)
plt.show()
/**
 * Copies every sample of `image` into a flat array of doubles.
 *
 * The buffer is sized from the raster's band count. A single-band (grayscale)
 * image therefore yields `width * height` values, while a multi-band image
 * yields `width * height * bands` values with the bands of each pixel stored
 * next to each other, instead of overflowing a buffer that only has room for
 * one band per pixel.
 *
 * @param image image whose raster is read
 * @return the samples of all pixels, row by row
 */
def getPixelsFromImage(image: BufferedImage): Array[Double] = {
  val width = image.getWidth
  val height = image.getHeight
  // getData hands back a copy of the raster, so fetch it once and reuse it.
  val raster = image.getData
  val pixels = Array.ofDim[Double](width * height * raster.getNumBands)
  raster.getPixels(0, 0, width, height, pixels)
}
/**
 * Loads the image at `path`, converts it to a `width` x `height` grayscale
 * image and returns its pixel values.
 *
 * @param path   location of the image file
 * @param width  target width in pixels
 * @param height target height in pixels
 * @return the pixel values of the converted image
 */
def extractPixels(path: String, width: Int, height: Int): Array[Double] =
  getPixelsFromImage(processImage(loadImageFromFile(path), width, height))
// Turn every image file into the pixel values of its 50x50 grayscale version.
val pixels = files.map(f => extractPixels(f, 50, 50))
// Preview: the first 10 pixel values of each of the first 10 images, one image per line.
println(pixels.take(10).map(_.take(10).mkString
("", ",", ", ...")).mkString("\n"))
import org.apache.spark.mllib.linalg.Vectors
// Wrap each pixel array in an MLlib dense vector.
val vectors = pixels.map(p => Vectors.dense(p))
// Give the RDD a readable name and mark it for caching, since it is reused below.
vectors.setName("image-vectors")
vectors.cache
import org.apache.spark.mllib.linalg.Matrix
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.feature.StandardScaler
// Fit a scaler that centres the data (withMean = true) without rescaling
// it to unit standard deviation (withStd = false).
val scaler = new StandardScaler(withMean = true, withStd = false).fit(vectors)
val scaledVectors = vectors.map(v => scaler.transform(v))
import org.apache.spark.mllib.linalg.Matrix
import org.apache.spark.mllib.linalg.distributed.RowMatrix
// Build a row matrix from the centred vectors and compute its top K principal components.
val matrix = new RowMatrix(scaledVectors)
val K = 10
val pc = matrix.computePrincipalComponents(K)

java.lang.OutOfMemoryError: Java heap space 解决方法

http://stackoverflow.com/questions/21138751/spark-java-lang-outofmemoryerror-java-heap-space

 

# Default SPARK_MEM to 512m unless it is already set in the environment.
SPARK_MEM=${SPARK_MEM:-512m}
export SPARK_MEM
# Set JAVA_OPTS to be able to load native libraries and to set heap size
JAVA_OPTS="$OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
# Both the initial (-Xms) and the maximum (-Xmx) heap size are set to SPARK_MEM.
JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"

 

spark.driver.memory 4000m

// Dimensions of the principal-component matrix.
val rows = pc.numRows
val cols = pc.numCols
// Pass an explicit tuple instead of relying on the compiler adapting a
// two-argument call into one; the printed text is the same.
println((rows, cols))
import breeze.linalg.DenseMatrix
// Copy the components into a Breeze matrix of the same shape so they can be written as CSV.
val pcBreeze = new DenseMatrix(rows, cols, pc.toArray)
import breeze.linalg.csvwrite
csvwrite(new File("/tmp/pc.csv"), pcBreeze)
%pyspark
# Load the comma-separated matrix stored at /tmp/pc.csv and report its shape.
import numpy as np 
import matplotlib.pyplot as plt

pcs = np.loadtxt("/tmp/pc.csv", delimiter=",")
print(pcs.shape)
%pyspark
import numpy as np 
import matplotlib.pyplot as plt

def plot_gallery(images, h, w, n_row=2, n_col=5):
    """Draw the leading columns of ``images`` as a grid of grayscale pictures.

    Column ``i`` of ``images`` is reshaped to ``(h, w)`` and shown in the
    ``i``-th cell of an ``n_row`` x ``n_col`` grid, titled "Eigenface <i+1>".
    ``images`` must therefore have at least ``n_row * n_col`` columns of
    ``h * w`` values each. The figure is built but not shown; call
    ``plt.show()`` afterwards.
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for position in range(1, n_row * n_col + 1):
        plt.subplot(n_row, n_col, position)
        # Subplot positions are 1-based, matrix columns 0-based.
        face = images[:, position - 1].reshape((h, w))
        plt.imshow(face, cmap=plt.cm.gray)
        plt.title("Eigenface %d" % position, size=12)
        # Hide the axis ticks.
        plt.xticks(())
        plt.yticks(())
%pyspark
# Plot the loaded components as 50x50 images using the default 2x5 grid.
plot_gallery(pcs, 50, 50)
plt.show()

 

LFW

标签:read   cto   cas   textfile   wim   ref   log   text   numpy   

原文地址:http://www.cnblogs.com/5211314jackrose/p/6078734.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!