// Spark SQL: programmatically specifying the schema.
//
// Builds a DataFrame from a comma-separated text file by constructing the
// schema at runtime from a string of column names, registers it as a temporary
// table, and queries it with SQL.
//
// NOTE(review): `sc` (the SparkContext) is not defined in this script; it is
// presumably supplied by spark-shell -- confirm before running elsewhere.

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, StructField, StringType}

val sqlContext = new org.apache.spark.sql.SQLContext(sc)

// Input file: one record per line, fields separated by commas.
// Sample contents:
//   1201,satish,25
//   1202,krishna,28
//   1203,amith,39
//   1204,javed,23
//   1205,prudvi,23
val employee = sc.textFile("/root/wangbin/employee.txt")

// Column names, in the same order as the fields on each input line.
val schemaString = "id,name,age"

// Every column is declared as a nullable string; no numeric conversion is done.
val schema = StructType(schemaString.split(",").map(fieldName => StructField(fieldName, StringType, true)))

// Number of values each Row must carry so that it matches the schema.
val fieldCount = schema.fields.length

// Split each line into fields. The -1 limit keeps trailing empty fields, so a
// line such as "1201,satish," still yields three values.
val splitLines = employee.map(_.split(",", -1))

// Skip blank or short lines (e.g. a trailing newline in the file) instead of
// failing the job with ArrayIndexOutOfBoundsException; extra fields beyond the
// schema width are ignored.
val rowRDD = splitLines.filter(_.length >= fieldCount).map(fields => Row.fromSeq(fields.take(fieldCount).toSeq))

// Create the DataFrame from the row RDD (rowRDD) and the schema.
val employeeDF = sqlContext.createDataFrame(rowRDD, schema)

// Register the DataFrame as a temporary table named employee2.
// NOTE(review): registerTempTable is deprecated from Spark 2.0 onwards in
// favour of createOrReplaceTempView; kept here to match the SQLContext-era API
// used by the rest of this script.
employeeDF.registerTempTable("employee2")

// Select all records from the employee2 table.
val allrecords = sqlContext.sql("SELECT * FROM employee2")

// Display the contents of the resulting DataFrame.
allrecords.show()

// Output for the sample input above:
// +----+-------+---+
// |  id|   name|age|
// +----+-------+---+
// |1201| satish| 25|
// |1202|krishna| 28|
// |1203|  amith| 39|
// |1204|  javed| 23|
// |1205| prudvi| 23|
// +----+-------+---+