Spark SQL 加载 txt 文件 02
加载映射
import findspark
findspark.init()
import pyspark
from __future__ import print_function
from pyspark.sql import SparkSession
from pyspark.sql import Row
from pyspark.sql.types import *
import os

if __name__ == "__main__":
    # Script entry point: load people.txt as an RDD, attach a two-column
    # string schema ("name", "age"), register the result as the temporary
    # view "people", and print the "name" column.

    # getOrCreate() builds a new session or reuses one already active in
    # this process.
    spark = (
        SparkSession.builder
        .appName("Python Spark SQL basic example")
        .config("spark.some.config.option", "some-value")
        .getOrCreate()
    )

    try:
        sc = spark.sparkContext

        # Read the text file as an RDD with one element per line.
        lines = sc.textFile("C:/file/spark_package/spark-2.4.4-bin-hadoop2.7/examples/src/main/resources/people.txt")

        # Split each line on commas and keep the first two fields as a tuple;
        # the second field is stripped of surrounding whitespace.
        parts = lines.map(lambda l: l.split(","))
        people = parts.map(lambda p: (p[0], p[1].strip()))

        # The schema is described as a space-separated string of column
        # names; every column becomes a nullable StringType field.
        schemaString = "name age"
        fields = [
            StructField(field_name, StringType(), True)
            for field_name in schemaString.split()
        ]
        schema = StructType(fields)

        # Apply the schema to the RDD and expose it to SQL as view "people".
        schemaPeople = spark.createDataFrame(people, schema)
        schemaPeople.createOrReplaceTempView("people")

        results = spark.sql("SELECT name FROM people")
        results.show()
    finally:
        # Always release the session, even when loading or querying fails.
        spark.stop()
官网手册
def programmatic_schema_example(spark, path="examples/src/main/resources/people.txt"):
    """Load a comma-separated text file with a programmatically built schema.

    The file is read as an RDD of lines, each line is split on commas, and
    the first two fields are kept as a ``(name, age)`` tuple of strings.
    The resulting DataFrame is registered as the temporary view ``people``
    and the ``name`` column is printed with ``show()``.

    Args:
        spark: Session object providing ``sparkContext``,
            ``createDataFrame`` and ``sql`` (a ``SparkSession``).
        path: Location of the text file to load. The default is the
            relative path this function originally hard-coded; being
            relative, where it resolves depends on the environment the
            job runs in.

    Returns:
        None. The query result is printed, not returned.
    """
    sc = spark.sparkContext

    # One RDD element per line of the input file.
    lines = sc.textFile(path)

    # Keep the first two comma-separated fields; the second one is stripped
    # of surrounding whitespace.
    parts = lines.map(lambda l: l.split(","))
    people = parts.map(lambda p: (p[0], p[1].strip()))

    # The schema is encoded as a space-separated string of column names;
    # every column becomes a nullable StringType field.
    schemaString = "name age"
    fields = [
        StructField(field_name, StringType(), True)
        for field_name in schemaString.split()
    ]
    schema = StructType(fields)

    # Apply the schema to the RDD and expose it to SQL as view "people".
    schemaPeople = spark.createDataFrame(people, schema)
    schemaPeople.createOrReplaceTempView("people")

    results = spark.sql("SELECT name FROM people")
    results.show()