Example: Monte Carlo Estimation
Within the template PySpark project, pi.py
is a classic example that calculates Pi using Monte Carlo estimation.
What follows is the full, annotated code sample that can be saved to
the pi.py
file.
# # Estimating $\pi$
#
# This PySpark example shows you how to estimate $\pi$ in parallel
# using Monte Carlo integration.

from __future__ import print_function

import sys
from operator import add
from random import random

# Number of random points sampled for each partition of the job.
SAMPLES_PER_PARTITION = 100000


def parse_partitions(argv):
    """Return the partition count requested on the command line.

    Args:
        argv: Argument list in ``sys.argv`` form; ``argv[1]``, when present,
            is the number of partitions.

    Returns:
        The partition count as an int, defaulting to 2 when no argument
        was supplied.

    Raises:
        ValueError: If the argument is not an integer or is less than 1.
    """
    partitions = int(argv[1]) if len(argv) > 1 else 2
    if partitions < 1:
        # Zero or negative partitions would yield an empty sample set and
        # only fail later inside the Spark job; reject it up front instead.
        raise ValueError(
            "partitions must be a positive integer, got %d" % partitions)
    return partitions


def f(_):
    """Draw one random point and report whether it lies in the unit circle.

    The point is uniform over the square [-1, 1) x [-1, 1). The fraction of
    points that land inside the unit circle approximates pi / 4.

    Args:
        _: Ignored; present so the function can be used with ``map``.

    Returns:
        1 if the point is strictly inside the unit circle, otherwise 0.
    """
    x = random() * 2 - 1
    y = random() * 2 - 1
    return 1 if x ** 2 + y ** 2 < 1 else 0


def main():
    """Estimate pi on Spark and print the result."""
    # Imported here so the helpers above stay importable without PySpark.
    from pyspark.sql import SparkSession

    # Validate the arguments before paying the cost of starting a session.
    partitions = parse_partitions(sys.argv)
    n = SAMPLES_PER_PARTITION * partitions

    # Connect to Spark by creating a Spark session
    spark = SparkSession\
        .builder\
        .appName("PythonPi")\
        .getOrCreate()
    try:
        # To access the associated SparkContext
        count = spark.sparkContext.parallelize(
            range(1, n + 1), partitions).map(f).reduce(add)
        print("Pi is roughly %f" % (4.0 * count / n))
    finally:
        # Always release the session, even when the job fails.
        spark.stop()


if __name__ == "__main__":
    main()