Compiling and running a Java-based job
This example shows how to compile and run a Java-based Spark job using Maven. First, write the Spark application itself, SimpleApp.java:
/* SimpleApp.java */
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.Dataset;
 
public class SimpleApp {
  public static void main(String[] args) {
    String logFile = "YOUR_SPARK_HOME/README.md"; // Should be some file on your system
    SparkSession spark = SparkSession.builder().appName("Simple Application").getOrCreate();
    Dataset<String> logData = spark.read().textFile(logFile).cache();
 
    long numAs = logData.filter(s -> s.contains("a")).count();
    long numBs = logData.filter(s -> s.contains("b")).count();
 
    System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
 
    spark.stop();
  }
}
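This program counts the lines containing "a" and the lines containing "b" in the Spark README. Replace YOUR_SPARK_HOME with the directory where Spark is installed.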
You also need to create a Maven Project Object Model (POM) file, pom.xml, as shown in the following example:
<project>
  <groupId>edu.berkeley</groupId>
  <artifactId>simple-project</artifactId>
  <modelVersion>4.0.0</modelVersion>
  <name>Simple Project</name>
  <packaging>jar</packaging>
  <version>1.0</version>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency> <!-- Spark dependency -->
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.12</artifactId>
      <version>2.4.0</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>
</project>
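With the application and the POM in place, package the job with Maven and run it with spark-submit. A minimal sketch, assuming SimpleApp.java sits under the standard Maven source layout (src/main/java) and Spark is installed at YOUR_SPARK_HOME:
# Package a JAR containing your application
$ mvn package

# Use spark-submit to run the application locally on four cores
$ YOUR_SPARK_HOME/bin/spark-submit \
  --class "SimpleApp" \
  --master local[4] \
  target/simple-project-1.0.jar
The JAR name target/simple-project-1.0.jar follows from the artifactId and version declared in the POM above.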
In summary, to compile and run a Java-based Spark job:
- Install Apache Spark 2.4.x, JDK 8.x, and Maven.
- Write your Spark program in a .java file, such as SimpleApp.java above.
- Write a pom.xml file. This is where your project's dependencies and build configuration reside.
- If the cluster is Kerberized, ensure the required security token (Kerberos ticket) is authorized to compile and execute the workload, as sketched after this list.
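On a Kerberized cluster, one common approach is to obtain a ticket with kinit before submitting, or to pass a principal and keytab so Spark can renew tickets for long-running jobs. A sketch, assuming a YARN deployment; the user, realm, and keytab path are placeholders:
# Obtain a Kerberos ticket for the submitting user
$ kinit your_user@YOUR_REALM

# Alternatively, let Spark manage ticket renewal from a keytab (YARN deployments)
$ YOUR_SPARK_HOME/bin/spark-submit \
  --class "SimpleApp" \
  --master yarn \
  --principal your_user@YOUR_REALM \
  --keytab /path/to/your_user.keytab \
  target/simple-project-1.0.jar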
 
