Spark 3 examples with Python or Java applications

This section provides examples of Spark 3 workflows with Python and Java applications.

Spark 3 with a Java application and Hive2 credentials

<!-- Oozie workflow running a Java Spark 3 application, authenticated with a Hive2 credential. -->
<workflow-app name="JavaSpark" xmlns="uri:oozie:workflow:1.0">
    <!-- Hive2 credential definition; both values below are placeholders to be
         filled in for the target cluster. -->
    <credentials>
        <credential name="hive-credential" type="hive2">
            <property>
                <name>hive2.jdbc.url</name>
                <!-- Replace with the full HiveServer2 JDBC URL. -->
                <value>jdbc:hive2://...</value>
            </property>
            <property>
                <name>hive2.server.principal</name>
                <!-- Replace with the HiveServer2 server principal. -->
                <value>...</value>
            </property>
        </credential>
    </credentials>
    <start to="spark-node-javaspark"/>
    <!-- cred="hive-credential" attaches the credential declared above to this action. -->
    <action name="spark-node-javaspark" cred="hive-credential">
        <!-- Spark3 action; resourceManager, nameNode, master, mode and inputFile
             are workflow parameters supplied at submission time. -->
        <spark3 xmlns="uri:oozie:spark3-action:1.0">
            <resource-manager>${resourceManager}</resource-manager>
            <name-node>${nameNode}</name-node>
            <master>${master}</master>
            <mode>${mode}</mode>
            <name>JavaSpark-Example</name>
            <!-- Fully qualified main class inside the application jar below. -->
            <class>com.company.spark.JavaSpark</class>
            <!-- Jar is resolved in the submitting user's HDFS home via wf:user(). -->
            <jar>${nameNode}/user/${wf:user()}/javaspark_lib/JavaSparkForOozie.jar</jar>
            <!-- Argument passed to the application. -->
            <arg>${inputFile}</arg>
        </spark3>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <!-- Reports the last failed node's error message when the action errors out. -->
    <kill name="fail">
        <message>Workflow failed, error
            message[${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end"/>
</workflow-app>

Spark 3 with a Python application and Hive2 credentials

<!-- Oozie workflow running a Python Spark 3 application, authenticated with a Hive2 credential. -->
<workflow-app name="PySpark" xmlns="uri:oozie:workflow:1.0">
    <!-- Hive2 credential definition; both values below are placeholders to be
         filled in for the target cluster. -->
    <credentials>
        <credential name="hive-credential" type="hive2">
            <property>
                <name>hive2.jdbc.url</name>
                <!-- Replace with the full HiveServer2 JDBC URL. -->
                <value>jdbc:hive2://...</value>
            </property>
            <property>
                <name>hive2.server.principal</name>
                <!-- Replace with the HiveServer2 server principal. -->
                <value>...</value>
            </property>
        </credential>
    </credentials>
    <start to="spark-node-pyspark"/>
    <!-- cred="hive-credential" attaches the credential declared above to this action. -->
    <action name="spark-node-pyspark" cred="hive-credential">
        <!-- Spark3 action; resourceManager, nameNode, master, mode, pythonScript
             and inputFile are workflow parameters supplied at submission time. -->
        <spark3 xmlns="uri:oozie:spark3-action:1.0">
            <resource-manager>${resourceManager}</resource-manager>
            <name-node>${nameNode}</name-node>
            <master>${master}</master>
            <mode>${mode}</mode>
            <name>PySpark-Example</name>
            <!-- For a Python application, the <jar> element carries the path to
                 the Python script (no <class> element is used). -->
            <jar>${pythonScript}</jar>
            <!-- Argument passed to the application. -->
            <arg>${inputFile}</arg>
        </spark3>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <!-- Reports the last failed node's error message when the action errors out. -->
    <kill name="fail">
        <message>Workflow failed, error
            message[${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end"/>
</workflow-app>