Using Apache Hive
Also available as:
PDF

Create the UDF class

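You define the UDF logic in a new class that reports the data type of a value from a selected column in a table. The result is a string that contains the name of the Java class holding the value, or NULL when the value is null.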

  1. In IntelliJ, click the vertical project tab, and expand hiveudf: hiveudf > src > main. Select the java directory, and on the context menu, select New > Java Class and name the class, for example, TypeOf.
  2. Extend the GenericUDF class to include the logic that identifies the data type of a column.
    For example:
    package com.mycompany.hiveudf;
    
    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;
    
    /**
     * Hive generic UDF that describes the runtime Java type of its single argument.
     *
     * <p>The result is a string of the form {@code "Type: <fully.qualified.ClassName>"},
     * or {@code "Type: NULL"} when the argument value is null.
     */
    public class TypeOf extends GenericUDF {
      /** Single result holder; it is overwritten and returned on every {@link #evaluate} call. */
      private final Text output = new Text();

      /**
       * Checks the call signature and declares the result type.
       *
       * @param arguments object inspectors for the arguments passed to the function
       * @return the writable string object inspector
       * @throws UDFArgumentException if one of the inherited argument checks fails
       */
      @Override
      public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Inherited helpers; presumably they reject anything other than exactly one
        // primitive argument -- confirm against the GenericUDF documentation.
        checkArgsSize(arguments, 1, 1);
        checkArgPrimitive(arguments, 0);
        return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
      }

      /**
       * Builds the type description for the first argument.
       *
       * @param arguments deferred argument values; only index 0 is read
       * @return the shared {@link Text} field holding the description
       * @throws HiveException if fetching the deferred value fails
       */
      @Override
      public Object evaluate(DeferredObject[] arguments) throws HiveException {
        final Object value = arguments[0].get();
        final String description =
            (value != null) ? "Type: " + value.getClass().getName() : "Type: NULL";
        output.set(description);
        return output;
      }

      /**
       * Produces the display form of this function call.
       *
       * @param children display strings of the argument expressions
       * @return the string built by {@code getStandardDisplayString} for the name {@code TYPEOF}
       */
      @Override
      public String getDisplayString(String[] children) {
        return getStandardDisplayString("TYPEOF", children, ",");
      }
    }