MapReduceIndexerTool Metadata
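The MapReduceIndexerTool generates metadata fields for each
input file when indexing. These fields can be used in morphline commands. They
can also be stored in Solr by adding definitions like the following to your Solr
schema.xml file. After the
MapReduce indexing process completes, the fields defined with indexed="true" are
searchable through Solr; fields defined with indexed="false" and stored="true"
(such as file_download_url and file_upload_url) are returned with search results
but cannot themselves be searched.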
<!-- file metadata -->
<field name="file_download_url" type="string" indexed="false" stored="true" />
<field name="file_upload_url" type="string" indexed="false" stored="true" />
<field name="file_scheme" type="string" indexed="true" stored="true" />
<field name="file_host" type="string" indexed="true" stored="true" />
<field name="file_port" type="int" indexed="true" stored="true" />
<field name="file_path" type="string" indexed="true" stored="true" />
<field name="file_name" type="string" indexed="true" stored="true" />
<field name="file_length" type="tlong" indexed="true" stored="true" />
<field name="file_last_modified" type="tlong" indexed="true" stored="true" />
<field name="file_owner" type="string" indexed="true" stored="true" />
<field name="file_group" type="string" indexed="true" stored="true" />
<field name="file_permissions_user" type="string" indexed="true" stored="true" />
<field name="file_permissions_group" type="string" indexed="true" stored="true" />
<field name="file_permissions_other" type="string" indexed="true" stored="true" />
<field name="file_permissions_stickybit" type="boolean" indexed="true" stored="true" />
Example output:
"file_upload_url":"foo/test-documents/sample-statuses-20120906-141433.avro",
"file_download_url":"hdfs://host1.mycompany.com:8020/user/foo/test-documents/sample-statuses-20120906-141433.avro",
"file_scheme":"hdfs",
"file_host":"host1.mycompany.com",
"file_port":8020,
"file_name":"sample-statuses-20120906-141433.avro",
"file_path":"/user/foo/test-documents/sample-statuses-20120906-141433.avro",
"file_last_modified":1357193447106,
"file_length":1512,
"file_owner":"foo",
"file_group":"foo",
"file_permissions_user":"rw-",
"file_permissions_group":"r--",
"file_permissions_other":"r--",
"file_permissions_stickybit":false,
<< MapReduceIndexerTool | HdfsFindTool >> | |