<dataConfig>
<dataSource type="BinFileDataSource" />
<document>
<entity name="files" dataSource="binary" rootEntity="false"
processor="FileListEntityProcessor"
baseDir="D:/temp" fileName=".*.(doc)|(pdf)|(xls)|(ppt)|(docx)"
recursive="true">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<entity
name="documentImport"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="title" name="title" meta="true"/>
<field column="text" name="text"/>
</entity>
</entity>
</document>
</dataConfig>
|