Commit 754dd05b authored by Gradl, Tobias
Browse files

Implement git capabilities of processing-adapters (#406)

parent 543d1bd8
Subproject commit 3c972deafe6fecc609c39f62c897ef9f2ab90ead
Subproject commit 7738d94162679e2af797f954aacdb5926b59b943
......@@ -63,6 +63,7 @@
<value>http://purl.org/cld/accpol/partial</value>
</util:list>
<bean id="timedCrawlManager" class="eu.dariah.de.search.crawling.TimedCrawlManagerImpl">
<property name="debugging" value="${crawling.debugging:false}" />
<property name="autocrawlOnline" value="${crawling.automation.online:true}" />
......@@ -70,10 +71,9 @@
<property name="syncInterval" value="${crawling.automation.sync_interval:300}" />
<property name="timeout" value="${crawling.timeout:172800}" />
<property name="maxPoolSize" value="${crawling.max_threads:4}" />
<property name="offlineProcessingChains">
<!-- <property name="offlineProcessingChains">
<map>
<entry key="OAI-PMH" value="indexCleaner,xmlBatchFileProcessor" />
<!-- <entry key="XML" value="fileUnpacker,fileUnarchiver,xmlChunker,indexCleaner,xmlBatchFileProcessor" /> -->
<entry key="Git Repository" value="fileUnpacker,fileUnarchiver,indexCleaner,xmlBatchFileProcessor" />
<entry key="XML" value="fileUnpacker,fileUnarchiver,indexCleaner,xmlBatchFileProcessor" />
<entry key="JSON" value="fileUnpacker,fileUnarchiver,indexCleaner,jsonBatchFileProcessor" />
......@@ -86,15 +86,34 @@
<map>
<entry key="OAI-PMH" value="oaipmhCrawler,indexCleaner,xmlBatchFileProcessor" />
<entry key="Git Repository" value="gitCrawler, fileUnpacker,fileUnarchiver,indexCleaner,xmlBatchFileProcessor" />
<!-- <entry key="XML" value="fileDownloader,fileUnpacker,fileUnarchiver,xmlChunker,indexCleaner,xmlBatchFileProcessor" /> -->
<entry key="XML" value="fileDownloader,fileUnpacker,fileUnarchiver,indexCleaner,xmlBatchFileProcessor" />
<entry key="XML" value="fileDownloader,fileUnpacker,fileUnarchiver,xmlChunker,indexCleaner,xmlBatchFileProcessor" />
<entry key="JSON" value="fileDownloader,fileUnpacker,fileUnarchiver,indexCleaner,jsonBatchFileProcessor" />
<entry key="CSV" value="fileDownloader,fileUnpacker,fileUnarchiver,indexCleaner,csvBatchFileProcessor" />
<entry key="TSV" value="fileDownloader,fileUnpacker,fileUnarchiver,indexCleaner,tsvBatchFileProcessor" />
<entry key="TEXT" value="fileDownloader,fileUnpacker,fileUnarchiver,indexCleaner,textBatchFileProcessor" />
</map>
</property> -->
<!-- accessChains: maps an access type key to the name of the bean listed in its value.
     gitCrawler and fileDownloader are declared as prototype beans further down in this
     configuration; the oaipmhCrawler definition is not fully visible in this excerpt.
     NOTE(review): how TimedCrawlManagerImpl resolves these names is not shown here. -->
<property name="accessChains">
<map>
<entry key="OAI-PMH" value="oaipmhCrawler" />
<entry key="Git Repository" value="gitCrawler" />
<entry key="Online file" value="fileDownloader" />
</map>
</property>
<!-- fileProcessingChains: maps a file type key to a comma-separated list of bean names.
     Every chain starts with fileUnpacker,fileUnarchiver,indexCleaner and differs only in
     the final, format-specific batch file processor.
     NOTE(review): presumably the names are executed in the listed order; confirm against
     the consumer of this property. -->
<property name="fileProcessingChains">
<map>
<entry key="XML" value="fileUnpacker,fileUnarchiver,indexCleaner,xmlBatchFileProcessor" />
<entry key="JSON" value="fileUnpacker,fileUnarchiver,indexCleaner,jsonBatchFileProcessor" />
<entry key="CSV" value="fileUnpacker,fileUnarchiver,indexCleaner,csvBatchFileProcessor" />
<entry key="TSV" value="fileUnpacker,fileUnarchiver,indexCleaner,tsvBatchFileProcessor" />
<entry key="TEXT" value="fileUnpacker,fileUnarchiver,indexCleaner,textBatchFileProcessor" />
</map>
</property>
</bean>
<!-- antiPatterns: list of String path patterns. The single entry covers everything below
     a .git directory.
     NOTE(review): presumably used to exclude Git metadata from file processing; the
     consumer of this list is not visible in this excerpt. -->
<util:list id="antiPatterns" value-type="java.lang.String">
<value>.git/**</value>
</util:list>
<!-- indexCleaner: prototype-scoped bean; referenced by name in the processing chain values above. -->
<bean id="indexCleaner" class="eu.dariah.de.search.crawling.crawler.IndexCleaner" scope="prototype" />
......@@ -102,9 +121,9 @@
<property name="politenessTimespan" value="${crawling.politeness_timespan:500}" />
</bean>
<!-- NOTE(review): this gitCrawler definition is backed by OaiPmhCrawlerImpl and shares its
     id with the GitCrawlerImpl bean that follows. It looks like the pre-change side of the
     diff; confirm that only one gitCrawler definition remains in the actual file. -->
<bean id="gitCrawler" class="eu.dariah.de.search.crawling.crawler.OaiPmhCrawlerImpl" scope="prototype">
<property name="politenessTimespan" value="${crawling.politeness_timespan:500}" />
</bean>
<!-- gitCrawler: prototype-scoped crawler; named by the "Git Repository" entry of accessChains. -->
<bean id="gitCrawler" class="eu.dariah.de.search.crawling.crawler.GitCrawlerImpl" scope="prototype" />
<!-- gitRepositoryAdapter: prototype-scoped adapter from the processing git package; not
     referenced by name elsewhere in this excerpt. -->
<bean id="gitRepositoryAdapter" class="de.unibamberg.minf.processing.git.adapter.GitRepositoryAdapterImpl" scope="prototype" />
<!-- fileDownloader: prototype-scoped; named by the "Online file" entry of accessChains. -->
<bean id="fileDownloader" class="eu.dariah.de.search.crawling.files.FileDownloader" scope="prototype" />
<!-- fileUnpacker: prototype-scoped; first name in every fileProcessingChains entry. -->
<bean id="fileUnpacker" class="eu.dariah.de.search.crawling.files.FileUnpacker" scope="prototype" />
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment