Commit 49b141da authored by Gradl, Tobias's avatar Gradl, Tobias
Browse files

446: Reimplement automatic online crawl capabilities (OPENED)

Task-Url: #446
parent d37abc28
Pipeline #31547 passed with stage
in 25 seconds
......@@ -70,7 +70,7 @@ public abstract class BaseCrawlRunner implements ScheduledRunnable, DisposableBe
}
}
protected static NextExecution calculateNextExecution(String updatePeriod, DateTime lastStart, DateTime lastEnd) {
public static NextExecution calculateNextExecution(String updatePeriod, DateTime lastStart, DateTime lastEnd) {
if (lastStart==null) {
throw new IllegalArgumentException("No lastStart value provided: NULL");
}
......
......@@ -31,6 +31,8 @@ import de.unibamberg.minf.processing.service.base.ProcessingService.ProcessingSe
import de.unibamberg.minf.processing.service.base.ResourceProcessingService;
import eu.dariah.de.search.Constants.AccessMethods;
import eu.dariah.de.search.Constants.FileTypes;
import eu.dariah.de.search.automation.schedule.BaseCrawlRunner;
import eu.dariah.de.search.automation.schedule.NextExecution;
import eu.dariah.de.search.crawling.crawler.Crawler;
import eu.dariah.de.search.crawling.crawler.Processor;
import eu.dariah.de.search.crawling.gtf.CrawlingExecutionContext;
......@@ -39,7 +41,6 @@ import eu.dariah.de.search.model.Dataset;
import eu.dariah.de.search.model.Collection;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.pojo.DatamodelPojo;
import eu.dariah.de.search.query.execution.AggregationService;
import eu.dariah.de.search.service.CollectionService;
import eu.dariah.de.search.service.CrawlService;
......@@ -121,12 +122,10 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
modified = true;
}
for (Dataset ds : endpoint.getDatasets()) {
if (ds.getId().equals(datamodel.getId())) {
if (ds.isNew() || ds.isError()) {
ds.setNew(false);
ds.setError(false);
modified = true;
}
if (ds.getId().equals(datamodel.getId()) && (ds.isNew() || ds.isError())) {
ds.setNew(false);
ds.setError(false);
modified = true;
}
}
if (modified) {
......@@ -254,12 +253,12 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
// Online but no access type detected
if (access==null && c.getBaseCrawlId()==null) {
logger.error("Unknown access type [{}]; cancelling crawl", ep.getAccessType());
this.updateCrawl(c.getId(), ProcessingServiceStates.ERROR);
this.updateCrawlAndCollection(c.getId(), ProcessingServiceStates.ERROR);
return null;
}
if (file==null) {
logger.error("Unknown file type method [{}]; cancelling crawl", ep.getFileType());
this.updateCrawl(c.getId(), ProcessingServiceStates.ERROR);
this.updateCrawlAndCollection(c.getId(), ProcessingServiceStates.ERROR);
return null;
}
......@@ -322,18 +321,41 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
/**
 * Callback invoked when the crawl service identified by {@code serviceId}
 * completes successfully: drops the service from the internal cache and
 * marks the associated crawl (and its collection's schedule) as COMPLETE.
 *
 * @param serviceId id of the processing service that finished
 */
@Override
public synchronized void finished(UUID serviceId) {
String crawlId = this.removeServiceFromCache(serviceId);
// Fix: the diff left both the pre-change updateCrawl(...) call and the
// post-change call in place; only updateCrawlAndCollection must run,
// otherwise the crawl would be persisted twice.
this.updateCrawlAndCollection(crawlId, ProcessingServiceStates.COMPLETE);
}
/**
 * Callback invoked when the crawl service identified by {@code serviceId}
 * fails: drops the service from the internal cache and marks the associated
 * crawl (and its collection's schedule) as ERROR.
 *
 * @param serviceId id of the processing service that errored
 */
@Override
public synchronized void error(UUID serviceId) {
String crawlId = this.removeServiceFromCache(serviceId);
// Fix: remove the leftover pre-change updateCrawl(...) call from the diff;
// a single updateCrawlAndCollection call performs both the crawl update
// and the collection schedule update.
this.updateCrawlAndCollection(crawlId, ProcessingServiceStates.ERROR);
}
/**
 * Persists the given state on the crawl and, when the crawl could be
 * resolved, refreshes the owning collection's scheduling information.
 *
 * @param crawlId id of the crawl to update (may be null; then nothing happens)
 * @param state   the processing state to record
 */
private void updateCrawlAndCollection(String crawlId, ProcessingServiceStates state) {
Crawl updatedCrawl = this.updateCrawl(crawlId, state);
if (updatedCrawl == null) {
// Nothing was updated (unknown id or missing arguments) -> no schedule refresh
return;
}
this.updateCollection(updatedCrawl);
}
private void updateCollection(Crawl cr) {
Collection c = collectionService.findById(cr.getCollectionId());
NextExecution ne = BaseCrawlRunner.calculateNextExecution(c.getUpdatePeriod(), cr.getCreated(), cr.getModified());
Dataset ds = c.getEndpoints().stream()
.filter(e -> e.getId().equals(cr.getEndpointId()))
.findAny().orElseThrow().getDatasets().stream()
.filter(d -> d.getId().equals(cr.getDatamodelId()))
.findAny().orElseThrow();
private void updateCrawl(String crawlId, ProcessingServiceStates state) {
ds.setNextExecution(ne.getNextExecutionTimestamp());
ds.setOutdated(false);
collectionService.saveCollection(c);
}
// Persists the given processing state on the crawl identified by crawlId and
// returns the updated Crawl, or null when the arguments are missing.
// NOTE(review): this hunk contains diff artifacts — both the pre-change
// "return;" and the post-change "return null;" are shown below; only
// "return null;" belongs in the resulting source (the return type changed
// from void to Crawl in this commit).
private Crawl updateCrawl(String crawlId, ProcessingServiceStates state) {
if (crawlId==null || state==null) {
return;
return null;
}
Crawl c = crawlService.findById(crawlId);
if (state==ProcessingServiceStates.ERROR) {
......@@ -352,6 +374,7 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
// NOTE(review): lines elided by the diff marker above — the state-to-flag
// mapping between the ERROR check and the save is not visible here.
c.setComplete(false);
}
crawlService.save(c);
// Return the persisted crawl so callers (updateCrawlAndCollection) can
// refresh the collection's schedule.
return c;
}
private String removeServiceFromCache(UUID uuid) {
......
Markdown is supported
Attach a file by drag &amp; drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment