Commit 770e394e authored by Gradl, Tobias's avatar Gradl, Tobias
Browse files

446: Reimplement automatic online and offline crawl capabilities

(OPENED)

Task-Url: #446
parent cf3dfc99
Pipeline #31077 passed with stage
in 2 minutes and 14 seconds
......@@ -18,9 +18,11 @@ import org.springframework.web.bind.annotation.ResponseBody;
import com.fasterxml.jackson.databind.node.ObjectNode;
import de.unibamberg.minf.core.web.controller.DataTableList;
import de.unibamberg.minf.core.web.pojo.MessagePojo;
import de.unibamberg.minf.core.web.pojo.ModelActionPojo;
import eu.dariah.de.search.automation.CollectionSyncService;
import eu.dariah.de.search.config.CrawlingConfigProperties;
import eu.dariah.de.search.crawling.TimedCrawlManagerImpl;
import eu.dariah.de.search.pojo.CollectionPojo;
import eu.dariah.de.search.pojo.DatasetPojo;
import eu.dariah.de.search.pojo.DatasourcePojo;
......@@ -36,6 +38,7 @@ public class CollectionController extends BaseController {
@Autowired private CollectionConverter collectionConverter;
@Autowired private CollectionSyncService crSyncService;
@Autowired private DatamodelService datamodelService;
@Autowired private TimedCrawlManagerImpl crawlManager;
@Autowired private CrawlingConfigProperties crawlingConfig;
......@@ -88,6 +91,17 @@ public class CollectionController extends BaseController {
return result;
}
@RequestMapping(method=GET, value={"/async/triggerOnline"})
public @ResponseBody ModelActionPojo triggerOnline(Model model, Locale locale) {
	// Manually trigger the "online" crawl cycle: enqueue all new and outdated
	// datasets for crawling (same operation the timed scheduler performs).
	crawlManager.enqueueNewAndOutdatedDatasets();
	// FIX: previously this reported success=true while also attaching a
	// MessagePojo("error", "~error.head", "~error.body") — copy-paste residue
	// from an error path. A successful trigger must not carry an error message.
	ModelActionPojo result = new ModelActionPojo(true);
	return result;
}
@RequestMapping(method=GET, value={"/getColregStatus"})
public @ResponseBody ModelActionPojo getColregStatus(Model model, Locale locale) {
ModelActionPojo result = new ModelActionPojo(true);
......
......@@ -27,6 +27,7 @@ import de.unibamberg.minf.core.web.pojo.ModelActionPojo;
import eu.dariah.de.search.automation.DmeSyncService;
import eu.dariah.de.search.config.CrawlingConfigProperties;
import eu.dariah.de.search.config.MainConfigProperties;
import eu.dariah.de.search.crawling.TimedCrawlManagerImpl;
import eu.dariah.de.search.mapping.MappingGenerationService;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.pojo.DatamodelPojo;
......@@ -40,6 +41,7 @@ public class DatamodelController extends BaseController {
@Autowired private DatamodelConverter datamodelConverter;
@Autowired private MappingGenerationService mappingGenerationService;
@Autowired private DmeSyncService dmeSyncService;
@Autowired private TimedCrawlManagerImpl crawlManager;
@Autowired private MainConfigProperties config;
@Autowired private CrawlingConfigProperties crawlingConfig;
......@@ -117,6 +119,16 @@ public class DatamodelController extends BaseController {
return result;
}
@RequestMapping(method=GET, value={"/async/triggerOffline"})
public @ResponseBody ModelActionPojo triggerOffline(Model model, Locale locale) {
	// Manually trigger the "offline" crawl cycle: recreate indexes for datamodels
	// with outdated mappings and reindex their data from already-harvested sources.
	crawlManager.reindexOutdatedData();
	// FIX: previously this reported success=true while also attaching a
	// MessagePojo("error", "~error.head", "~error.body") — copy-paste residue
	// from an error path. A successful trigger must not carry an error message.
	ModelActionPojo result = new ModelActionPojo(true);
	return result;
}
@RequestMapping(method=GET, value={"/getDmeStatus"})
public @ResponseBody ModelActionPojo getDmeStatus(Model model, Locale locale) {
ModelActionPojo result = new ModelActionPojo(true);
......
......@@ -121,17 +121,9 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
try {
statusMapslock.lock();
// TODO Reimplement once a new reindexing strategy is in place
/*if (autocrawlOffline) {
// Handle outdated models if any and conditions are met
List<ExtendedDatamodelContainer> refreshDatamodels = this.determineRefreshableDatamodels();
for (ExtendedDatamodelContainer datamodel : refreshDatamodels) {
// Drop all indexed data and recreate index with new mapping
if (this.recreateIndex(datamodel)) {
this.reindexDatamodel(datamodel);
if (autocrawlOffline) {
this.reindexOutdatedData();
}
}
}*/
if (autocrawlOnline) {
// Handle new or updated datasets
......@@ -148,8 +140,18 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
}
}
public void reindexOutdatedData() {
	// Reindex every datamodel whose mapping is considered refreshable: the index
	// is dropped and recreated with the new mapping first; only when recreation
	// succeeds is the datamodel's data actually reindexed.
	for (ExtendedDatamodelContainer outdatedModel : this.determineRefreshableDatamodels()) {
		boolean indexRecreated = this.recreateIndex(outdatedModel);
		if (!indexRecreated) {
			// Index could not be recreated — skip reindexing to avoid writing
			// into a stale or missing index.
			continue;
		}
		this.reindexDatamodel(outdatedModel);
	}
}
private void enqueueNewAndOutdatedDatasets() {
public void enqueueNewAndOutdatedDatasets() {
DateTime syncTimestamp = DateTime.now();
List<Crawl> lastOnlineCrawls;
Crawl lastOnlineCrawl;
......@@ -274,13 +276,16 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
}
/*private List<ExtendedDatamodelContainer> determineRefreshableDatamodels() {
private List<ExtendedDatamodelContainer> determineRefreshableDatamodels() {
List<ExtendedDatamodelContainer> refreshDatamodels = new ArrayList<ExtendedDatamodelContainer>();
// Start of outdated reprocessing only between 1 and 3 o'clock
if (DateTime.now().getHourOfDay()>0 && DateTime.now().getHourOfDay()<=2) {
for (ExtendedDatamodelContainer datamodel : datamodelService.findAll()) {
if (mappingGenerationService.getIsOutdated(datamodel)) {
// TODO: Reimplement
/*if (mappingGenerationService.getIsOutdated(datamodel)) {
if (!this.outdatedDatamodelIdCrawlIdMap.containsKey(datamodel.getId())) {
// Start blocking of outdated datamodel
this.outdatedDatamodelIdCrawlIdMap.put(datamodel.getId(), null);
......@@ -291,11 +296,11 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
refreshDatamodels.add(datamodel);
}
}
}
}*/
}
}
return refreshDatamodels;
}*/
}
......
package eu.dariah.de.search.es.client;

import java.io.IOException;
import java.util.Map;

import org.elasticsearch.action.bulk.BulkResponse;

/**
 * Low-level Elasticsearch indexing operations.
 */
public interface IndexingClient {
	/**
	 * Indexes the given id-to-source map into the named index as a single bulk request.
	 *
	 * FIX: the diff left BOTH the old declaration (without {@code throws}) and the
	 * new one in place — duplicate, conflicting declarations of the same method do
	 * not compile. Only the {@code throws IOException} variant is kept, matching
	 * the updated implementation, which now propagates bulk-request failures to
	 * the caller instead of swallowing them.
	 *
	 * @param index       name of the target index
	 * @param idSourceMap document id mapped to its source fields
	 * @return the Elasticsearch bulk response
	 * @throws IOException if the bulk request cannot be executed
	 */
	public BulkResponse bulkIndexSources(String index, Map<String, Map<String, Object>> idSourceMap) throws IOException;

	public long indexSources(String indexName, Map<String, Map<String, Object>> idSourceMap);

	public void indexSource(String indexName, String resourceId, String source);
}
......@@ -85,11 +85,11 @@ public class IndexingClientImpl extends BaseEsClientImpl implements IndexingClie
}
@Override
public BulkResponse bulkIndexSources(String index, Map<String, Map<String, Object>> idSourceMap) {
public BulkResponse bulkIndexSources(String index, Map<String, Map<String, Object>> idSourceMap) throws IOException {
BulkRequest bulkRequest = new BulkRequest();
IndexRequest indexRequest;
String strSource;
try {
for (Entry<String, Map<String, Object>> source : idSourceMap.entrySet()) {
try {
strSource = indexingObjectMapper.writeValueAsString(source.getValue());
......@@ -103,10 +103,6 @@ public class IndexingClientImpl extends BaseEsClientImpl implements IndexingClie
}
}
return client.bulk(bulkRequest, RequestOptions.DEFAULT);
} catch (IOException e) {
logger.error("Error while bulk indexing resources", e);
}
return null;
}
}
......@@ -66,7 +66,7 @@ public class SearchClientImpl extends BaseEsClientImpl implements SearchClient {
}
return response.getCount();
} catch (Exception e) {
logger.error("Failed to execute count: " + e.getMessage(), e);
//logger.error("Failed to execute count: " + e.getMessage(), e);
}
return 0;
}
......
......@@ -133,6 +133,7 @@ public class IndexingServiceImpl implements IndexingService {
sources.put(rc.getId(), rc.toSource());
}
try {
BulkResponse bulkResponse = indexingClient.bulkIndexSources(this.index, sources);
int docCount = bulkResponse.getItems().length;
......@@ -201,6 +202,11 @@ public class IndexingServiceImpl implements IndexingService {
}
return docCount;
} catch (Exception e) {
log.error("Failed to execute bulk index request", e);
return 0;
}
}
private JsonNode resourceToSimpleNode(Resource r) {
......
Subproject commit 60db673cb20da87f563e04fc8aa05b09e72b89b4
Subproject commit 0201f7065c7d782a49ab7faa01876f01aabf3c31
Subproject commit df67695877aa86e7ce77434952e886a616b7303e
Subproject commit bc9348fa600b80863e88dfe43a7d570ec571865e
Subproject commit b951057e62d916791f2509a861e58f8c317c2e00
Subproject commit 25eda28254d770a11f0a831c20d6af715f362382
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.