Commit 7bc1bd59 authored by Gradl, Tobias's avatar Gradl, Tobias
Browse files

436: Support passing of collection metadata to datamodels

(ExecutionContext) (OPENED)

Task-Url: #436
parent a257241c
Pipeline #25330 failed with stage
in 20 seconds
......@@ -18,9 +18,9 @@ allprojects {
}
}
ext {
coreVersion = "6.5-SNAPSHOT"
gtfVersion = "2.3.1-SNAPSHOT"
processingVersion = "4.3.2-SNAPSHOT"
coreVersion = "6.5.1-SNAPSHOT"
gtfVersion = "2.3.2-SNAPSHOT"
processingVersion = "4.3.3-SNAPSHOT"
colregModelVersion = "4.3.4-RELEASE"
dariahSpVersion = "2.1.7-RELEASE"
......
......@@ -11,6 +11,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import de.unibamberg.minf.core.web.localization.LocaleConverter;
......@@ -68,7 +69,7 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
String result = restTemplate.getForObject(String.format(this.getFetchDetailsUrl(), id), String.class);
CollectionApiResultPojo<ExtendedCollectionApiPojo> rPojo = apiObjectMapper.readValue(result, new TypeReference<CollectionApiResultPojo<ExtendedCollectionApiPojo>>() {});
if (rPojo.getContent()!=null && rPojo.getContent().size()>0) {
if (rPojo.getContent()!=null && !rPojo.getContent().isEmpty()) {
return rPojo.getContent().iterator().next();
}
return null;
......@@ -162,6 +163,7 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
cCurrent.setNames(cImported.getNames());
cCurrent.setModified(cImported.getModified());
cCurrent.setUpdatePeriod(cImported.getUpdatePeriod());
cCurrent.setCollectionMetadata(cImported.getCollectionMetadata());
cCurrent.setUpdate(true);
this.mergeEndpoints(cCurrent, cImported);
......@@ -257,6 +259,13 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
ExtendedCollectionApiPojo fetchedCollection = this.fetchDetails(foreignEntityId);
Collection convertedCollection = new Collection();
try {
convertedCollection.setCollectionMetadata(apiObjectMapper.writeValueAsString(fetchedCollection));
} catch (JsonProcessingException e) {
logger.error("Failed to JSON serialized collection metadata");
}
if (fetchedCollection.getTitles()!=null) {
convertedCollection.setNames(new HashMap<String, String>());
String languageCode;
......
......@@ -12,8 +12,10 @@ import org.springframework.web.bind.annotation.ResponseBody;
import de.unibamberg.minf.core.web.controller.BaseTranslationController;
import de.unibamberg.minf.core.web.pojo.ModelActionPojo;
import eu.dariah.de.search.crawling.CrawlManager;
import eu.dariah.de.search.model.Collection;
import eu.dariah.de.search.model.Crawl;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.service.CollectionService;
import eu.dariah.de.search.service.CrawlService;
import eu.dariah.de.search.service.EndpointService;
import eu.dariah.de.search.service.DatamodelService;
......@@ -25,6 +27,7 @@ public class CrawlController extends BaseTranslationController {
@Autowired private CrawlService crawlService;
@Autowired private CrawlManager crawlManager;
@Autowired private EndpointService endpointService;
@Autowired private CollectionService collectionService;
public CrawlController() {
super("crawls");
......@@ -54,9 +57,10 @@ public class CrawlController extends BaseTranslationController {
ModelActionPojo result = new ModelActionPojo(true);
Crawl cBase = crawlService.findById(crawlId);
Endpoint ep = endpointService.findById(cBase.getCollectionId(), cBase.getEndpointId());
Collection c = collectionService.findById(cBase.getCollectionId());
if (cBase!=null) {
crawlManager.performOfflineCrawl(ep, schemaService.findById(cBase.getDatamodelId()), cBase.getId());
crawlManager.performOfflineCrawl(c, ep, schemaService.findById(cBase.getDatamodelId()), cBase.getId());
}
return result;
}
......
......@@ -8,7 +8,7 @@ import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
public interface CrawlManager extends ProcessingListener {
public void performOfflineCrawl(Endpoint ep, ExtendedDatamodelContainer sc, String baseCrawlId);
public void performOfflineCrawl(Collection collection, Endpoint ep, ExtendedDatamodelContainer sc, String baseCrawlId);
public void performOnlineCrawl(Collection ds, Endpoint ep, ExtendedDatamodelContainer sc);
public CrawlState getCrawlState(String crawlId);
......
......@@ -20,6 +20,11 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.MissingNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import de.unibamberg.minf.processing.exception.GenericProcessingException;
import de.unibamberg.minf.processing.exception.ProcessingConfigException;
import de.unibamberg.minf.processing.service.base.ProcessingService.ProcessingServiceStates;
......@@ -43,7 +48,7 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
@Autowired protected CrawlService crawlService;
@Autowired protected CollectionService collectionService;
@Autowired private ObjectMapper objectMapper;
private String baseDownloadPath;
......@@ -122,13 +127,26 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
if (modified) {
collectionService.saveCollection(collection);
}
this.performCrawl(crawlService.createOnlineCrawl(collection.getId(), endpoint.getId(), datamodel.getId()), endpoint, datamodel);
this.performCrawl(crawlService.createOnlineCrawl(collection.getId(), endpoint.getId(), datamodel.getId()), endpoint, datamodel, this.getSessionData(collection));
}
@Override
public void performOfflineCrawl(Endpoint endpoint, ExtendedDatamodelContainer datamodel, String baseCrawlId) {
this.performCrawl(crawlService.createOfflineCrawl(baseCrawlId), endpoint, datamodel);
public void performOfflineCrawl(Collection collection, Endpoint endpoint, ExtendedDatamodelContainer datamodel, String baseCrawlId) {
this.performCrawl(crawlService.createOfflineCrawl(baseCrawlId), endpoint, datamodel, this.getSessionData(collection));
}
/**
 * Builds the session-data node handed to the crawling execution context.
 * Wraps the collection's serialized metadata (a JSON string) under the
 * "collection" key of a fresh object node so datamodel scripts can access it.
 *
 * @param collection the collection whose metadata should be exposed
 * @return an ObjectNode {"collection": <parsed metadata>}, or MissingNode
 *         when the metadata is absent, blank, or not parseable as JSON
 */
private JsonNode getSessionData(Collection collection) {
	String metadata = collection.getCollectionMetadata();
	// No metadata stored for this collection -> nothing to pass along
	if (metadata == null || metadata.isBlank()) {
		return MissingNode.getInstance();
	}
	try {
		ObjectNode sessionNode = objectMapper.createObjectNode();
		sessionNode.set("collection", objectMapper.readTree(metadata));
		return sessionNode;
	} catch (Exception e) {
		// Malformed metadata must not break the crawl; fall back to MissingNode
		logger.error("Failed to read collection metadata to session data", e);
		return MissingNode.getInstance();
	}
}
@Override
......@@ -192,14 +210,14 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
}
}
private void performCrawl(Crawl crawl, Endpoint endpoint, ExtendedDatamodelContainer datamodel) {
private void performCrawl(Crawl crawl, Endpoint endpoint, ExtendedDatamodelContainer datamodel, JsonNode sessionData) {
if (crawl==null || endpoint==null || datamodel==null) {
logger.warn("Could not create crawl pipeline. Either crawl, endpoint or datamodel were unset");
return;
}
try {
MDC.put("uid", crawl.getId());
CrawlPipeline pipeline = this.createPipeline(endpoint, datamodel, crawl);
CrawlPipeline pipeline = this.createPipeline(endpoint, datamodel, crawl, sessionData);
if (pipeline!=null) {
this.enqueue(pipeline, crawl);
}
......@@ -213,7 +231,7 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
}
}
private CrawlPipeline createPipeline(Endpoint ep, ExtendedDatamodelContainer sc, Crawl c) throws ProcessingConfigException, GenericProcessingException, IOException {
private CrawlPipeline createPipeline(Endpoint ep, ExtendedDatamodelContainer sc, Crawl c, JsonNode sessionData) throws ProcessingConfigException, GenericProcessingException, IOException {
String access = null;
String file = null;
for (AccessMethods mAv : AccessMethods.values()) {
......@@ -240,7 +258,7 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
return null;
}
CrawlingExecutionContext ctx = new CrawlingExecutionContext(this.baseDownloadPath, c);
CrawlingExecutionContext ctx = new CrawlingExecutionContext(this.baseDownloadPath, c, sessionData);
List<Crawler> crawlers = this.getCrawlers(access, file, c.getBaseCrawlId()==null);
ResourceIndexingServiceImpl indexer;
for (Crawler crawler : crawlers) {
......
......@@ -10,11 +10,13 @@ import java.util.concurrent.locks.ReentrantLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.unibamberg.minf.gtf.context.ExecutionContext;
import de.unibamberg.minf.processing.exception.GenericProcessingException;
import de.unibamberg.minf.processing.exception.ProcessingConfigException;
import de.unibamberg.minf.processing.listener.ProcessingListener;
import eu.dariah.de.search.crawling.crawler.Crawler;
import net.bytebuddy.asm.Advice.This;
public class CrawlPipelineImpl implements CrawlPipeline {
protected static final Logger logger = LoggerFactory.getLogger(CrawlPipelineImpl.class);
......@@ -25,6 +27,7 @@ public class CrawlPipelineImpl implements CrawlPipeline {
private String crawlId;
private ExecutorService pipelineExecutor = Executors.newSingleThreadExecutor();
private ReentrantLock lock = new ReentrantLock();
private ExecutionContext executionContext;
private ProcessingServiceStates state = ProcessingServiceStates.WAITING;
private long stageSize = 0;
......@@ -47,6 +50,9 @@ public class CrawlPipelineImpl implements CrawlPipeline {
// Listener notified about processing/crawl state changes.
@Override public ProcessingListener getListener() { return this.listener; }
@Override public void setListener(ProcessingListener listener) { this.listener = listener; }
// Execution context (working dir, session data) shared with the pipeline's crawlers.
@Override public ExecutionContext getExecutionContext() { return executionContext; }
@Override public void setExecutionContext(ExecutionContext executionContext) { this.executionContext = executionContext; }
// True once a cancel has been requested for this pipeline run.
@Override public boolean isCancellationRequested() { return cancellationRequested; }
public CrawlPipelineImpl(String crawlId, List<Crawler> runnables) throws GenericProcessingException {
......@@ -55,7 +61,7 @@ public class CrawlPipelineImpl implements CrawlPipeline {
}
this.uuid = UUID.randomUUID();
this.runnablesMap = new LinkedHashMap<UUID, Crawler>();
this.runnablesMap = new LinkedHashMap<>();
this.crawlId = crawlId;
try {
for (Crawler svc : runnables) {
......
......@@ -229,7 +229,7 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
if (dataset.getId().equals(datamodel.getId())) {
String baseCrawlId = this.getCompletedOnlineCrawlId(endpoint.getId(), dataset.getId());
if (baseCrawlId!=null) {
this.performOfflineCrawl(endpoint, datamodel, baseCrawlId);
this.performOfflineCrawl(collection, endpoint, datamodel, baseCrawlId);
if (this.debugging) {
logger.debug("");
}
......
......@@ -6,6 +6,8 @@ import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.springframework.util.Assert;
import com.fasterxml.jackson.databind.JsonNode;
import de.unibamberg.minf.gtf.context.ExecutionContext;
import eu.dariah.de.search.model.Crawl;
......@@ -14,17 +16,18 @@ public class CrawlingExecutionContext implements ExecutionContext {
private final String collectionId;
private final String endpointId;
private final String datasetId;
private final JsonNode sessionData;
private final String workingDir;
// Base path under which per-crawl working directories are created.
public String getPathPrefix() { return pathPrefix; }
// Identifiers of the collection / endpoint / datamodel this context belongs to.
public String getCollectionId() { return collectionId; }
public String getEndpointId() { return endpointId; }
public String getDatasetId() { return datasetId; }
// Arbitrary JSON made available to processing scripts for this crawl session.
@Override public JsonNode getSessionData() { return this.sessionData; }
// Directory layout: pathPrefix/collectionId/endpointId/datasetId/ (see constructor).
@Override public String getWorkingDir() { return this.workingDir; }
public CrawlingExecutionContext(String pathPrefix, String collectionId, String endpointId, String datasetId) throws IOException {
public CrawlingExecutionContext(String pathPrefix, String collectionId, String endpointId, String datasetId, JsonNode sessionData) throws IOException {
Assert.notNull(pathPrefix);
Assert.notNull(collectionId);
Assert.notNull(endpointId);
......@@ -33,7 +36,7 @@ public class CrawlingExecutionContext implements ExecutionContext {
this.collectionId = collectionId;
this.endpointId = endpointId;
this.datasetId = datasetId;
this.sessionData = sessionData;
this.workingDir = pathPrefix + File.separator + this.getCollectionId() + File.separator + this.getEndpointId() + File.separator + this.getDatasetId() + File.separator;
File workingDir = new File(this.workingDir);
......@@ -41,7 +44,7 @@ public class CrawlingExecutionContext implements ExecutionContext {
FileUtils.forceMkdir(new File(this.workingDir));
}
}
/**
 * Convenience constructor deriving the context identifiers from a crawl entity.
 *
 * @param pathPrefix base directory under which the working directory is created
 * @param c crawl supplying collection, endpoint and datamodel identifiers
 * @param sessionData JSON session data exposed to processing scripts
 * @throws IOException if the working directory cannot be created
 */
public CrawlingExecutionContext(String pathPrefix, Crawl c, JsonNode sessionData) throws IOException {
	this(pathPrefix, c.getCollectionId(), c.getEndpointId(), c.getDatamodelId(), sessionData);
}
}
\ No newline at end of file
......@@ -28,6 +28,8 @@ public class Collection extends BaseNamedEntityImpl {
private String updatePeriod;
private String collectionMetadata;
public String getGeoname() { return geoname; }
public void setGeoname(String geoname) { this.geoname = geoname; }
......@@ -58,4 +60,7 @@ public class Collection extends BaseNamedEntityImpl {
// Configured update/crawl interval for this collection.
public String getUpdatePeriod() { return updatePeriod; }
public void setUpdatePeriod(String updatePeriod) { this.updatePeriod = updatePeriod; }
// Raw JSON string of the collection metadata fetched from the registry;
// parsed into session data when a crawl is executed.
public String getCollectionMetadata() { return collectionMetadata; }
public void setCollectionMetadata(String collectionMetadata) { this.collectionMetadata = collectionMetadata; }
}
\ No newline at end of file
......@@ -52,11 +52,11 @@ public class Endpoint implements Identifiable {
/**
 * Returns the value of the last parameter matching the given name.
 *
 * @param param the parameter name to look up
 * @return the last matching value, or null when no match exists
 *         (guards against both a null and an empty result list)
 */
public String getSingleParamValue(String param) {
	List<String> matchingParamValues = this.getParamValues(param);
	// isEmpty() check prevents IndexOutOfBoundsException on size()-1
	return (matchingParamValues==null || matchingParamValues.isEmpty()) ? null : matchingParamValues.get(matchingParamValues.size()-1);
}
/**
 * Returns the last endpoint parameter matching the given name.
 *
 * @param param the parameter name to look up
 * @return the last matching EndpointParam, or null when no match exists
 *         (guards against both a null and an empty result list)
 */
public EndpointParam getSingleParam(String param) {
	List<EndpointParam> matchingParams = this.getParams(param);
	// isEmpty() check prevents IndexOutOfBoundsException on size()-1
	return (matchingParams==null || matchingParams.isEmpty()) ? null : matchingParams.get(matchingParams.size()-1);
}
}
\ No newline at end of file
......@@ -15,7 +15,6 @@ dependencies {
implementation "org.springframework.boot:spring-boot-starter-webflux"
implementation "org.springframework.boot:spring-boot-starter-validation"
implementation "org.springframework.boot:spring-boot-starter-data-mongodb"
developmentOnly "org.springframework.boot:spring-boot-devtools"
// Web
implementation "org.apache.tomcat.embed:tomcat-embed-jasper"
......@@ -36,8 +35,6 @@ dependencies {
compileOnly "org.projectlombok:lombok"
annotationProcessor "org.projectlombok:lombok"
testCompileOnly "org.projectlombok:lombok"
developmentOnly "org.springframework.boot:spring-boot-devtools"
}
bootWar {
......
......@@ -3,3 +3,30 @@ include(':search-core')
include(':search-ui')
include(':search-docs')
project(':search-docs').projectDir = new File('./docs')
// Composite build setup: when the sibling source checkouts are present,
// resolve the listed binary coordinates against the local projects instead
// of the published artifacts, so cross-repo changes can be built together.
includeBuild('../core') {
dependencySubstitution {
substitute module('de.unibamberg.minf.core:core-metamodel') with project(':core-metamodel')
}
}
includeBuild('../gtf') {
dependencySubstitution {
substitute module('de.unibamberg.minf.gtf:gtf-base') with project(':gtf-base')
substitute module('de.unibamberg.minf.gtf:gtf-core') with project(':gtf-core')
substitute module('de.unibamberg.minf.gtf:gtf-extension-file') with project(':gtf-extension-file')
substitute module('de.unibamberg.minf.gtf:gtf-extension-wiki') with project(':gtf-extension-wiki')
substitute module('de.unibamberg.minf.gtf:gtf-extension-geo') with project(':gtf-extension-geo')
substitute module('de.unibamberg.minf.gtf:gtf-extension-nlp') with project(':gtf-extension-nlp')
substitute module('de.unibamberg.minf.gtf:gtf-extension-vocabulary') with project(':gtf-extension-vocabulary')
substitute module('de.unibamberg.minf.gtf:gtf-extension-clariah') with project(':gtf-extension-clariah')
substitute module('de.unibamberg.minf.gtf:gtf-extension-dai') with project(':gtf-extension-dai')
}
}
includeBuild('../processing') {
dependencySubstitution {
substitute module('de.unibamberg.minf.processing:processing-core') with project(':processing-core')
substitute module('de.unibamberg.minf.processing:processing-adapters') with project(':processing-adapters')
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment