Commit 657ef77c authored by Gradl, Tobias

Implement git capabilities of processing-adapters (#406)

parent cce844c6
......@@ -20,11 +20,12 @@ stages:
build:
  stage: build
  script: ./gradlew assemble
  only:
    - master
deploy:
  stage: deploy
  script:
    - ./gradlew publish -x test $NEXUS_CREDENTIALS
  only:
    - master
    - sru_opac_mww
    - master
\ No newline at end of file
......@@ -41,6 +41,7 @@ ext {
httpComponentsVersion = "4.5.5"
elasticsearchVersion = "7.3.0"
logbackVersion = "1.1.3"
lombokVersion = "1.18.12"
}
dependencies {
......@@ -88,6 +89,10 @@ dependencies {
testImplementation "ch.qos.logback:logback-core:$logbackVersion"
testImplementation "ch.qos.logback:logback-classic:$logbackVersion"
compileOnly "javax.servlet:servlet-api:2.5"
compileOnly "org.projectlombok:lombok:$lombokVersion"
annotationProcessor "org.projectlombok:lombok:$lombokVersion"
testCompileOnly "org.projectlombok:lombok:$lombokVersion"
}
java {
......
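The Lombok dependency added above is used by the new classes later in this commit (UpdateServiceImpl is annotated with @Slf4j and @Data). A minimal sketch of what those two annotations generate, assuming Lombok and an SLF4J binding are on the classpath; LombokSketch is a hypothetical class and not part of the project:

import lombok.Data;
import lombok.extern.slf4j.Slf4j;

// @Slf4j generates a static SLF4J "log" field; @Data generates getters, setters,
// equals/hashCode and toString for the declared fields.
@Slf4j
@Data
public class LombokSketch {
    private String backupsBasePath;
    private String database;

    public static void main(String[] args) {
        LombokSketch sketch = new LombokSketch();
        sketch.setDatabase("search");          // setter generated by @Data
        log.info("Configured database: {}", sketch.getDatabase());
    }
}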
......@@ -331,14 +331,16 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
}
Endpoint e = new Endpoint();
if (accessPojo.getType().equals(AccessMethods.FILE.toString())) {
if (accessPojo.getSubtype()==null) {
e.setMethod("XML");
} else {
e.setMethod(accessPojo.getSubtype().toUpperCase());
}
} else {
e.setMethod(accessPojo.getType());
e.setAccessType(accessPojo.getType());
e.setFileType(accessPojo.getSubtype());
if (accessPojo.getType().equals(AccessMethods.FILE.toString()) ||
accessPojo.getType().equals(AccessMethods.OAI_PMH.toString()) ||
accessPojo.getType().equals(AccessMethods.OPAC.toString())) {
e.setFileType("XML");
} else if (accessPojo.getType().equals(AccessMethods.GIT.toString())) {
e.setFileType("Text");
}
e.setSet(accessPojo.getSet());
e.setUrl(accessPojo.getUri());
......@@ -381,7 +383,8 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
}
private boolean endpointsAreSame(Endpoint ep1, Endpoint ep2) {
return ep1.getMethod().equals(ep2.getMethod()) &&
return ep1.getAccessType().equals(ep2.getAccessType()) &&
ep1.getFileType().equals(ep2.getFileType()) &&
ep1.getUrl().equals(ep2.getUrl()) &&
(ep1.getSet()==null && ep2.getSet()==null || ep1.getSet().equals(ep2.getSet()) );
}
......
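The hunk above replaces the endpoint's single method value with an accessType/fileType pair: file, OAI-PMH and OPAC endpoints are treated as XML content, Git endpoints as text. A standalone sketch of that mapping; EndpointSketch and AccessMethod are local stand-ins for the project's Endpoint model and AccessMethods enum, and the constant names are assumptions:

// Standalone sketch of the access-type/file-type split introduced above.
public class EndpointMappingSketch {

    enum AccessMethod { FILE, OAI_PMH, OPAC, GIT }

    static class EndpointSketch {
        String accessType;
        String fileType;
    }

    static EndpointSketch fromAccess(AccessMethod type) {
        EndpointSketch e = new EndpointSketch();
        e.accessType = type.toString();
        // Mirrors the branch in CollectionSyncClient: XML for file, OAI-PMH and
        // OPAC endpoints, plain text for Git repositories.
        switch (type) {
            case FILE:
            case OAI_PMH:
            case OPAC:
                e.fileType = "XML";
                break;
            case GIT:
                e.fileType = "Text";
                break;
        }
        return e;
    }

    public static void main(String[] args) {
        EndpointSketch git = fromAccess(AccessMethod.GIT);
        System.out.println(git.accessType + " -> " + git.fileType); // GIT -> Text
    }
}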
......@@ -33,7 +33,6 @@ import eu.dariah.de.search.model.Dataset;
import eu.dariah.de.search.model.Collection;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.query.execution.DocumentService;
import eu.dariah.de.search.service.CollectionService;
import eu.dariah.de.search.service.CrawlService;
import eu.dariah.de.search.service.ResourceIndexingServiceImpl;
......@@ -211,19 +210,19 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
private CrawlPipeline createPipeline(Endpoint ep, ExtendedDatamodelContainer sc, Crawl c) throws ProcessingConfigException, GenericProcessingException, IOException {
String m = null;
for (AccessMethods mAv : AccessMethods.values()) {
if (mAv.equalsName(ep.getMethod())) {
if (mAv.equalsName(ep.getAccessType())) {
m = mAv.toString();
break;
}
}
for (FileTypes ftv : FileTypes.values()) {
if (ftv.toString().equals(ep.getMethod())) {
if (ftv.toString().equals(ep.getAccessType())) {
m = ftv.toString();
break;
}
}
if (m==null) {
logger.error(String.format("Unknown access method [%s]; cancelling crawl", ep.getMethod()));
logger.error(String.format("Unknown access method [%s]; cancelling crawl", ep.getAccessType()));
this.updateCrawl(c.getId(), ProcessingServiceStates.ERROR);
return null;
}
......
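createPipeline(...) above now resolves the processing method from ep.getAccessType() by checking it against the AccessMethods and FileTypes enums. A standalone sketch of that resolution; the local enums stand in for the project's types, and the display names below ("OAI-PMH", "Git Repository", "Online file", "OPAC") are taken from the migration code further down, so whether they match the real enum values is an assumption:

import java.util.Optional;

// Standalone sketch of the access-method resolution in createPipeline(...).
public class MethodResolutionSketch {

    enum AccessMethod {
        OAI_PMH("OAI-PMH"), GIT("Git Repository"), OPAC("OPAC"), FILE("Online file");

        private final String name;
        AccessMethod(String name) { this.name = name; }
        boolean equalsName(String other) { return name.equalsIgnoreCase(other); }
        @Override public String toString() { return name; }
    }

    enum FileType { XML, JSON, CSV, TSV, TEXT }

    static Optional<String> resolve(String accessType) {
        // First try the access methods, then the plain file types, as in the hunk above.
        for (AccessMethod m : AccessMethod.values()) {
            if (m.equalsName(accessType)) {
                return Optional.of(m.toString());
            }
        }
        for (FileType f : FileType.values()) {
            if (f.toString().equals(accessType)) {
                return Optional.of(f.toString());
            }
        }
        return Optional.empty(); // unknown access method: the crawl is cancelled
    }

    public static void main(String[] args) {
        System.out.println(resolve("Git Repository").orElse("unknown"));
        System.out.println(resolve("FTP").orElse("unknown"));
    }
}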
package eu.dariah.de.search.crawling.crawler;

import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Autowired;

import de.unibamberg.minf.dme.model.datamodel.natures.XmlDatamodelNature;
import de.unibamberg.minf.dme.model.datamodel.natures.xml.XmlTerminal;
import de.unibamberg.minf.processing.service.online.OaiPmhHarvestingService;
import eu.dariah.de.search.api.client.OaiPmhClient;
import eu.dariah.de.search.api.model.oaipmh.OaiPmhMetadataFormat;
import eu.dariah.de.search.api.model.oaipmh.OaiPmhResponseContainer;
import eu.dariah.de.search.model.Crawl;
import eu.dariah.de.search.model.Endpoint;
import eu.dariah.de.search.model.ExtendedDatamodelContainer;
import eu.dariah.de.search.service.CrawlService;

public class GitCrawlerImpl extends OaiPmhHarvestingService implements Crawler {
    @Autowired private CrawlService crawlService;
    @Autowired private OaiPmhClient oaiPmhClient;

    private boolean initialized = false;
    private String crawlId;

    @Override
    public String getUnitMessageCode() {
        return "~eu.dariah.de.minfba.search.crawling.oai_crawling.unit";
    }

    @Override
    public String getTitleMessageCode() {
        return "~eu.dariah.de.minfba.search.crawling.oai_crawling.title";
    }

    @Override
    public boolean isInitialized() {
        return super.isInitialized() && initialized;
    }

    @Override
    public void run() {
        MDC.put("uid", crawlId);
        super.run();
    }

    @Override
    public void init(Endpoint endpoint, Crawl crawl, ExtendedDatamodelContainer sc) {
        this.setUrl(endpoint.getUrl());
        this.setSet(endpoint.getSet());
        this.crawlId = crawl.getId();

        if (crawl.getPrefix()==null || crawl.getPrefix().trim().isEmpty()) {
            String prefix = this.detectMetadataPrefix(endpoint, sc);
            if (prefix==null || prefix.trim().isEmpty()) {
                logger.warn("Failed to automatically detect metadata prefix for OAI-PMH endpoint");
                this.initialized = false;
                return;
            } else {
                logger.warn(String.format("Metadata prefix for OAI-PMH endpoint [%s] automatically detected [%s]", endpoint.getUrl(), prefix));
                crawl.setPrefix(prefix);
                crawlService.save(crawl);
            }
        }
        this.setPrefix(crawl.getPrefix());
        this.setCrawlDir(crawlService.getCrawlDirPath(crawl));
        this.initialized = true;
    }

    private String detectMetadataPrefix(Endpoint ep, ExtendedDatamodelContainer sc) {
        String rootNs = null;
        XmlDatamodelNature xmlNature = sc.getModel().getNature(XmlDatamodelNature.class);
        String rootTerminalId = xmlNature.getTerminalId(sc.getRoot().getId());
        for (XmlTerminal t : xmlNature.getTerminals()) {
            if (t.getId().equals(rootTerminalId)) {
                rootNs = t.getNamespace().trim().toLowerCase();
            }
        }
        String prefix = null;
        OaiPmhResponseContainer oaiFormatsResponse = oaiPmhClient.listMetadataFormats(ep.getUrl(), null);
        if (oaiFormatsResponse!=null && oaiFormatsResponse.getFormats()!=null) {
            for (OaiPmhMetadataFormat format : oaiFormatsResponse.getFormats()) {
                if (format.getMetadataNamespace().trim().toLowerCase().equals(rootNs)) {
                    if (prefix==null) {
                        prefix = format.getMetadataPrefix();
                    } else {
                        logger.warn("Multiple metadata prefixes matched for schema. Using first");
                    }
                }
            }
        }
        if (prefix==null) {
            logger.warn("Could not detect metadata prefix from namespaces. Trying schema names");
            if (oaiFormatsResponse!=null && oaiFormatsResponse.getFormats()!=null) {
                for (OaiPmhMetadataFormat format : oaiFormatsResponse.getFormats()) {
                    if (format.getMetadataPrefix().trim().toLowerCase().equals(sc.getModel().getName().trim().toLowerCase())) {
                        if (prefix==null) {
                            prefix = format.getMetadataPrefix();
                        } else {
                            logger.warn("Multiple metadata prefixes matched for schema. Using first");
                        }
                    }
                }
            }
        }
        return prefix;
    }
}
\ No newline at end of file
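The interesting part of the new crawler is detectMetadataPrefix(...): the namespace of the datamodel's root terminal is matched against the formats returned by ListMetadataFormats, falling back to a comparison of prefix and model name. A standalone sketch of the first matching step, with a plain Map standing in for the project's OaiPmhMetadataFormat list:

import java.util.LinkedHashMap;
import java.util.Map;

// Standalone sketch of the namespace-based prefix detection above: the root
// namespace of the datamodel is compared (case-insensitively) against the
// namespaces advertised by the endpoint.
public class PrefixDetectionSketch {

    static String detectPrefix(String rootNamespace, Map<String, String> prefixToNamespace) {
        String rootNs = rootNamespace.trim().toLowerCase();
        String prefix = null;
        for (Map.Entry<String, String> format : prefixToNamespace.entrySet()) {
            if (format.getValue().trim().toLowerCase().equals(rootNs)) {
                if (prefix == null) {
                    prefix = format.getKey();
                } else {
                    System.err.println("Multiple metadata prefixes matched; using the first");
                }
            }
        }
        return prefix;
    }

    public static void main(String[] args) {
        Map<String, String> formats = new LinkedHashMap<>();
        formats.put("oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
        formats.put("marc21", "http://www.loc.gov/MARC21/slim");
        System.out.println(detectPrefix("http://www.loc.gov/MARC21/slim", formats)); // marc21
    }
}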
package eu.dariah.de.search.dao;

import de.unibamberg.minf.dme.model.version.VersionInfo;
import eu.dariah.de.search.dao.base.MongoDao;

public interface VersionDao extends MongoDao<VersionInfo> { }
package eu.dariah.de.search.dao;

import org.springframework.stereotype.Repository;

import de.unibamberg.minf.dme.model.version.VersionInfo;
import eu.dariah.de.search.dao.base.BaseMongoDaoImpl;

@Repository
public class VersionDaoImpl extends BaseMongoDaoImpl<VersionInfo> implements VersionDao {
    public VersionDaoImpl() {
        super(VersionInfo.class);
    }
}
......@@ -16,7 +16,10 @@ public class Endpoint implements Identifiable {
private List<String> patterns;
private String url;
private String method;
private String accessType;
private String fileType;
private String set;
private String dateTimeFormatPattern;
......@@ -44,8 +47,11 @@ public class Endpoint implements Identifiable {
public String getUrl() { return url; }
public void setUrl(String url) { this.url = url; }
public String getMethod() { return method; }
public void setMethod(String method) { this.method = method; }
public String getAccessType() { return accessType; }
public void setAccessType(String accessType) { this.accessType = accessType; }
public String getFileType() { return fileType; }
public void setFileType(String fileType) { this.fileType = fileType; }
public String getSet() { return set; }
public void setSet(String set) { this.set = set; }
......
......@@ -9,7 +9,8 @@ public class EndpointPojo implements Identifiable {
private String id;
private String url;
private String method;
private String accessType;
private String fileType;
private String set;
private List<DatasetPojo> datasetPojos;
......@@ -27,8 +28,11 @@ public class EndpointPojo implements Identifiable {
public String getUrl() { return url; }
public void setUrl(String url) { this.url = url; }
public String getMethod() { return method; }
public void setMethod(String method) { this.method = method; }
public String getAccessType() { return accessType; }
public void setAccessType(String accessType) { this.accessType = accessType; }
public String getFileType() { return fileType; }
public void setFileType(String fileType) { this.fileType = fileType; }
public String getSet() { return set; }
public void setSet(String set) { this.set = set; }
......
......@@ -25,7 +25,8 @@ public class EndpointConverter extends BaseConverter<Endpoint, EndpointPojo> {
EndpointPojo ePojo = new EndpointPojo();
ePojo.setId(endpoint.getId());
ePojo.setUnprocessed(endpoint.isNew());
ePojo.setMethod(endpoint.getMethod());
ePojo.setAccessType(endpoint.getAccessType());
ePojo.setFileType(endpoint.getFileType());
ePojo.setSet(endpoint.getSet());
ePojo.setUrl(endpoint.getUrl());
ePojo.setUnaccessible(endpoint.isUnaccessible());
......
......@@ -162,7 +162,7 @@ public class SruQueryExecutionServiceImpl extends BaseResultService implements I
QueryResultDatasource qrd;
for (Collection c : coll) {
for (Endpoint e : c.getEndpoints()) {
if (e.getMethod().equals("OPAC")) {
if (e.getAccessType().equals("OPAC")) {
qrd = new QueryResultDatasource();
qrd.setProviderName(c.getName(locale.getISO3Language()));
qrd.setProviderId(c.getId());
......
package eu.dariah.de.search.updates;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.mongodb.core.CollectionCallback;
import org.springframework.data.mongodb.core.MongoTemplate;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;

import de.unibamberg.minf.dme.model.version.VersionInfo;
import de.unibamberg.minf.dme.model.version.VersionInfoImpl;
import eu.dariah.de.search.dao.VersionDao;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;

@Slf4j
@Data
public class UpdateServiceImpl implements InitializingBean {
    private final static String versionHashPrefix = "GenericSearch";

    private String backupsBasePath;
    private String database;

    private final MessageDigest md;

    @Autowired private MongoTemplate mongoTemplate;
    @Autowired private ObjectMapper objectMapper;
    @Autowired private VersionDao versionDao;

    public UpdateServiceImpl() throws NoSuchAlgorithmException {
        md = MessageDigest.getInstance("MD5");
    }

    @Override
    public void afterPropertiesSet() throws Exception {
        List<String> versions = new ArrayList<String>();
        List<VersionInfo> versionInfos = versionDao.findAll();
        for (VersionInfo vi : versionInfos) {
            if (!vi.getVersionHash().equals(new String(md.digest(new String(versionHashPrefix + vi.getVersion()).getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8))) {
                log.error("Cancelling migration checks: failed to compare version hashes. Is the correct database configured?");
                return;
            }
            versions.add(vi.getVersion());
        }
        this.performUpdates(versions);
    }

    private void performUpdates(List<String> existingVersions) throws Exception {
        boolean backedUp = false;
        if (!existingVersions.contains("3.11.0")) {
            if (!backedUp) {
                this.backupDb();
                backedUp = true;
            }
            this.migrateEndpoints();
        }
    }

    private void migrateEndpoints() {
        List<String> rawCollections = this.getObjectsAsString("collection");
        boolean errors = false;

        log.info("Performing endpoint migration (version: 3.11.0)");

        JsonNode node;
        ArrayNode endpointsNode;
        ObjectNode collectionNode, endpointNode;
        String method;
        for (String rawCollection : rawCollections) {
            try {
                node = objectMapper.readTree(rawCollection);
                if (!node.path("endpoints").isMissingNode()) {
                    collectionNode = (ObjectNode)node;
                    endpointsNode = (ArrayNode)collectionNode.get("endpoints");
                    for (JsonNode child : endpointsNode) {
                        endpointNode = (ObjectNode)child;
                        method = endpointNode.remove("method").textValue();
                        if (method.equals("XML")) {
                            endpointNode.set("accessType", new TextNode("Online file"));
                            endpointNode.set("fileType", new TextNode("XML"));
                        } else if (method.equals("JSON")) {
                            endpointNode.set("accessType", new TextNode("Online file"));
                            endpointNode.set("fileType", new TextNode("JSON"));
                        } else if (method.equals("CSV")) {
                            endpointNode.set("accessType", new TextNode("Online file"));
                            endpointNode.set("fileType", new TextNode("CSV"));
                        } else if (method.equals("TSV")) {
                            endpointNode.set("accessType", new TextNode("Online file"));
                            endpointNode.set("fileType", new TextNode("TSV"));
                        } else if (method.equals("TEXT")) {
                            endpointNode.set("accessType", new TextNode("Online file"));
                            endpointNode.set("fileType", new TextNode("TEXT"));
                        } else if (method.equals("OAI-PMH")) {
                            endpointNode.set("accessType", new TextNode("OAI-PMH"));
                            endpointNode.set("fileType", new TextNode("XML"));
                        } else if (method.equals("Git Repository")) {
                            endpointNode.set("accessType", new TextNode("Git Repository"));
                            endpointNode.set("fileType", new TextNode("TEXT"));
                        }
                    }
                    mongoTemplate.save(collectionNode.toString(), "collection");
                }
            } catch (Exception e) {
                log.error("Failed to update database to version 3.11.0", e);
                errors = true;
            }
        }
        this.saveVersionInfo("3.11.0", errors);
        log.info("Endpoint migration completed " + (errors ? "WITH" : "without") + " errors (version: 3.11.0)");
    }

    private void backupDb() throws Exception {
        String backupPath = backupsBasePath + File.separator + DateTime.now().toString(DateTimeFormat.forPattern("yyyyMMdd_HHmmss"));
        Files.createDirectories(Paths.get(new File(backupPath).toURI()), new FileAttribute<?>[0]);
        try {
            Runtime.getRuntime().exec(String.format("mongodump --out %s --db %s", backupPath, database));
        } catch (Exception e) {
            log.error("Failed to create mongodb backup", e);
            throw e;
        }
    }

    private void saveVersionInfo(String version, boolean errors) {
        VersionInfo vi = new VersionInfoImpl();
        vi.setUpdateWithErrors(errors);
        vi.setVersion(version);
        vi.setVersionHash(new String(md.digest(new String(versionHashPrefix + vi.getVersion()).getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8));
        versionDao.save(vi);
    }

    private List<String> getObjectsAsString(String queryObject) {
        return mongoTemplate.execute(queryObject, new CollectionCallback<List<String>>() {
            public List<String> doInCollection(DBCollection collection) {
                DBCursor cursor = collection.find();
                List<String> result = new ArrayList<String>();
                while (cursor.hasNext()) {
                    result.add(cursor.next().toString());
                }
                return result;
            }
        });
    }
}
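migrateEndpoints() above rewrites each stored endpoint from the legacy method field to the new accessType/fileType pair. A standalone Jackson sketch of that transformation applied to a single endpoint node, assuming only jackson-databind on the classpath; the XML/JSON/CSV/TSV/TEXT branches are collapsed into one default case here:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;

// Standalone sketch of the endpoint migration: the legacy "method" field is
// removed and replaced by the accessType/fileType pair used from 3.11.0 on.
public class EndpointMigrationSketch {

    static void migrate(ObjectNode endpointNode) {
        String method = endpointNode.remove("method").textValue();
        switch (method) {
            case "OAI-PMH":
                endpointNode.set("accessType", new TextNode("OAI-PMH"));
                endpointNode.set("fileType", new TextNode("XML"));
                break;
            case "Git Repository":
                endpointNode.set("accessType", new TextNode("Git Repository"));
                endpointNode.set("fileType", new TextNode("TEXT"));
                break;
            default:
                // XML, JSON, CSV, TSV and TEXT all become an "Online file" access
                // type with the former method value kept as the fileType.
                endpointNode.set("accessType", new TextNode("Online file"));
                endpointNode.set("fileType", new TextNode(method));
                break;
        }
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        ObjectNode endpoint = (ObjectNode) mapper.readTree("{\"url\":\"https://example.org/oai\",\"method\":\"OAI-PMH\"}");
        migrate(endpoint);
        System.out.println(endpoint); // {"url":"https://example.org/oai","accessType":"OAI-PMH","fileType":"XML"}
    }
}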