Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
dariah
search-commons
Commits
657ef77c
Commit
657ef77c
authored
Jul 27, 2020
by
Gradl, Tobias
Browse files
Implement git capabilities of processing-adapters (#406)
parent
cce844c6
Changes
12
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
657ef77c
...
...
@@ -20,11 +20,12 @@ stages:
build
:
stage
:
build
script
:
./gradlew assemble
only
:
-
master
deploy
:
stage
:
deploy
script
:
-
./gradlew publish -x test $NEXUS_CREDENTIALS
only
:
-
master
-
sru_opac_mww
-
master
\ No newline at end of file
build.gradle
View file @
657ef77c
...
...
@@ -41,6 +41,7 @@ ext {
httpComponentsVersion
=
"4.5.5"
elasticsearchVersion
=
"7.3.0"
logbackVersion
=
"1.1.3"
lombokVersion
=
"1.18.12"
}
dependencies
{
...
...
@@ -88,6 +89,10 @@ dependencies {
testImplementation
"ch.qos.logback:logback-core:$logbackVersion"
testImplementation
"ch.qos.logback:logback-classic:$logbackVersion"
compileOnly
"javax.servlet:servlet-api:2.5"
compileOnly
"org.projectlombok:lombok:$lombokVersion"
annotationProcessor
"org.projectlombok:lombok:$lombokVersion"
testCompileOnly
"org.projectlombok:lombok:$lombokVersion"
}
java
{
...
...
src/main/java/eu/dariah/de/search/api/client/CollectionSyncClient.java
View file @
657ef77c
...
...
@@ -331,14 +331,16 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
}
Endpoint
e
=
new
Endpoint
();
if
(
accessPojo
.
getType
().
equals
(
AccessMethods
.
FILE
.
toString
()))
{
if
(
accessPojo
.
getSubtype
()==
null
)
{
e
.
setMethod
(
"XML"
);
}
else
{
e
.
setMethod
(
accessPojo
.
getSubtype
().
toUpperCase
());
}
}
else
{
e
.
setMethod
(
accessPojo
.
getType
());
e
.
setAccessType
(
accessPojo
.
getType
());
e
.
setFileType
(
accessPojo
.
getSubtype
());
if
(
accessPojo
.
getType
().
equals
(
AccessMethods
.
FILE
.
toString
())
||
accessPojo
.
getType
().
equals
(
AccessMethods
.
OAI_PMH
.
toString
())
||
accessPojo
.
getType
().
equals
(
AccessMethods
.
OPAC
.
toString
()))
{
e
.
setFileType
(
"XML"
);
}
else
if
(
accessPojo
.
getType
().
equals
(
AccessMethods
.
GIT
.
toString
()))
{
e
.
setFileType
(
"Text"
);
}
e
.
setSet
(
accessPojo
.
getSet
());
e
.
setUrl
(
accessPojo
.
getUri
());
...
...
@@ -381,7 +383,8 @@ public class CollectionSyncClient extends BaseApiClientImpl<CollectionApiPojo, E
}
private
boolean
endpointsAreSame
(
Endpoint
ep1
,
Endpoint
ep2
)
{
return
ep1
.
getMethod
().
equals
(
ep2
.
getMethod
())
&&
return
ep1
.
getAccessType
().
equals
(
ep2
.
getAccessType
())
&&
ep1
.
getFileType
().
equals
(
ep2
.
getFileType
())
&&
ep1
.
getUrl
().
equals
(
ep2
.
getUrl
())
&&
(
ep1
.
getSet
()==
null
&&
ep2
.
getSet
()==
null
||
ep1
.
getSet
().
equals
(
ep2
.
getSet
())
);
}
...
...
src/main/java/eu/dariah/de/search/crawling/CrawlManagerImpl.java
View file @
657ef77c
...
...
@@ -33,7 +33,6 @@ import eu.dariah.de.search.model.Dataset;
import
eu.dariah.de.search.model.Collection
;
import
eu.dariah.de.search.model.Endpoint
;
import
eu.dariah.de.search.model.ExtendedDatamodelContainer
;
import
eu.dariah.de.search.query.execution.DocumentService
;
import
eu.dariah.de.search.service.CollectionService
;
import
eu.dariah.de.search.service.CrawlService
;
import
eu.dariah.de.search.service.ResourceIndexingServiceImpl
;
...
...
@@ -211,19 +210,19 @@ public class CrawlManagerImpl implements CrawlManager, ApplicationContextAware,
private
CrawlPipeline
createPipeline
(
Endpoint
ep
,
ExtendedDatamodelContainer
sc
,
Crawl
c
)
throws
ProcessingConfigException
,
GenericProcessingException
,
IOException
{
String
m
=
null
;
for
(
AccessMethods
mAv
:
AccessMethods
.
values
())
{
if
(
mAv
.
equalsName
(
ep
.
get
Method
()))
{
if
(
mAv
.
equalsName
(
ep
.
get
AccessType
()))
{
m
=
mAv
.
toString
();
break
;
}
}
for
(
FileTypes
ftv
:
FileTypes
.
values
())
{
if
(
ftv
.
toString
().
equals
(
ep
.
get
Method
()))
{
if
(
ftv
.
toString
().
equals
(
ep
.
get
AccessType
()))
{
m
=
ftv
.
toString
();
break
;
}
}
if
(
m
==
null
)
{
logger
.
error
(
String
.
format
(
"Unknown access method [%s]; cancelling crawl"
,
ep
.
get
Method
()));
logger
.
error
(
String
.
format
(
"Unknown access method [%s]; cancelling crawl"
,
ep
.
get
AccessType
()));
this
.
updateCrawl
(
c
.
getId
(),
ProcessingServiceStates
.
ERROR
);
return
null
;
}
...
...
src/main/java/eu/dariah/de/search/crawling/crawler/GitCrawlerImpl.java
0 → 100644
View file @
657ef77c
package
eu.dariah.de.search.crawling.crawler
;
import
org.slf4j.MDC
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
de.unibamberg.minf.dme.model.datamodel.natures.XmlDatamodelNature
;
import
de.unibamberg.minf.dme.model.datamodel.natures.xml.XmlTerminal
;
import
de.unibamberg.minf.processing.service.online.OaiPmhHarvestingService
;
import
eu.dariah.de.search.api.client.OaiPmhClient
;
import
eu.dariah.de.search.api.model.oaipmh.OaiPmhMetadataFormat
;
import
eu.dariah.de.search.api.model.oaipmh.OaiPmhResponseContainer
;
import
eu.dariah.de.search.model.Crawl
;
import
eu.dariah.de.search.model.Endpoint
;
import
eu.dariah.de.search.model.ExtendedDatamodelContainer
;
import
eu.dariah.de.search.service.CrawlService
;
/**
 * Crawler implementation named for Git endpoints.
 *
 * NOTE(review): the implementation visible here behaves like an OAI-PMH
 * harvester: it extends {@link OaiPmhHarvestingService}, resolves an OAI-PMH
 * metadata prefix during {@link #init} and returns the {@code oai_crawling}
 * message codes. Presumably a placeholder until Git-specific harvesting is
 * wired in - confirm before relying on it for Git repositories.
 */
public class GitCrawlerImpl extends OaiPmhHarvestingService implements Crawler {
	@Autowired private CrawlService crawlService;
	@Autowired private OaiPmhClient oaiPmhClient;

	/** Set by {@link #init}; stays {@code false} when no metadata prefix could be determined. */
	private boolean initialized = false;

	/** Id of the crawl handed to {@link #init}; published to the logging MDC in {@link #run()}. */
	private String crawlId;

	@Override
	public String getUnitMessageCode() {
		return "~eu.dariah.de.minfba.search.crawling.oai_crawling.unit";
	}

	@Override
	public String getTitleMessageCode() {
		return "~eu.dariah.de.minfba.search.crawling.oai_crawling.title";
	}

	/**
	 * @return {@code true} only if both the superclass and this crawler's own {@link #init} succeeded
	 */
	@Override
	public boolean isInitialized() {
		return super.isInitialized() && initialized;
	}

	/**
	 * Tags the logging context with the crawl id and delegates to the superclass.
	 * NOTE(review): the "uid" entry is not removed from the MDC afterwards.
	 */
	@Override
	public void run() {
		MDC.put("uid", crawlId);
		super.run();
	}

	/**
	 * Configures URL, set, metadata prefix and crawl directory from the given
	 * endpoint and crawl. If the crawl carries no prefix, one is detected from
	 * the endpoint and persisted on the crawl; if detection fails the crawler
	 * is left uninitialized.
	 *
	 * @param endpoint endpoint providing URL and set
	 * @param crawl crawl providing id and (optionally) the metadata prefix
	 * @param sc datamodel container used for prefix detection
	 */
	@Override
	public void init(Endpoint endpoint, Crawl crawl, ExtendedDatamodelContainer sc) {
		this.setUrl(endpoint.getUrl());
		this.setSet(endpoint.getSet());
		this.crawlId = crawl.getId();

		if (crawl.getPrefix() == null || crawl.getPrefix().trim().isEmpty()) {
			String prefix = this.detectMetadataPrefix(endpoint, sc);
			if (prefix == null || prefix.trim().isEmpty()) {
				logger.warn("Failed to automatically detect metadata prefix for OAI-PMH endpoint");
				this.initialized = false;
				return;
			}
			logger.warn(String.format("Metadata prefix for OAI-PMH endpoint [%s] automatically detected [%s]", endpoint.getUrl(), prefix));
			crawl.setPrefix(prefix);
			crawlService.save(crawl);
		}
		this.setPrefix(crawl.getPrefix());
		this.setCrawlDir(crawlService.getCrawlDirPath(crawl));
		this.initialized = true;
	}

	/**
	 * Determines the metadata prefix to harvest with: first by matching the
	 * namespace of the datamodel's root terminal against the namespaces of the
	 * formats offered by the endpoint, then - if that yields nothing - by
	 * matching the datamodel name against the offered prefixes.
	 *
	 * @return the first matching prefix, or {@code null} if none matched
	 */
	private String detectMetadataPrefix(Endpoint ep, ExtendedDatamodelContainer sc) {
		String rootNs = this.resolveRootNamespace(sc);
		OaiPmhResponseContainer oaiFormatsResponse = oaiPmhClient.listMetadataFormats(ep.getUrl(), null);

		String prefix = this.firstMatchingPrefix(oaiFormatsResponse, rootNs, true);
		if (prefix == null) {
			logger.warn("Could not detect metadata prefix from namespaced. Trying schema names");
			prefix = this.firstMatchingPrefix(oaiFormatsResponse, sc.getModel().getName(), false);
		}
		return prefix;
	}

	/**
	 * Looks up the namespace of the XML terminal that belongs to the root
	 * element of the datamodel.
	 *
	 * @return the namespace as stored on the terminal, or {@code null} when the
	 *         model has no XML nature, no root, or no terminal with a namespace
	 */
	private String resolveRootNamespace(ExtendedDatamodelContainer sc) {
		XmlDatamodelNature xmlNature = sc.getModel().getNature(XmlDatamodelNature.class);
		if (xmlNature == null || sc.getRoot() == null) {
			return null;
		}
		String rootTerminalId = xmlNature.getTerminalId(sc.getRoot().getId());
		if (rootTerminalId == null || xmlNature.getTerminals() == null) {
			return null;
		}
		String rootNs = null;
		for (XmlTerminal t : xmlNature.getTerminals()) {
			// Terminals without a namespace cannot identify a metadata format.
			if (rootTerminalId.equals(t.getId()) && t.getNamespace() != null) {
				rootNs = t.getNamespace();
			}
		}
		return rootNs;
	}

	/**
	 * Scans the formats of a ListMetadataFormats response for entries whose
	 * namespace (or prefix) equals {@code expected}, ignoring case and
	 * surrounding whitespace. The comparison is locale-independent.
	 *
	 * @param response response to scan; {@code null} or a response without formats yields {@code null}
	 * @param expected value to look for; {@code null} yields {@code null}
	 * @param matchNamespace {@code true} to compare against the format namespace,
	 *        {@code false} to compare against the format prefix
	 * @return prefix of the first matching format, or {@code null}
	 */
	private String firstMatchingPrefix(OaiPmhResponseContainer response, String expected, boolean matchNamespace) {
		if (response == null || response.getFormats() == null || expected == null) {
			return null;
		}
		String wanted = expected.trim();
		String prefix = null;
		for (OaiPmhMetadataFormat format : response.getFormats()) {
			String candidate = matchNamespace ? format.getMetadataNamespace() : format.getMetadataPrefix();
			if (candidate == null || !candidate.trim().equalsIgnoreCase(wanted)) {
				continue;
			}
			if (prefix == null) {
				prefix = format.getMetadataPrefix();
			} else {
				logger.warn("Multiple metadata prefixes matched for schema. Using first");
			}
		}
		return prefix;
	}
}
\ No newline at end of file
src/main/java/eu/dariah/de/search/dao/VersionDao.java
0 → 100755
View file @
657ef77c
package
eu.dariah.de.search.dao
;
import
de.unibamberg.minf.dme.model.version.VersionInfo
;
import
eu.dariah.de.search.dao.base.MongoDao
;
/**
 * Data access contract for {@link VersionInfo} entries.
 *
 * Declares no operations of its own; everything available on it is inherited
 * from {@link MongoDao}.
 */
public interface VersionDao extends MongoDao<VersionInfo> {
}
src/main/java/eu/dariah/de/search/dao/VersionDaoImpl.java
0 → 100755
View file @
657ef77c
package
eu.dariah.de.search.dao
;
import
org.springframework.stereotype.Repository
;
import
de.unibamberg.minf.dme.model.version.VersionInfo
;
import
eu.dariah.de.search.dao.base.BaseMongoDaoImpl
;
/**
 * {@link VersionDao} implementation on top of {@link BaseMongoDaoImpl},
 * bound to the {@link VersionInfo} type.
 */
@Repository
public class VersionDaoImpl extends BaseMongoDaoImpl<VersionInfo> implements VersionDao {

	/** Hands the handled entity type to the generic base DAO. */
	public VersionDaoImpl() {
		super(VersionInfo.class);
	}
}
src/main/java/eu/dariah/de/search/model/Endpoint.java
View file @
657ef77c
...
...
@@ -16,7 +16,10 @@ public class Endpoint implements Identifiable {
private
List
<
String
>
patterns
;
private
String
url
;
private
String
method
;
private
String
accessType
;
private
String
fileType
;
private
String
set
;
private
String
dateTimeFormatPattern
;
...
...
@@ -44,8 +47,11 @@ public class Endpoint implements Identifiable {
public
String
getUrl
()
{
return
url
;
}
public
void
setUrl
(
String
url
)
{
this
.
url
=
url
;
}
public
String
getMethod
()
{
return
method
;
}
public
void
setMethod
(
String
method
)
{
this
.
method
=
method
;
}
public
String
getAccessType
()
{
return
accessType
;
}
public
void
setAccessType
(
String
accessType
)
{
this
.
accessType
=
accessType
;
}
public
String
getFileType
()
{
return
fileType
;
}
public
void
setFileType
(
String
fileType
)
{
this
.
fileType
=
fileType
;
}
public
String
getSet
()
{
return
set
;
}
public
void
setSet
(
String
set
)
{
this
.
set
=
set
;
}
...
...
src/main/java/eu/dariah/de/search/pojo/EndpointPojo.java
View file @
657ef77c
...
...
@@ -9,7 +9,8 @@ public class EndpointPojo implements Identifiable {
private
String
id
;
private
String
url
;
private
String
method
;
private
String
accessType
;
private
String
fileType
;
private
String
set
;
private
List
<
DatasetPojo
>
datasetPojos
;
...
...
@@ -27,8 +28,11 @@ public class EndpointPojo implements Identifiable {
public
String
getUrl
()
{
return
url
;
}
public
void
setUrl
(
String
url
)
{
this
.
url
=
url
;
}
public
String
getMethod
()
{
return
method
;
}
public
void
setMethod
(
String
method
)
{
this
.
method
=
method
;
}
public
String
getAccessType
()
{
return
accessType
;
}
public
void
setAccessType
(
String
accessType
)
{
this
.
accessType
=
accessType
;
}
public
String
getFileType
()
{
return
fileType
;
}
public
void
setFileType
(
String
fileType
)
{
this
.
fileType
=
fileType
;
}
public
String
getSet
()
{
return
set
;
}
public
void
setSet
(
String
set
)
{
this
.
set
=
set
;
}
...
...
src/main/java/eu/dariah/de/search/pojo/conversion/EndpointConverter.java
View file @
657ef77c
...
...
@@ -25,7 +25,8 @@ public class EndpointConverter extends BaseConverter<Endpoint, EndpointPojo> {
EndpointPojo
ePojo
=
new
EndpointPojo
();
ePojo
.
setId
(
endpoint
.
getId
());
ePojo
.
setUnprocessed
(
endpoint
.
isNew
());
ePojo
.
setMethod
(
endpoint
.
getMethod
());
ePojo
.
setAccessType
(
endpoint
.
getAccessType
());
ePojo
.
setFileType
(
endpoint
.
getFileType
());
ePojo
.
setSet
(
endpoint
.
getSet
());
ePojo
.
setUrl
(
endpoint
.
getUrl
());
ePojo
.
setUnaccessible
(
endpoint
.
isUnaccessible
());
...
...
src/main/java/eu/dariah/de/search/query/meta/SruQueryExecutionServiceImpl.java
View file @
657ef77c
...
...
@@ -162,7 +162,7 @@ public class SruQueryExecutionServiceImpl extends BaseResultService implements I
QueryResultDatasource
qrd
;
for
(
Collection
c
:
coll
)
{
for
(
Endpoint
e
:
c
.
getEndpoints
())
{
if
(
e
.
get
Method
().
equals
(
"OPAC"
))
{
if
(
e
.
get
AccessType
().
equals
(
"OPAC"
))
{
qrd
=
new
QueryResultDatasource
();
qrd
.
setProviderName
(
c
.
getName
(
locale
.
getISO3Language
()));
qrd
.
setProviderId
(
c
.
getId
());
...
...
src/main/java/eu/dariah/de/search/updates/UpdateServiceImpl.java
0 → 100644
View file @
657ef77c
package
eu.dariah.de.search.updates
;
import
java.io.File
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Files
;
import
java.nio.file.Paths
;
import
java.nio.file.attribute.FileAttribute
;
import
java.security.MessageDigest
;
import
java.security.NoSuchAlgorithmException
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.joda.time.DateTime
;
import
org.joda.time.format.DateTimeFormat
;
import
org.springframework.beans.factory.InitializingBean
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.data.mongodb.core.CollectionCallback
;
import
org.springframework.data.mongodb.core.MongoTemplate
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.fasterxml.jackson.databind.node.ArrayNode
;
import
com.fasterxml.jackson.databind.node.ObjectNode
;
import
com.fasterxml.jackson.databind.node.TextNode
;
import
com.mongodb.DBCollection
;
import
com.mongodb.DBCursor
;
import
de.unibamberg.minf.dme.model.version.VersionInfo
;
import
de.unibamberg.minf.dme.model.version.VersionInfoImpl
;
import
eu.dariah.de.search.dao.VersionDao
;
import
lombok.Data
;
import
lombok.extern.slf4j.Slf4j
;
@Slf4j
@Data
public
class
UpdateServiceImpl
implements
InitializingBean
{
private
final
static
String
versionHashPrefix
=
"GenericSearch"
;
private
String
backupsBasePath
;
private
String
database
;
private
final
MessageDigest
md
;
@Autowired
private
MongoTemplate
mongoTemplate
;
@Autowired
private
ObjectMapper
objectMapper
;
@Autowired
private
VersionDao
versionDao
;
public
UpdateServiceImpl
()
throws
NoSuchAlgorithmException
{
md
=
MessageDigest
.
getInstance
(
"MD5"
);
}
@Override
public
void
afterPropertiesSet
()
throws
Exception
{
List
<
String
>
versions
=
new
ArrayList
<
String
>();
List
<
VersionInfo
>
versionInfos
=
versionDao
.
findAll
();
for
(
VersionInfo
vi
:
versionInfos
)
{
if
(!
vi
.
getVersionHash
().
equals
(
new
String
(
md
.
digest
(
new
String
(
versionHashPrefix
+
vi
.
getVersion
()).
getBytes
(
StandardCharsets
.
UTF_8
)),
StandardCharsets
.
UTF_8
)))
{
log
.
error
(
"Cancelling migration checks: failed to compare version hashes. Is the correct database configured?"
);
return
;
}
versions
.
add
(
vi
.
getVersion
());
}
this
.
performUpdates
(
versions
);
}
private
void
performUpdates
(
List
<
String
>
existingVersions
)
throws
Exception
{
boolean
backedUp
=
false
;
if
(!
existingVersions
.
contains
(
"3.11.0"
))
{
if
(!
backedUp
)
{
this
.
backupDb
();
backedUp
=
true
;
}
this
.
migrateEndpoints
();
}
}
private
void
migrateEndpoints
()
{
List
<
String
>
rawCollections
=
this
.
getObjectsAsString
(
"collection"
);
boolean
errors
=
false
;
log
.
info
(
"Performing endpoint migration (version: 3.11.0)"
);
JsonNode
node
;
ArrayNode
endpointsNode
;
ObjectNode
collectionNode
,
endpointNode
;
String
method
;
for
(
String
rawCollection
:
rawCollections
)
{
try
{
node
=
objectMapper
.
readTree
(
rawCollection
);
if
(!
node
.
path
(
"endpoints"
).
isMissingNode
())
{
collectionNode
=
(
ObjectNode
)
node
;
endpointsNode
=
(
ArrayNode
)
collectionNode
.
get
(
"endpoints"
);
for
(
JsonNode
child
:
endpointsNode
)
{
endpointNode
=
(
ObjectNode
)
child
;
method
=
endpointNode
.
remove
(
"method"
).
textValue
();
if
(
method
.
equals
(
"XML"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Online file"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"XML"
));
}
else
if
(
method
.
equals
(
"JSON"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Online file"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"JSON"
));
}
else
if
(
method
.
equals
(
"CSV"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Online file"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"CSV"
));
}
else
if
(
method
.
equals
(
"TSV"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Online file"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"TSV"
));
}
else
if
(
method
.
equals
(
"TEXT"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Online file"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"TEXT"
));
}
else
if
(
method
.
equals
(
"OAI-PMH"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"OAI-PMH"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"XML"
));
}
else
if
(
method
.
equals
(
"Git Repository"
))
{
endpointNode
.
set
(
"accessType"
,
new
TextNode
(
"Git Repository"
));
endpointNode
.
set
(
"fileType"
,
new
TextNode
(
"TEXT"
));
}
}
mongoTemplate
.
save
(
collectionNode
.
toString
(),
"collection"
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"Failed to update database to version 3.11.0"
,
e
);
errors
=
true
;
}
}
this
.
saveVersionInfo
(
"3.11.0"
,
errors
);
log
.
info
(
"Endpoint migration completed "
+
(
errors
?
"WITH"
:
"without"
)
+
" errors (version: 3.11.0)"
);
}
private
void
backupDb
()
throws
Exception
{
String
backupPath
=
backupsBasePath
+
File
.
separator
+
DateTime
.
now
().
toString
(
DateTimeFormat
.
forPattern
(
"yyyyMMdd_HHmmss"
));
Files
.
createDirectories
(
Paths
.
get
(
new
File
(
backupPath
).
toURI
()),
new
FileAttribute
<?>[
0
]);
try
{
Runtime
.
getRuntime
().
exec
(
String
.
format
(
"mongodump --out %s --db %s"
,
backupPath
,
database
));
}
catch
(
Exception
e
)
{
log
.
error
(
"Failed to create mongodb backup"
,
e
);
throw
e
;
}
}
private
void
saveVersionInfo
(
String
version
,
boolean
errors
)
{
VersionInfo
vi
=
new
VersionInfoImpl
();
vi
.
setUpdateWithErrors
(
errors
);
vi
.
setVersion
(
version
);
vi
.
setVersionHash
(
new
String
(
md
.
digest
(
new
String
(
versionHashPrefix
+
vi
.
getVersion
()).
getBytes
(
StandardCharsets
.
UTF_8
)),
StandardCharsets
.
UTF_8
));
versionDao
.
save
(
vi
);
}
private
List
<
String
>
getObjectsAsString
(
String
queryObject
)
{
return
mongoTemplate
.
execute
(
queryObject
,
new
CollectionCallback
<
List
<
String
>>()
{
public
List
<
String
>
doInCollection
(
DBCollection
collection
)
{
DBCursor
cursor
=
collection
.
find
();
List
<
String
>
result
=
new
ArrayList
<
String
>();
while
(
cursor
.
hasNext
())
{
result
.
add
(
cursor
.
next
().
toString
());
}
return
result
;
}
});
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment