Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
dariah
search
Commits
770e394e
Commit
770e394e
authored
Jan 18, 2022
by
Gradl, Tobias
Browse files
446: Reimplement automatic online and offline crawl capabilities
(OPENED) Task-Url:
#446
parent
cf3dfc99
Pipeline
#31077
passed with stage
in 2 minutes and 14 seconds
Changes
10
Pipelines
1
Show whitespace changes
Inline
Side-by-side
search-core/src/main/java/eu/dariah/de/search/controller/CollectionController.java
View file @
770e394e
...
...
@@ -18,9 +18,11 @@ import org.springframework.web.bind.annotation.ResponseBody;
import
com.fasterxml.jackson.databind.node.ObjectNode
;
import
de.unibamberg.minf.core.web.controller.DataTableList
;
import
de.unibamberg.minf.core.web.pojo.MessagePojo
;
import
de.unibamberg.minf.core.web.pojo.ModelActionPojo
;
import
eu.dariah.de.search.automation.CollectionSyncService
;
import
eu.dariah.de.search.config.CrawlingConfigProperties
;
import
eu.dariah.de.search.crawling.TimedCrawlManagerImpl
;
import
eu.dariah.de.search.pojo.CollectionPojo
;
import
eu.dariah.de.search.pojo.DatasetPojo
;
import
eu.dariah.de.search.pojo.DatasourcePojo
;
...
...
@@ -36,6 +38,7 @@ public class CollectionController extends BaseController {
@Autowired
private
CollectionConverter
collectionConverter
;
@Autowired
private
CollectionSyncService
crSyncService
;
@Autowired
private
DatamodelService
datamodelService
;
@Autowired
private
TimedCrawlManagerImpl
crawlManager
;
@Autowired
private
CrawlingConfigProperties
crawlingConfig
;
...
...
@@ -88,6 +91,17 @@ public class CollectionController extends BaseController {
return
result
;
}
/**
 * Manually triggers the online crawl step: asks the crawl manager to enqueue
 * new and outdated datasets and then reports success to the caller.
 *
 * <p>The response no longer carries an {@code "error"} message when the call
 * succeeds; previously a successful result was returned together with the
 * {@code ~error.head}/{@code ~error.body} message, which contradicted the
 * success flag. Exceptions thrown by the crawl manager still propagate
 * unchanged.
 *
 * @param model  MVC model; not used by this handler
 * @param locale request locale; not used by this handler
 * @return a result object whose success flag is {@code true}
 */
@RequestMapping(method=GET, value={"/async/triggerOnline"})
public @ResponseBody ModelActionPojo triggerOnline(Model model, Locale locale) {
	crawlManager.enqueueNewAndOutdatedDatasets();

	ModelActionPojo result = new ModelActionPojo();
	result.setSuccess(true);
	return result;
}
@RequestMapping
(
method
=
GET
,
value
={
"/getColregStatus"
})
public
@ResponseBody
ModelActionPojo
getColregStatus
(
Model
model
,
Locale
locale
)
{
ModelActionPojo
result
=
new
ModelActionPojo
(
true
);
...
...
search-core/src/main/java/eu/dariah/de/search/controller/DatamodelController.java
View file @
770e394e
...
...
@@ -27,6 +27,7 @@ import de.unibamberg.minf.core.web.pojo.ModelActionPojo;
import
eu.dariah.de.search.automation.DmeSyncService
;
import
eu.dariah.de.search.config.CrawlingConfigProperties
;
import
eu.dariah.de.search.config.MainConfigProperties
;
import
eu.dariah.de.search.crawling.TimedCrawlManagerImpl
;
import
eu.dariah.de.search.mapping.MappingGenerationService
;
import
eu.dariah.de.search.model.ExtendedDatamodelContainer
;
import
eu.dariah.de.search.pojo.DatamodelPojo
;
...
...
@@ -40,6 +41,7 @@ public class DatamodelController extends BaseController {
@Autowired
private
DatamodelConverter
datamodelConverter
;
@Autowired
private
MappingGenerationService
mappingGenerationService
;
@Autowired
private
DmeSyncService
dmeSyncService
;
@Autowired
private
TimedCrawlManagerImpl
crawlManager
;
@Autowired
private
MainConfigProperties
config
;
@Autowired
private
CrawlingConfigProperties
crawlingConfig
;
...
...
@@ -117,6 +119,16 @@ public class DatamodelController extends BaseController {
return
result
;
}
/**
 * Manually triggers the offline step: asks the crawl manager to reindex
 * outdated data and then reports success to the caller.
 *
 * <p>The response no longer carries an {@code "error"} message when the call
 * succeeds; previously a successful result was returned together with the
 * {@code ~error.head}/{@code ~error.body} message, which contradicted the
 * success flag. Exceptions thrown by the crawl manager still propagate
 * unchanged.
 *
 * @param model  MVC model; not used by this handler
 * @param locale request locale; not used by this handler
 * @return a result object whose success flag is {@code true}
 */
@RequestMapping(method=GET, value={"/async/triggerOffline"})
public @ResponseBody ModelActionPojo triggerOffline(Model model, Locale locale) {
	crawlManager.reindexOutdatedData();

	ModelActionPojo result = new ModelActionPojo();
	result.setSuccess(true);
	return result;
}
@RequestMapping
(
method
=
GET
,
value
={
"/getDmeStatus"
})
public
@ResponseBody
ModelActionPojo
getDmeStatus
(
Model
model
,
Locale
locale
)
{
ModelActionPojo
result
=
new
ModelActionPojo
(
true
);
...
...
search-core/src/main/java/eu/dariah/de/search/crawling/TimedCrawlManagerImpl.java
View file @
770e394e
...
...
@@ -121,17 +121,9 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
try
{
statusMapslock
.
lock
();
// TODO Reimplement once a new reindexing strategy is in place
/*if (autocrawlOffline) {
// Handle outdated models if any and conditions are met
List<ExtendedDatamodelContainer> refreshDatamodels = this.determineRefreshableDatamodels();
for (ExtendedDatamodelContainer datamodel : refreshDatamodels) {
// Drop all indexed data and recreate index with new mapping
if (this.recreateIndex(datamodel)) {
this.reindexDatamodel(datamodel);
if
(
autocrawlOffline
)
{
this
.
reindexOutdatedData
();
}
}
}*/
if
(
autocrawlOnline
)
{
// Handle new or updated datasets
...
...
@@ -148,8 +140,18 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
}
}
/**
 * Walks over every datamodel returned by {@code determineRefreshableDatamodels()}
 * and, for each one whose index could be recreated, starts a reindex.
 *
 * NOTE(review): the timed caller in this class invokes this method while
 * holding {@code statusMapslock}; other callers may not hold it -- confirm
 * whether the lock is required here.
 */
public void reindexOutdatedData() {
	for (ExtendedDatamodelContainer outdated : this.determineRefreshableDatamodels()) {
		// Index recreation must succeed before reindexing; otherwise skip this model
		if (!this.recreateIndex(outdated)) {
			continue;
		}
		this.reindexDatamodel(outdated);
	}
}
p
rivate
void
enqueueNewAndOutdatedDatasets
()
{
p
ublic
void
enqueueNewAndOutdatedDatasets
()
{
DateTime
syncTimestamp
=
DateTime
.
now
();
List
<
Crawl
>
lastOnlineCrawls
;
Crawl
lastOnlineCrawl
;
...
...
@@ -274,13 +276,16 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
}
/*
private List<ExtendedDatamodelContainer> determineRefreshableDatamodels() {
private
List
<
ExtendedDatamodelContainer
>
determineRefreshableDatamodels
()
{
List
<
ExtendedDatamodelContainer
>
refreshDatamodels
=
new
ArrayList
<
ExtendedDatamodelContainer
>();
// Start of outdated reprocessing only between 1 and 3 o'clock
if
(
DateTime
.
now
().
getHourOfDay
()>
0
&&
DateTime
.
now
().
getHourOfDay
()<=
2
)
{
for
(
ExtendedDatamodelContainer
datamodel
:
datamodelService
.
findAll
())
{
if (mappingGenerationService.getIsOutdated(datamodel)) {
// TODO: Reimplement
/*if (mappingGenerationService.getIsOutdated(datamodel)) {
if (!this.outdatedDatamodelIdCrawlIdMap.containsKey(datamodel.getId())) {
// Start blocking of outdated datamodel
this.outdatedDatamodelIdCrawlIdMap.put(datamodel.getId(), null);
...
...
@@ -291,11 +296,11 @@ public class TimedCrawlManagerImpl extends CrawlManagerImpl implements TimedCraw
refreshDatamodels.add(datamodel);
}
}
}
}
*/
}
}
return
refreshDatamodels
;
}
*/
}
...
...
search-core/src/main/java/eu/dariah/de/search/es/client/IndexingClient.java
View file @
770e394e
package
eu.dariah.de.search.es.client
;
import
java.io.IOException
;
import
java.util.Map
;
import
org.elasticsearch.action.bulk.BulkResponse
;
public
interface
IndexingClient
{
public
BulkResponse
bulkIndexSources
(
String
index
,
Map
<
String
,
Map
<
String
,
Object
>>
idSourceMap
);
public
BulkResponse
bulkIndexSources
(
String
index
,
Map
<
String
,
Map
<
String
,
Object
>>
idSourceMap
)
throws
IOException
;
public
long
indexSources
(
String
indexName
,
Map
<
String
,
Map
<
String
,
Object
>>
idSourceMap
);
public
void
indexSource
(
String
string
,
String
resourceId
,
String
source
);
}
search-core/src/main/java/eu/dariah/de/search/es/client/IndexingClientImpl.java
View file @
770e394e
...
...
@@ -85,11 +85,11 @@ public class IndexingClientImpl extends BaseEsClientImpl implements IndexingClie
}
@Override
public
BulkResponse
bulkIndexSources
(
String
index
,
Map
<
String
,
Map
<
String
,
Object
>>
idSourceMap
)
{
public
BulkResponse
bulkIndexSources
(
String
index
,
Map
<
String
,
Map
<
String
,
Object
>>
idSourceMap
)
throws
IOException
{
BulkRequest
bulkRequest
=
new
BulkRequest
();
IndexRequest
indexRequest
;
String
strSource
;
try
{
for
(
Entry
<
String
,
Map
<
String
,
Object
>>
source
:
idSourceMap
.
entrySet
())
{
try
{
strSource
=
indexingObjectMapper
.
writeValueAsString
(
source
.
getValue
());
...
...
@@ -103,10 +103,6 @@ public class IndexingClientImpl extends BaseEsClientImpl implements IndexingClie
}
}
return
client
.
bulk
(
bulkRequest
,
RequestOptions
.
DEFAULT
);
}
catch
(
IOException
e
)
{
logger
.
error
(
"Error while bulk indexing resources"
,
e
);
}
return
null
;
}
}
search-core/src/main/java/eu/dariah/de/search/es/client/SearchClientImpl.java
View file @
770e394e
...
...
@@ -66,7 +66,7 @@ public class SearchClientImpl extends BaseEsClientImpl implements SearchClient {
}
return
response
.
getCount
();
}
catch
(
Exception
e
)
{
logger
.
error
(
"Failed to execute count: "
+
e
.
getMessage
(),
e
);
//
logger.error("Failed to execute count: " + e.getMessage(), e);
}
return
0
;
}
...
...
search-core/src/main/java/eu/dariah/de/search/es/service/IndexingServiceImpl.java
View file @
770e394e
...
...
@@ -133,6 +133,7 @@ public class IndexingServiceImpl implements IndexingService {
sources
.
put
(
rc
.
getId
(),
rc
.
toSource
());
}
try
{
BulkResponse
bulkResponse
=
indexingClient
.
bulkIndexSources
(
this
.
index
,
sources
);
int
docCount
=
bulkResponse
.
getItems
().
length
;
...
...
@@ -201,6 +202,11 @@ public class IndexingServiceImpl implements IndexingService {
}
return
docCount
;
}
catch
(
Exception
e
)
{
log
.
error
(
"Failed to execute bulk index request"
,
e
);
return
0
;
}
}
private
JsonNode
resourceToSimpleNode
(
Resource
r
)
{
...
...
i18n
@
0201f706
Compare
60db673c
...
0201f706
Subproject commit
60db673cb20da87f563e04fc8aa05b09e72b89b4
Subproject commit
0201f7065c7d782a49ab7faa01876f01aabf3c31
_search-commons
@
bc9348fa
Compare
df676958
...
bc9348fa
Subproject commit
df67695877aa86e7ce77434952e886a616b7303e
Subproject commit
bc9348fa600b80863e88dfe43a7d570ec571865e
resources
@
25eda282
Compare
b951057e
...
25eda282
Subproject commit
b951057e62d916791f2509a861e58f8c317c2e00
Subproject commit
25eda28254d770a11f0a831c20d6af715f362382
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment