Commit eceba500 authored by Gradl, Tobias
Browse files

Major refactoring: gtf-processing-adapter moved to processing

parent 30ce4c10
package eu.dariah.de.search.wikipedia;
import java.io.Serializable;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import de.dariah.search.context.IndexInitService;
import eu.dariah.de.minfba.core.metamodel.interfaces.Schema;
import eu.dariah.de.minfba.processing.consumption.ResourceConsumptionService;
import eu.dariah.de.minfba.processing.model.SerializableResource;
import eu.dariah.de.minfba.processing.model.base.Resource;
/**
 * {@link ResourceConsumptionService} that serializes consumed resources to JSON and
 * indexes them into Elasticsearch.
 *
 * <p>Lifecycle as visible in this class: {@link #afterPropertiesSet()} resolves the
 * concrete index name from the configured alias, {@link #init(Schema)} opens a bulk
 * request, {@link #consume(Resource)} queues one index request per resource and
 * {@link #commit()} executes the queued batch.
 *
 * <p>NOTE(review): the instance keeps a mutable bulk builder without synchronization;
 * it presumably is driven by a single thread per processing run - confirm with callers.
 */
public class WikipediaConsumptionService implements ResourceConsumptionService, InitializingBean {
    private static final Logger logger = LoggerFactory.getLogger(WikipediaConsumptionService.class);

    @Autowired private Client client;
    @Autowired private IndexInitService initService;
    @Autowired private ObjectMapper objMapper;

    /** Concrete index name, resolved from {@link #indexAlias} in {@link #afterPropertiesSet()}. */
    private String indexName;
    private String indexAlias;
    /** Passed as the second argument of {@code client.prepareIndex(index, ...)}. */
    private String mapping;

    /** Pending bulk batch; {@code null} until {@link #init(Schema)} has been called. */
    private BulkRequestBuilder bulkRequest;

    public String getIndexAlias() {
        return indexAlias;
    }

    public void setIndexAlias(String indexAlias) {
        this.indexAlias = indexAlias;
    }

    public String getMapping() {
        return mapping;
    }

    public void setMapping(String mapping) {
        this.mapping = mapping;
    }

    /** Resolves the index name for the configured alias once all properties are injected. */
    @Override
    public void afterPropertiesSet() throws Exception {
        indexName = initService.getIndexForAlias(indexAlias);
    }

    /**
     * Starts a new bulk batch. The {@code config} argument is not used by this implementation.
     */
    @Override
    public void init(Schema config) {
        bulkRequest = client.prepareBulk();
        logger.info("Preparing wikipedia bulk indexing");
    }

    /**
     * Serializes the resource to JSON and queues (or directly executes) an index request for it.
     * Serialization failures are logged and the resource is skipped.
     *
     * @param res resource to index; must be a {@link SerializableResource}
     * @throws ClassCastException if {@code res} is not a {@link SerializableResource}
     */
    @Override
    public void consume(Resource res) {
        SerializableResource serializable = (SerializableResource) res;
        try {
            String json = objMapper.writeValueAsString(serializable);
            this.index(client.prepareIndex(indexName, mapping).setSource(json));
        } catch (JsonProcessingException e) {
            logger.error("Prepare indexing error", e);
        }
    }

    /**
     * Adds the request to the pending bulk batch, or executes it immediately (blocking)
     * when no batch has been opened via {@link #init(Schema)}.
     */
    private void index(IndexRequestBuilder req) {
        if (bulkRequest != null) {
            bulkRequest.add(req);
        } else {
            req.execute().actionGet();
        }
    }

    /**
     * Executes the pending bulk batch, blocking until Elasticsearch responds.
     *
     * @return number of items in the bulk response, or {@code 0} when no batch was opened
     *         or nothing was queued
     */
    @Override
    public int commit() {
        // index() tolerates a missing batch, so commit() must not fail on it either.
        if (bulkRequest == null || bulkRequest.numberOfActions() == 0) {
            return 0;
        }

        BulkResponse bulkResponse = bulkRequest.execute().actionGet();

        // The executed builder still holds its actions; start a fresh batch only after a
        // successful round trip so a later commit() does not re-send these documents,
        // while a thrown exception leaves the batch in place for the caller to retry.
        bulkRequest = client.prepareBulk();

        if (bulkResponse.hasFailures()) {
            logger.error("Bulk indexing of wikipedia records completed with failures: {}",
                    bulkResponse.buildFailureMessage());
        }

        int docCount = bulkResponse.getItems().length;
        logger.debug("Completed bulk indexing {} wikipedia records", docCount);
        return docCount;
    }
}
......@@ -18,7 +18,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import de.dariah.search.context.IndexInitService;
import eu.dariah.de.minfba.core.metamodel.interfaces.Schema;
import eu.dariah.de.minfba.processing.consumption.ResourceConsumptionService;
import eu.dariah.de.minfba.processing.json.ResourceSerializationTest;
import eu.dariah.de.minfba.processing.model.SerializableResource;
import eu.dariah.de.minfba.processing.model.base.Resource;
......
......@@ -127,7 +127,7 @@
</property> -->
</bean>
<bean id="gtfElementProcessor" class="eu.dariah.de.minfba.processing.gtf.GtfElementProcessor">
<bean id="gtfElementProcessor" class="de.unibamberg.minf.processing.gtf.GtfElementProcessor">
<property name="transformationEngine" ref="transformationEngine" />
</bean>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment