diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java new file mode 100644 index 0000000000000000000000000000000000000000..1ee34df29e0c81442293391c34f8947f08b5311e --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java @@ -0,0 +1,36 @@ +package fr.inra.urgi.faidare.domain.data; + +import java.util.List; + +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiLocation; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a location containing only its ID, used to generate sitemaps + */ +@Document(type = "location", includedFields = "locationDbId") +public class LocationSitemapVO { + + @Id + private String locationDbId; + + public LocationSitemapVO() { + } + + public LocationSitemapVO(String locationDbId) { + this.locationDbId = locationDbId; + } + + public String getLocationDbId() { + return locationDbId; + } + + public void setLocationDbId(String locationDbId) { + this.locationDbId = locationDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java new file mode 100644 index 0000000000000000000000000000000000000000..b7dc268148ed53309a64f4fbf808d5bdec580e68 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java @@ -0,0 +1,38 @@ +package fr.inra.urgi.faidare.domain.data.germplasm; + +import java.io.Serializable; +import java.util.List; + +import 
com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.annotation.Nulls; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiGermplasm; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a germplasm, containing only its ID, used for sitemaps + */ +@Document(type = "germplasm", includedFields = "germplasmDbId") +public class GermplasmSitemapVO { + + @Id + private String germplasmDbId; + + public GermplasmSitemapVO() { + } + + public GermplasmSitemapVO(String germplasmDbId) { + this.germplasmDbId = germplasmDbId; + } + + public String getGermplasmDbId() { + return germplasmDbId; + } + + public void setGermplasmDbId(String germplasmDbId) { + this.germplasmDbId = germplasmDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java new file mode 100644 index 0000000000000000000000000000000000000000..c4900361a93329fa4c623b71b4cebf7a360e49d6 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java @@ -0,0 +1,40 @@ +package fr.inra.urgi.faidare.domain.data.study; + +import java.util.Date; +import java.util.List; +import java.util.Set; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiStudySummary; +import fr.inra.urgi.faidare.domain.data.GnpISInternal; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import 
fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a study containing only its ID, used to generate sitemaps + */ +@Document(type = "study", includedFields = "studyDbId") +public class StudySitemapVO { + + @Id + private String studyDbId; + + public StudySitemapVO() { + } + + public StudySitemapVO(String studyDbId) { + this.studyDbId = studyDbId; + } + + public String getStudyDbId() { + return studyDbId; + } + + public void setStudyDbId(String studyDbId) { + this.studyDbId = studyDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java index f106e9b7709dfc4d8e407322732e72ddd576835a..04b8d4e2993512412b57f9acedafd4217cd4c245 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java @@ -68,6 +68,13 @@ public class ESScrollIterator<T> implements Iterator<T> { .size(fetchSize) .sort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + // Add included and excluded fields if requested + String[] includedFields = documentMetadata.getIncludedFields(); + String[] excludedFields = documentMetadata.getExcludedFields(); + if ((includedFields != null && includedFields.length >= 1) || (excludedFields != null && excludedFields.length >= 1)) { + request.source().fetchSource(includedFields, excludedFields); + } + SearchResponse response = null; try { response = client.search(request, RequestOptions.DEFAULT); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java index a521970d7baafc394c027314ae554af65fac1af2..eb63d3e3424b7addbbf70f1731001c94a4959d8d 100644 --- 
a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java @@ -54,8 +54,9 @@ public class DocumentAnnotationUtil { Map<String, DocumentMetadata.Field> fields = findDocumentFields(ImmutableList.<String>of(), valueObjectClass); + String[] includedFields = document.includedFields(); String[] excludedFields = document.excludedFields(); - metadata = new DocumentMetadata<>(documentType, idFieldName, valueObjectClass, excludedFields, fields); + metadata = new DocumentMetadata<>(documentType, idFieldName, valueObjectClass, includedFields, excludedFields, fields); metadataCache.put(valueObjectClass, metadata); } return metadata; diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java index 6bf7389bd43b46f6dc5cd72ef5883b932788f0a5..a54e37e3c5023fcf8516a9d6e4b347ec628293da 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java @@ -17,15 +17,21 @@ public class DocumentMetadata<VO> { private final String documentType; private final String idField; private final Class<VO> documentClass; + private final String[] includedFields; private final String[] excludedFields; private final Map<String, Field> fieldsByName; private final Map<List<String>, Field> fieldByPath; - public DocumentMetadata(String documentType, String idField, Class<VO> documentClass, String[] excludedFields, + public DocumentMetadata(String documentType, + String idField, + Class<VO> documentClass, + String[] includedFields, + String[] excludedFields, Map<String, Field> fieldsByName) { this.documentType = documentType; this.idField = idField; this.documentClass = documentClass; + this.includedFields = 
includedFields; this.excludedFields = excludedFields; this.fieldsByName = fieldsByName; this.fieldByPath = flattenDocumentFieldTree(ImmutableList.<String>of(), fieldsByName); @@ -57,6 +63,10 @@ public class DocumentMetadata<VO> { return idField; } + public String[] getIncludedFields() { + return includedFields; + } + public String[] getExcludedFields() { return excludedFields; } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java index df4af121fa9a0f84b8b90b6ec188f6f36b17bf23..3d0585a459d0fe20a030a5b2a6e88d5217dee332 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java @@ -13,5 +13,6 @@ import java.lang.annotation.Target; public @interface Document { String type(); + String[] includedFields() default {}; String[] excludedFields() default {}; } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java index 54e7b287e44db289e5a83771353fc4bca487741e..e1122d86108623783ca5bf7b8d01ce3739a71796 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java @@ -90,10 +90,11 @@ public class ESGenericFindRepository<C extends PaginationCriteria, VO> implement request.source().sort(field, order); } - // Add excluded fields if requested + // Add included and excluded fields if requested + String[] includedFields = documentMetadata.getIncludedFields(); String[] excludedFields = documentMetadata.getExcludedFields(); - if (excludedFields != null && excludedFields.length >= 1) { 
/**
 * Scroll through all germplasms, using the given fetch size.
 *
 * @param fetchSize the fetch size to use while scrolling
 * @return an iterator over the sitemap views ({@link GermplasmSitemapVO}) of all germplasms
 */
Iterator<GermplasmSitemapVO> scrollAllForSitemap(int fetchSize);
*/ diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java index 31a6e1a8c55537fb4ffe7a35b418909c3a8dfede..360087ce79e69f48685b4c2c3480b6de51766171 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import fr.inra.urgi.faidare.domain.criteria.FaidareGermplasmPOSTShearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmSearchCriteria; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmMcpdVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.ProgenyVO; @@ -90,6 +91,12 @@ public class GermplasmRepositoryImpl implements GermplasmRepository { this.criteriaMapping = AnnotatedCriteriaMapper.getMapping(criteriaClass); } + @Override + public Iterator<GermplasmSitemapVO> scrollAllForSitemap(int fetchSize) { + QueryBuilder query = QueryBuilders.matchAllQuery(); + return new ESScrollIterator<>(client, requestFactory, parser, GermplasmSitemapVO.class, query, fetchSize); + } + @Override public Iterator<GermplasmVO> scrollAll(GermplasmSearchCriteria criteria) { QueryBuilder query = queryFactory.createQuery(criteria); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java index 707f51bcd649768f66fdcbf99b8fee51736b5395..93e65b549079430cd2a2cfbbad8d2bb0676dd544 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java +++ 
/**
 * Scroll through all locations, using the given fetch size.
 *
 * @param fetchSize the fetch size to use while scrolling
 * @return an iterator over the sitemap views ({@link LocationSitemapVO}) of all locations
 */
Iterator<LocationSitemapVO> scrollAllForSitemap(int fetchSize);
-17,6 +24,10 @@ public class LocationRepositoryImpl extends BaseESRepository<LocationCriteria, LocationVO> implements LocationRepository { + private final RestHighLevelClient client; + private final ESRequestFactory requestFactory; + private final ESResponseParser parser; + @Autowired public LocationRepositoryImpl( RestHighLevelClient client, @@ -24,6 +35,14 @@ public class LocationRepositoryImpl ESResponseParser parser ) { super(client, requestFactory, LocationVO.class, parser); + this.client = client; + this.requestFactory = requestFactory; + this.parser = parser; } + @Override + public Iterator<LocationSitemapVO> scrollAllForSitemap(int fetchSize) { + QueryBuilder query = QueryBuilders.matchAllQuery(); + return new ESScrollIterator<>(client, requestFactory, parser, LocationSitemapVO.class, query, fetchSize); + } } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java index 6ceeca9f655fea5b94548d3c32941906c976d625..8831a8c1ff66f6a31aeac3825d690651da10c27b 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java @@ -1,12 +1,15 @@ package fr.inra.urgi.faidare.repository.es; import fr.inra.urgi.faidare.domain.criteria.StudyCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudySummaryVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.elasticsearch.repository.ESFindRepository; import fr.inra.urgi.faidare.elasticsearch.repository.ESGetByIdRepository; +import java.util.Iterator; import java.util.Set; /** @@ -29,4 +32,5 @@ public interface StudyRepository */ Set<String> getVariableIds(String studyDbId); + 
/**
 * Scroll through all studies, using the given fetch size.
 *
 * @param fetchSize the fetch size to use while scrolling
 * @return an iterator over the sitemap views ({@link StudySitemapVO}) of all studies
 */
Iterator<StudySitemapVO> scrollAllForSitemap(int fetchSize);
@@ import org.springframework.stereotype.Repository; import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -129,4 +135,9 @@ public class StudyRepositoryImpl return new LinkedHashSet<>(ids); } + @Override + public Iterator<StudySitemapVO> scrollAllForSitemap(int fetchSize) { + QueryBuilder query = QueryBuilders.matchAllQuery(); + return new ESScrollIterator<>(client, requestFactory, parser, StudySitemapVO.class, query, fetchSize); + } } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java b/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java new file mode 100644 index 0000000000000000000000000000000000000000..ba32b8cf286c1c04b589e50a4338addae9df2e78 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java @@ -0,0 +1,125 @@ +package fr.inra.urgi.faidare.utils; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.Iterator; +import java.util.Spliterators; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; +import org.springframework.web.servlet.support.ServletUriComponentsBuilder; + +/** + * A generator of site maps. 
+ * @author JB Nizet + */ +@Component +public class Sitemaps { + public static final int BUCKET_COUNT = 11; + + public static <T> void generateSitemap(String sitemapPath, + OutputStream out, + Iterator<T> entryIterator, + Predicate<T> entryPredicate, + Function<T, String> entryToPath) { + SanityChecker sanityChecker = new SanityChecker(sitemapPath); + + Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); + Stream<T> entries = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(entryIterator, 0), false); + entries.filter(entryPredicate) + .map(entryToPath) + .map(entryPath -> Sitemaps.generateSitemapUrl(entryPath) + '\n') + .forEach(entry -> { + try { + writer.write(entry); + sanityChecker.addEntry(entry); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + + try { + writer.flush(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + sanityChecker.check(); + } + + public static String generateSitemapUrl(String path) { + return ServletUriComponentsBuilder + .fromCurrentContextPath() + .path(path) + .toUriString(); + } + + private static class SanityChecker { + private static final Logger LOGGER = LoggerFactory.getLogger(SanityChecker.class); + + private static final int MAX_ENTRY_COUNT = 50_000; + private static final int MAX_BYTE_COUNT = 50 * 1024 * 1024; + + private static final int DANGER_ENTRY_COUNT = 40_000; + private static final int DANGER_BYTE_COUNT = 40 * 1024 * 1024; + + private final String sitemapPath; + private int entryCount = 0; + private int byteCount = 0; + + public SanityChecker(String sitemapPath) { + this.sitemapPath = sitemapPath; + } + + public void addEntry(String entry) { + entryCount++; + byteCount += entry.length(); + } + + public void check() { + if (entryCount > MAX_ENTRY_COUNT) { + LOGGER.error("The generated sitemap at path " + + sitemapPath + + " has more than " + + MAX_ENTRY_COUNT + + " entries and will thus be rejected by search engines. 
Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries."); + } else if (entryCount > DANGER_ENTRY_COUNT) { + LOGGER.warn("The generated sitemap at path " + + sitemapPath + + " has more than " + + DANGER_ENTRY_COUNT + + " entries and is thus approaching the max of " + + MAX_ENTRY_COUNT + + ". Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries."); + } + + if (byteCount > MAX_BYTE_COUNT) { + LOGGER.error("The generated sitemap at path " + + sitemapPath + + " has more than " + + MAX_BYTE_COUNT + + " bytes and will thus be rejected by search engines. Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries."); + } else if (entryCount > DANGER_ENTRY_COUNT) { + LOGGER.warn("The generated sitemap at path " + + sitemapPath + + " has more than " + + DANGER_BYTE_COUNT + + " bytes and is thus approaching the max of " + + MAX_BYTE_COUNT + + ". Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries."); + } + } + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java index cd24d06dc61698b217db1422c9585ed4b7b97b05..973d2de38c9f3e04fb85b2b073bda308b6691719 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java @@ -4,9 +4,16 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; +import java.util.Spliterators; import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import javax.servlet.http.HttpServletRequest; + +import com.google.common.collect.Streams; import fr.inra.urgi.faidare.api.NotFoundException; import fr.inra.urgi.faidare.config.FaidareProperties; import 
fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiGermplasmAttributeValue; @@ -18,25 +25,30 @@ import fr.inra.urgi.faidare.domain.data.germplasm.DonorVO; import fr.inra.urgi.faidare.domain.data.germplasm.GenealogyVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmAttributeValueVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmInstituteVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.InstituteVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.PhotoVO; import fr.inra.urgi.faidare.domain.data.germplasm.PuiNameValueVO; import fr.inra.urgi.faidare.domain.data.germplasm.SiblingVO; -import fr.inra.urgi.faidare.domain.data.germplasm.SimpleVO; import fr.inra.urgi.faidare.domain.data.germplasm.SiteVO; import fr.inra.urgi.faidare.domain.data.germplasm.TaxonSourceVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.GermplasmAttributeRepository; import fr.inra.urgi.faidare.repository.es.GermplasmRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a germplasm card based on its ID. 
@@ -49,7 +61,7 @@ public class GermplasmController { private final GermplasmRepository germplasmRepository; private final FaidareProperties faidareProperties; private final XRefDocumentRepository xRefDocumentRepository; - private GermplasmAttributeRepository germplasmAttributeRepository; + private final GermplasmAttributeRepository germplasmAttributeRepository; public GermplasmController(GermplasmRepository germplasmRepository, FaidareProperties faidareProperties, @@ -87,6 +99,26 @@ public class GermplasmController { return toModelAndView(germplasms.get(0)); } + + @GetMapping(value = "/sitemap-{index}.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap(@PathVariable("index") int index) { + if (index < 0 || index >= Sitemaps.BUCKET_COUNT) { + throw new NotFoundException("no sitemap for this index"); + } + StreamingResponseBody body = out -> { + Iterator<GermplasmSitemapVO> iterator = germplasmRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/germplasms/sitemap-" + index + ".txt", + out, + iterator, + vo -> Math.floorMod(vo.getGermplasmDbId().hashCode(), Sitemaps.BUCKET_COUNT) == index, + vo -> "/germplasms/" + vo.getGermplasmDbId() + ); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private ModelAndView toModelAndView(GermplasmVO germplasm) { // List<BrapiGermplasmAttributeValue> attributes = getAttributes(germplasm); // List<XRefDocumentVO> crossReferences = xRefDocumentRepository.find( diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java index 151da527d4393d1f388de8d6098b52d4219d98b2..3b05139df1fa4e7e3584bd6695251fd9fe1bf6a9 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java @@ -1,20 +1,29 @@ package fr.inra.urgi.faidare.web.site; import java.util.Arrays; +import 
java.util.Iterator; import java.util.List; +import javax.servlet.http.HttpServletRequest; + import fr.inra.urgi.faidare.api.NotFoundException; import fr.inra.urgi.faidare.config.FaidareProperties; import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.LocationRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a site card based on its ID. 
@@ -64,6 +73,22 @@ public class SiteController { ); } + @GetMapping(value = "/sitemap.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap() { + StreamingResponseBody body = out -> { + Iterator<LocationSitemapVO> iterator = locationRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/sites/sitemap.txt", + out, + iterator, + vo -> true, + vo -> "/sites/" + vo.getLocationDbId() + ); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private LocationVO createSite() { LocationVO site = new LocationVO(); site.setLocationName("France"); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java new file mode 100644 index 0000000000000000000000000000000000000000..1dd480dfbd465d243ae3b32b535c32b81c6b355a --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java @@ -0,0 +1,57 @@ +package fr.inra.urgi.faidare.web.sitemap; + +import java.nio.charset.StandardCharsets; + +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +/** + * Controller used to generate the sitemap index containing links to the site sitemap + * (unique), the N sitemaps for the studies and the N sitemaps for the germplasms. + * + * Sitemaps for studies and germplasms are split in N buckets because sitemaps + * can't be more than 50 MB and can't have more than 50,000 entries. + * Splitting them in N sitemaps makes it almost sure that none of the sitemaps + * overflows those limits. 
+ * + * The sitemaps are split based on the hashCode of the ID of the document: + * if a document's hash code modulo N is 0, then it's in the sitemap-0.txt, etc. + * + * @author JB Nizet + */ +@RestController +@RequestMapping("") +public class SitemapIndexController { + @GetMapping("/sitemap.xml") + public ResponseEntity<byte[]> sitemapIndex() { + StringBuilder builder = new StringBuilder(); + builder + .append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") + .append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"); + + appendSiteMap(builder, "/sites/sitemap.txt"); + for (int i = 0; i < Sitemaps.BUCKET_COUNT; i++) { + appendSiteMap(builder, "/germplasms/sitemap-" + i + ".txt"); + } + for (int i = 0; i < Sitemaps.BUCKET_COUNT; i++) { + appendSiteMap(builder, "/studies/sitemap-" + i + ".txt"); + } + builder.append("</sitemapindex>"); + + return ResponseEntity.ok() + .contentType(MediaType.TEXT_XML) + .body(builder.toString().getBytes(StandardCharsets.UTF_8)); + } + + public void appendSiteMap(StringBuilder builder, String path) { + builder.append(" <sitemap>\n") + .append(" <loc>") + .append(Sitemaps.generateSitemapUrl(path)) + .append("</loc>\n") + .append(" </sitemap>\n"); + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java index 833c45471061a5ef02d7f5fc57319fc9afc8a12b..e9a9ac46f890cc8dc32c2234da82265f1cf9a37e 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java @@ -3,19 +3,24 @@ package fr.inra.urgi.faidare.web.study; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import javax.servlet.http.HttpServletRequest; + import 
com.google.common.collect.Lists; import fr.inra.urgi.faidare.api.NotFoundException; import fr.inra.urgi.faidare.config.FaidareProperties; import fr.inra.urgi.faidare.domain.criteria.GermplasmPOSTSearchCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.data.TrialVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.variable.ObservationVariableVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.GermplasmRepository; @@ -24,13 +29,18 @@ import fr.inra.urgi.faidare.repository.es.StudyRepository; import fr.inra.urgi.faidare.repository.es.TrialRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; import fr.inra.urgi.faidare.repository.file.CropOntologyRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; import fr.inra.urgi.faidare.web.site.MapLocation; import org.apache.logging.log4j.util.Strings; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a study card based on its ID. 
@@ -104,6 +114,34 @@ public class StudyController { ); } + /** + * Streams the plain-text sitemap for one bucket of studies. A study is listed in the + * bucket equal to floorMod(hashCode of its studyDbId, Sitemaps.BUCKET_COUNT). + * + * @param index bucket number taken from the URL; valid values are 0 (inclusive) to + * Sitemaps.BUCKET_COUNT (exclusive) + * @return a text/plain response whose body is produced while iterating over + * studyRepository.scrollAllForSitemap + * @throws NotFoundException if index is outside the valid bucket range + */ + @GetMapping(value = "/sitemap-{index}.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap(@PathVariable("index") int index) { + if (index < 0 || index >= Sitemaps.BUCKET_COUNT) { + throw new NotFoundException("no sitemap for this index"); + } + StreamingResponseBody body = out -> { + Iterator<StudySitemapVO> iterator = studyRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/studies/sitemap-" + index + ".txt", + out, + iterator, + vo -> Math.floorMod(vo.getStudyDbId().hashCode(), Sitemaps.BUCKET_COUNT) == index, + vo -> "/studies/" + vo.getStudyDbId()); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private LocationVO getLocation(StudyDetailVO study) { if (Strings.isBlank(study.getLocationDbId())) { return null; diff --git a/backend/src/main/resources/application.yml b/backend/src/main/resources/application.yml index 2b684f600c3e548456c9e68ef473116839440929..206012ca02782c52dbdf1b27b80ac0897f351bf2 100644 --- a/backend/src/main/resources/application.yml +++ b/backend/src/main/resources/application.yml @@ -86,6 +86,7 @@ server: - text/html - text/css - text/markdown + - text/plain port: 8380 servlet: context-path: /faidare-dev diff --git a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java index b8a86407533fef5f21831fd31f01f753e8524f29..a712bfe44b6b5683c5c1f87a290e48d790136920 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java @@ -50,6 +50,7 @@ class DocumentAnnotationUtilTest { .getDocumentObjectMetadata(ComplexDocument.class); assertThat(metadata).isNotNull();
assertThat(metadata.getDocumentType()).isEqualTo("dataObject4"); + assertThat(metadata.getIncludedFields()).containsExactly("id", "nested0"); assertThat(metadata.getExcludedFields()).containsExactly("a", "b"); assertThat(metadata.getIdField()).isEqualTo("@id"); diff --git a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java index dd9a2c789623a79ab4101136bdbd08e21fb82703..e799a70ab4fd8f21413735811c2bd058311f61f0 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java @@ -11,7 +11,7 @@ import java.util.List; /** * @author gcornut */ -@Document(type = "dataObject4", excludedFields = {"a", "b"}) +@Document(type = "dataObject4", includedFields = { "id", "nested0"}, excludedFields = {"a", "b"}) public class ComplexDocument { @Id(jsonName = "@id") String id; diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java index 76285c6934b21838e2af8c7c9446aafde00787e5..7705ab8afdb15a5c4ca4474a56bccb150d62073a 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java @@ -6,12 +6,14 @@ import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.GermplasmGETSearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmPOSTSearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmSearchCriteria; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import 
fr.inra.urgi.faidare.domain.data.germplasm.ProgenyVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.domain.response.Pagination; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; +import org.assertj.core.data.Index; import org.assertj.core.util.Lists; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -28,6 +30,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Objects; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; @@ -126,6 +129,12 @@ class GermplasmRepositoryTest { assertThat(list).isNotNull().hasSize(0); } + @Test + void shouldScrollAllForSitemap() { + Iterator<GermplasmSitemapVO> list = repository.scrollAllForSitemap(100); + assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getGermplasmDbId().isEmpty()); + } @Test void should_Scroll_By_accessionNumber() { diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java index 88bcef7e6130cde2111f28f0e84ab70aae2a9e73..d7700556221825480c36b8ef8c3ce78f2338eb51 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java @@ -3,7 +3,9 @@ package fr.inra.urgi.faidare.repository.es; import com.google.common.collect.Sets; import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.LocationCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; import org.junit.jupiter.api.BeforeAll; @@ -16,6 
+18,7 @@ import org.springframework.context.annotation.Import; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit.jupiter.SpringExtension; +import java.util.Iterator; import java.util.Set; import static org.assertj.core.api.Assertions.assertThat; @@ -114,4 +117,13 @@ class LocationRepositoryTest { assertThat(locations).isNotNull().hasSize(3); assertThat(locations).extracting("locationType").containsOnlyElementsOf(expectedTypes); } + + @Test + void shouldScrollAllForSitemap() { + Iterator<LocationSitemapVO> list = repository.scrollAllForSitemap(100); + assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getLocationDbId().isEmpty()); + } + + } diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java index 42b2328714b627b7b2fc2099b3614257f61c7b49..fc73d3f6515d43e9f7c0e09bc7db1b4d27c31b06 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java @@ -4,7 +4,9 @@ import com.google.common.collect.Sets; import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.StudySearchCriteria; import fr.inra.urgi.faidare.domain.data.LocationVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudySummaryVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; @@ -20,6 +22,7 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit.jupiter.SpringExtension; import java.util.Comparator; +import java.util.Iterator; import java.util.Set; import static 
org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; @@ -157,6 +160,13 @@ class StudyRepositoryTest { assertThat(result).extracting(sortField).isSortedAccordingTo(new DescendingOrder()); } + @Test + void shouldScrollAllForSitemap() { + Iterator<StudySitemapVO> list = repository.scrollAllForSitemap(100); + Assertions.assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getStudyDbId().isEmpty()); + } + private class DescendingOrder implements Comparator<Object> { @Override public int compare(Object o1, Object o2) { diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..75f6f3b2ce3609c627e1e1d437a530ed60d0fad5 --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java @@ -0,0 +1,89 @@ +package fr.inra.urgi.faidare.web.germplasm; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.asyncDispatch; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; +import fr.inra.urgi.faidare.repository.es.GermplasmAttributeRepository; +import fr.inra.urgi.faidare.repository.es.GermplasmRepository; +import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import fr.inra.urgi.faidare.web.study.StudyController; +import org.junit.jupiter.api.Test; 
+import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; + +/** + * MVC tests for {@link GermplasmController} + * @author JB Nizet + */ +@WebMvcTest(GermplasmController.class) +public class GermplasmControllerTest { + + @Autowired + private MockMvc mockMvc; + + @MockBean + private GermplasmRepository mockGermplasmRepository; + + @MockBean + private FaidareProperties mockFaidareProperties; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private GermplasmAttributeRepository mockGermplasmAttributeRepository; + + + @Test + void shouldGenerateSitemap() throws Exception { + List<GermplasmSitemapVO> germplasms = Arrays.asList( + new GermplasmSitemapVO("germplasm1"), + new GermplasmSitemapVO("germplasm4"), + new GermplasmSitemapVO("germplasm45"), + new GermplasmSitemapVO("germplasm73") + ); + + // the String.hashCode() algorithm is specified in its javadoc, so it's guaranteed to be + // the same everywhere + // uncomment the following line to see which sitemap index each germplasm has + // germplasms.forEach(germplasm -> System.out.println(germplasm.getGermplasmDbId() + " = " + Math.floorMod(germplasm.getGermplasmDbId().hashCode(), Sitemaps.BUCKET_COUNT))); + + when(mockGermplasmRepository.scrollAllForSitemap(anyInt())).thenAnswer(invocation -> germplasms.iterator()); + + testSitemap(6, "http://localhost/faidare/germplasms/germplasm1\nhttp://localhost/faidare/germplasms/germplasm45\n"); + testSitemap(9, "http://localhost/faidare/germplasms/germplasm4\nhttp://localhost/faidare/germplasms/germplasm73\n"); + testSitemap(7, ""); + + mockMvc.perform(get("/faidare/germplasms/sitemap-17.txt") + .contextPath("/faidare")) + .andExpect(status().isNotFound()); +
} + + private void testSitemap(int index, String expectedContent) throws Exception { + MvcResult mvcResult = mockMvc.perform(get("/faidare/germplasms/sitemap-" + index + ".txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string(expectedContent)); + + } +} diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..a71bfa4cf3acb16dfc4b92de3cac7ccaacfb3a8f --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java @@ -0,0 +1,62 @@ +package fr.inra.urgi.faidare.web.site; + +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; +import fr.inra.urgi.faidare.repository.es.LocationRepository; +import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import 
org.springframework.test.web.servlet.MvcResult; +import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import org.springframework.test.web.servlet.result.MockMvcResultMatchers; + +/** + * MVC tests for {@link SiteController} + * @author JB Nizet + */ +@WebMvcTest(SiteController.class) +public class SiteControllerTest { + @Autowired + private MockMvc mockMvc; + + @MockBean + private LocationRepository mockLocationRepository; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private FaidareProperties mockFaidareProperties; + + @Test + void shouldGenerateSitemap() throws Exception { + List<LocationSitemapVO> sites = Arrays.asList( + new LocationSitemapVO("site1"), + new LocationSitemapVO("site2") + ); + when(mockLocationRepository.scrollAllForSitemap(anyInt())).thenReturn(sites.iterator()); + MvcResult mvcResult = mockMvc.perform(get("/faidare/sites/sitemap.txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string("http://localhost/faidare/sites/site1\nhttp://localhost/faidare/sites/site2\n")); + } +} diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..ea03832f41a262ea210eb6f093ef68fe5b583aab --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java @@ -0,0 +1,34 @@ +package fr.inra.urgi.faidare.web.sitemap; + +import static org.junit.jupiter.api.Assertions.*; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import 
fr.inra.urgi.faidare.utils.Sitemaps; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; + +/** + * MVC tests for {@link SitemapIndexController} + * @author JB Nizet + */ +@WebMvcTest(SitemapIndexController.class) +class SitemapIndexControllerTest { + @Autowired + private MockMvc mockMvc; + + @Test + void shouldGenerateSitemapIndex() throws Exception { + mockMvc.perform(get("/faidare/sitemap.xml").contextPath("/faidare")) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_XML)) + .andExpect(xpath("/sitemapindex/sitemap[1]/loc").string("http://localhost/faidare/sites/sitemap.txt")) + .andExpect(xpath("/sitemapindex/sitemap[2]/loc").string("http://localhost/faidare/germplasms/sitemap-0.txt")) + .andExpect(xpath("/sitemapindex/sitemap[3]/loc").string("http://localhost/faidare/germplasms/sitemap-1.txt")) + .andExpect(xpath("/sitemapindex/sitemap[" + (Sitemaps.BUCKET_COUNT + 2) + "]/loc").string("http://localhost/faidare/studies/sitemap-0.txt")) + .andExpect(xpath("/sitemapindex/sitemap[" + (Sitemaps.BUCKET_COUNT + 3) + "]/loc").string("http://localhost/faidare/studies/sitemap-1.txt")); + } +} diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..cfa58e320cd6dc55d7ae41a7c24d778c61c448d3 --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java @@ -0,0 +1,97 @@ +package fr.inra.urgi.faidare.web.study; + +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.asyncDispatch; +import 
static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; +import fr.inra.urgi.faidare.repository.es.GermplasmRepository; +import fr.inra.urgi.faidare.repository.es.LocationRepository; +import fr.inra.urgi.faidare.repository.es.StudyRepository; +import fr.inra.urgi.faidare.repository.es.TrialRepository; +import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.repository.file.CropOntologyRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import fr.inra.urgi.faidare.web.site.SiteController; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; + +/** + * MVC tests for {@link StudyController} + * @author JB Nizet + */ +@WebMvcTest(StudyController.class) +public class StudyControllerTest { + @Autowired + private MockMvc mockMvc; + + @MockBean + private StudyRepository mockStudyRepository; + + @MockBean + private FaidareProperties mockFaidareProperties; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private GermplasmRepository mockGermplasmRepository; + + @MockBean + private CropOntologyRepository mockCropOntologyRepository; + + @MockBean + private TrialRepository mockTrialRepository; + + @MockBean + private LocationRepository mockLocationRepository; + + @Test + void shouldGenerateSitemap() throws Exception { + 
List<StudySitemapVO> studies = Arrays.asList( + new StudySitemapVO("study1"), + new StudySitemapVO("study4"), + new StudySitemapVO("study51"), + new StudySitemapVO("study72") + ); + + // the hashCode algorithm is specified in the javadoc, so it's guaranteed to be + // the same everywhere + // uncomment the following line to see which sitemap index each study has + // studies.forEach(study -> System.out.println(study.getStudyDbId() + " = " + Math.floorMod(study.getStudyDbId().hashCode(), Sitemaps.BUCKET_COUNT))); + + when(mockStudyRepository.scrollAllForSitemap(anyInt())).thenAnswer(invocation -> studies.iterator()); + testSitemap(6, "http://localhost/faidare/studies/study1\nhttp://localhost/faidare/studies/study72\n"); + testSitemap(9, "http://localhost/faidare/studies/study4\nhttp://localhost/faidare/studies/study51\n"); + testSitemap(7, ""); + + mockMvc.perform(get("/faidare/studies/sitemap-17.txt") + .contextPath("/faidare")) + .andExpect(status().isNotFound()); + } + + private void testSitemap(int index, String expectedContent) throws Exception { + MvcResult mvcResult = mockMvc.perform(get("/faidare/studies/sitemap-" + index + ".txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string(expectedContent)); + + } +}