diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactory.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactory.java index 8c64809d..98c93635 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactory.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactory.java @@ -64,6 +64,20 @@ public class CQLToElasticFilterFactory & CQLFieldsInterface> i @Getter protected Map querySetting; + /** + * Indicates that a parameter vocabulary filter was found and curated parameter values (parameter_vocabs) should + * be prioritised in the Elasticsearch result ordering. + */ + @Getter + protected boolean parameterPrioritySort = false; + + /** + * Indicates that a platform vocabulary filter was found and curated platform values (platform_vocabs) should be + * prioritised in the Elasticsearch result ordering. + */ + @Getter + protected boolean platformPrioritySort = false; + public CQLToElasticFilterFactory(CQLCrsType cqlCoorSystem, Class tClass) { this(cqlCoorSystem, tClass, new HashMap<>()); } @@ -255,6 +269,9 @@ public PropertyIsEqualTo equal(Expression expression, Expression expression1, bo return equal(expression, expression1, b, null); } + /** + * Creates an Elasticsearch equality filter and records metadata used to build the search request. + */ @Override public PropertyIsEqualTo equal(Expression expression, Expression expression1, boolean b, MultiValuedFilter.MatchAction matchAction) { logger.debug("PropertyIsEqualTo {} {}, {} {}", expression, expression1, b, matchAction); @@ -267,6 +284,17 @@ public PropertyIsEqualTo equal(Expression expression, Expression expression1, bo return setting; } + // Record curated vocabulary filters so the search service can prioritise curated records. + if (expression instanceof AttributeExpressionImpl attribute && expression1 instanceof LiteralExpressionImpl) { + String fieldName = attribute.toString().toLowerCase(); + if (fieldName.equals("parameter_vocabs")) { + this.parameterPrioritySort = true; + } + if (fieldName.equals("platform_vocabs")) { + this.platformPrioritySort = true; + } + } + return new PropertyEqualToImpl<>(expression, expression1, b, matchAction, collectionFieldType); } diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/OrImpl.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/OrImpl.java index 1fed3d2a..140786d0 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/OrImpl.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/parser/elastic/OrImpl.java @@ -8,65 +8,84 @@ import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; public class OrImpl extends QueryHandler implements Or { protected List children = new ArrayList<>(); - public OrImpl(Filter filter1, Filter filter2) { + private static boolean containsElasticSetting(Filter filter) { + if (filter instanceof ElasticSetting) { + return true; + } - if(filter1 instanceof ElasticSetting && filter2 instanceof QueryHandler elasticFilter2) { - this.addErrors(elasticFilter2.getErrors()); - throw new IllegalArgumentException("Or combine with query setting do not make sense"); + return filter instanceof OrImpl orFilter + && orFilter.getChildren().stream().anyMatch(OrImpl::containsElasticSetting); + } + + /** + * Recursively extracts leaf Elasticsearch queries from nested OR filters and returns them as a flat list. + * The caller uses this list to construct a single bool/should query. + */ + private static List collectQueries(Filter filter) { + if (filter instanceof OrImpl orFilter) { + return orFilter.getChildren().stream() + .flatMap(child -> collectQueries(child).stream()) + .toList(); } - else if(filter2 instanceof ElasticSetting && filter1 instanceof QueryHandler elasticFilter1){ - this.addErrors(elasticFilter1.getErrors()); + + if (filter instanceof QueryHandler handler && handler.getQuery() != null) { + return List.of(handler.getQuery()); + } + + return List.of(); + } + + + /** + * Builds the Elasticsearch representation of an OR expression. + * + * A single query is returned directly. Multiple queries are combined into + * one flat bool/should query to avoid deeply nested bool queries for large + * vocabulary selections. + */ + private void buildQuery(List filters) { + if (filters.stream().anyMatch(OrImpl::containsElasticSetting)) { throw new IllegalArgumentException("Or combine with query setting do not make sense"); } - else if(filter1 instanceof QueryHandler elasticFilter1 && filter2 instanceof QueryHandler elasticFilter2) { - // If the CQL contains ElasticSetting then the query will be null, this check is used to make sure - // we ignore those null query - if(elasticFilter1.query != null && elasticFilter2.query != null) { - this.query = BoolQuery.of(f -> f - .should(elasticFilter1.query, elasticFilter2.query) - )._toQuery(); - } - else if(elasticFilter1.query != null) { - this.query = elasticFilter1.query; - } - else { - this.query = elasticFilter2.query; - } - - children.add(filter1); - children.add(filter2); - - // Remember to copy the error from child - this.addErrors(elasticFilter1.getErrors()); - this.addErrors(elasticFilter2.getErrors()); + + List queries = filters.stream() + .flatMap(filter -> collectQueries(filter).stream()) + .toList(); + + if (queries.size() == 1) { + this.query = queries.get(0); + } else if (!queries.isEmpty()) { + this.query = BoolQuery.of(b -> b.should(queries))._toQuery(); } } - public OrImpl(List filters) { - // Extract query object in the filters, it must be an ElasitcFilter - List elasticFilters = filters.stream() - .filter(f -> f instanceof QueryHandler) - .map(m -> (QueryHandler)m) - .collect(Collectors.toList()); + public OrImpl(Filter filter1, Filter filter2) { + children.add(filter1); + children.add(filter2); - List queries = elasticFilters.stream() - .map(m -> m.query) - .collect(Collectors.toList()); + buildQuery(children); - this.query = BoolQuery.of(f -> f - .should(queries)) - ._toQuery(); + if (filter1 instanceof QueryHandler handler) { + addErrors(handler.getErrors()); + } + if (filter2 instanceof QueryHandler handler) { + addErrors(handler.getErrors()); + } + } + public OrImpl(List filters) { children.addAll(filters); + buildQuery(children); - // Copy child error if any - elasticFilters.stream().forEach(elasticFilter -> {this.addErrors(elasticFilter.getErrors());}); + filters.stream() + .filter(QueryHandler.class::isInstance) + .map(QueryHandler.class::cast) + .forEach(handler -> addErrors(handler.getErrors())); } @Override diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java index b3d0e3ee..b4b468a1 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java @@ -8,8 +8,9 @@ import au.org.aodn.ogcapi.server.core.parser.elastic.CQLToElasticFilterFactory; import au.org.aodn.ogcapi.server.core.parser.elastic.QueryHandler; import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch._types.FieldValue; +import co.elastic.clients.elasticsearch._types.*; import co.elastic.clients.elasticsearch._types.query_dsl.*; +import co.elastic.clients.json.JsonData; import co.elastic.clients.elasticsearch.core.SearchMvtRequest; import co.elastic.clients.elasticsearch.core.SearchRequest; import co.elastic.clients.elasticsearch.core.SearchResponse; @@ -255,6 +256,27 @@ public ElasticSearchBase.SearchResult searchAllCollections( return searchCollectionsByIds(null, Boolean.FALSE, sortBy); } + protected SortOptions parameterVocabsPrioritySort() { + return vocabPrioritySort(StacBasicField.ParameterVocabs.searchField); + } + + protected SortOptions platformVocabsPrioritySort() { + return vocabPrioritySort(StacBasicField.PlatformVocabs.searchField); + } + + protected SortOptions vocabPrioritySort(String vocabField) { + return SortOptions.of(so -> so + .script(s -> s + .type(ScriptSortType.Number) + .script(sc -> sc + .lang("painless") + .source( + "return doc.containsKey('" + vocabField + ".keyword') && " + + "!doc['" + vocabField + ".keyword'].empty ? 1 : 0;" + )) + .order(SortOrder.Desc))); + } + @Override public ElasticSearchBase.SearchResult searchByParameters(List keywords, String cql, List properties, String sortBy, CQLCrsType coor) throws CQLException { @@ -376,13 +398,30 @@ public ElasticSearchBase.SearchResult searchByParameters(Li .toList(); } + List sortOptions = createSortOptions(sortBy, CQLFields.class); + // When the filter searches curated vocab fields, prepend presence-based priority sort keys + // so matching human-curated records rank above AI-generated fallback records. This is + // the first sort key; existing -score,-rank ordering is preserved within each tier. + if (factory.isParameterPrioritySort()) { + if (sortOptions == null) { + sortOptions = new ArrayList<>(); + } + sortOptions.add(0, parameterVocabsPrioritySort()); + } + if (factory.isPlatformPrioritySort()) { + if (sortOptions == null) { + sortOptions = new ArrayList<>(); + } + sortOptions.add(0, platformVocabsPrioritySort()); + } + return searchCollectionBy( null, should, filters, properties, searchAfter, - createSortOptions(sortBy, CQLFields.class), + sortOptions, score, maxSize ); diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java index a4630e0d..46f9fb6d 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java @@ -555,11 +555,11 @@ public void verifyCQLPropertyScore() throws IOException { "bf287dfe-9ce4-4969-9c59-51c39ea4d011.json" ); // Make sure AND operation works - ResponseEntity collections = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 AND parameter_vocabs='wave'", Collections.class); + ResponseEntity collections = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 AND (parameter_vocabs='wave' OR ai_parameter_vocabs='wave')", Collections.class); assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, only one record"); // Make sure OR not work as it didn't make sense to use or with setting - ResponseEntity error = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 OR parameter_vocabs='wave'", ErrorResponse.class); + ResponseEntity error = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 OR (parameter_vocabs='wave' OR ai_parameter_vocabs='wave')", ErrorResponse.class); assertEquals(HttpStatus.INTERNAL_SERVER_ERROR, error.getStatusCode()); assertEquals("Or combine with query setting do not make sense", Objects.requireNonNull(error.getBody()).getMessage(), "correct error"); @@ -621,7 +621,7 @@ public void verifySortBy() throws IOException { // Edge case on sort by with 1 item, but typo in argument sortBy, it should be sortby. Hence use API default sort -score // https://docs.ogc.org/DRAFTS/20-004.html#sorting-parameter-sortby - ResponseEntity collections = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 AND parameter_vocabs='wave'&sortBy=-score,+title", ExtendedCollections.class); + ResponseEntity collections = testRestTemplate.getForEntity(getBasePath() + "/collections?filter=score>=2 AND (parameter_vocabs='wave' OR ai_parameter_vocabs='wave')&sortBy=-score,+title", ExtendedCollections.class); assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, only one record"); // Now return result should sort by score then title, since no query here, the score will auto adjust to 1 as all search without query default score is 1 diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactoryTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactoryTest.java new file mode 100644 index 00000000..48eead43 --- /dev/null +++ b/server/src/test/java/au/org/aodn/ogcapi/server/core/parser/elastic/CQLToElasticFilterFactoryTest.java @@ -0,0 +1,97 @@ +package au.org.aodn.ogcapi.server.core.parser.elastic; + +import au.org.aodn.ogcapi.server.core.model.enumeration.CQLCrsType; +import au.org.aodn.ogcapi.server.core.model.enumeration.CQLElasticSetting; +import au.org.aodn.ogcapi.server.core.model.enumeration.CQLFields; +import org.geotools.filter.text.commons.CompilerUtil; +import org.geotools.filter.text.commons.Language; +import org.geotools.filter.text.cql2.CQLException; +import org.junit.jupiter.api.Test; +import org.opengis.filter.Filter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class CQLToElasticFilterFactoryTest { + + @Test + public void parameterVocabFilterEnablesPrioritySort() throws CQLException { + String cql = "(parameter_vocabs='acoustics' OR ai_parameter_vocabs='acoustics') OR " + + "(parameter_vocabs='aerosols' OR ai_parameter_vocabs='aerosols') OR " + + "(parameter_vocabs='air pressure' OR ai_parameter_vocabs='air pressure')"; + CQLToElasticFilterFactory factory = newFactory(); + Filter filter = CompilerUtil.parseFilter(Language.ECQL, cql, factory); + + assertTrue(factory.isParameterPrioritySort()); + assertFalse(factory.isPlatformPrioritySort()); + + OrImpl parameterFilter = assertInstanceOf(OrImpl.class, filter); + assertTrue(parameterFilter.getQuery().isBool()); + assertEquals(6, parameterFilter.getQuery().bool().should().size()); + assertTrue( + parameterFilter.getQuery().bool().should().stream().noneMatch(query -> query.isBool()), + "Parameter vocabulary clauses should be flattened into one should list"); + } + + @Test + public void platformVocabFilterEnablesPrioritySort() throws CQLException { + String cql = "(platform_vocabs='satellite' OR ai_platform_vocabs='satellite') OR " + + "(platform_vocabs='glider' OR ai_platform_vocabs='glider')"; + CQLToElasticFilterFactory factory = newFactory(); + Filter filter = CompilerUtil.parseFilter(Language.ECQL, cql, factory); + + assertTrue(factory.isPlatformPrioritySort()); + assertFalse(factory.isParameterPrioritySort()); + + OrImpl platformFilter = assertInstanceOf(OrImpl.class, filter); + assertTrue(platformFilter.getQuery().isBool()); + assertEquals(4, platformFilter.getQuery().bool().should().size()); + assertTrue( + platformFilter.getQuery().bool().should().stream().noneMatch(query -> query.isBool()), + "Grouped platform vocabulary clauses should be flattened into one should list"); + } + + @Test + public void prioritySortMetadataIsCollectedAlongsideQuerySettings() throws CQLException { + CQLToElasticFilterFactory factory = parse( + "page_size=11 AND " + + "((parameter_vocabs='heat budget' OR ai_parameter_vocabs='heat budget')) " + + "AND ((platform_vocabs='satellite' OR ai_platform_vocabs='satellite') OR " + + "(platform_vocabs='glider' OR ai_platform_vocabs='glider'))"); + + assertEquals("11", factory.getQuerySetting().get(CQLElasticSetting.page_size)); + assertTrue(factory.isParameterPrioritySort()); + assertTrue(factory.isPlatformPrioritySort()); + } + + @Test + public void querySettingsCannotBeCombinedWithOr() { + IllegalArgumentException settingFirst = assertThrows( + IllegalArgumentException.class, + () -> parse("score>=2 OR parameter_vocabs='wave'")); + assertEquals( + "Or combine with query setting do not make sense", + settingFirst.getMessage()); + + IllegalArgumentException settingLast = assertThrows( + IllegalArgumentException.class, + () -> parse( + "parameter_vocabs='wave' OR ai_parameter_vocabs='wave' OR score>=2")); + assertEquals( + "Or combine with query setting do not make sense", + settingLast.getMessage()); + } + + private CQLToElasticFilterFactory parse(String cql) throws CQLException { + CQLToElasticFilterFactory factory = newFactory(); + CompilerUtil.parseFilter(Language.ECQL, cql, factory); + return factory; + } + + private CQLToElasticFilterFactory newFactory() { + return new CQLToElasticFilterFactory<>(CQLCrsType.EPSG4326, CQLFields.class); + } +}