diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java index e4ce0d0c9..ae182796c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java @@ -12,9 +12,11 @@ import static org.hypertrace.core.documentstore.mongo.MongoUtils.getUnsupportedOperationException; import java.util.EnumMap; +import java.util.List; import java.util.Map; import lombok.NoArgsConstructor; import org.hypertrace.core.documentstore.expression.impl.AggregateExpression; +import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.operators.AggregationOperator; import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor; @@ -30,7 +32,6 @@ final class MongoAggregateExpressionParser extends MongoSelectTypeExpressionPars put(SUM, "$sum"); put(MIN, "$min"); put(MAX, "$max"); - put(COUNT, "$push"); put(LAST, "$last"); } }); @@ -47,11 +48,6 @@ public Map visit(final AggregateExpression expression) { Map parse(final AggregateExpression expression) { AggregationOperator operator = expression.getAggregator(); - String key = KEY_MAP.get(operator); - - if (key == null) { - throw getUnsupportedOperationException(operator); - } SelectTypeExpressionVisitor parser = new MongoIdentifierPrefixingParser( @@ -59,6 +55,31 @@ Map parse(final AggregateExpression expression) { new MongoAggregateExpressionParser( new MongoFunctionExpressionParser(new MongoConstantExpressionParser())))); + // MongoDB has no native COUNT accumulator. Implement COUNT with $sum instead of collecting + // every value into an array via $push (followed by $size). The $push approach materializes one + // array element per matching document, which is memory-intensive and can spill to disk. + // + // The previous $push semantics are preserved: + // - COUNT() counts every document in the group (i.e. COUNT(*)). + // - COUNT() counts only documents where the operand is present (not missing), + // matching $push, which skips missing values. ($type returns "missing" for absent fields.) + if (operator == COUNT) { + if (expression.getExpression() instanceof ConstantExpression) { + return Map.of("$sum", 1); + } + + Object operand = expression.getExpression().accept(parser); + return Map.of( + "$sum", + Map.of("$cond", List.of(Map.of("$ne", List.of(Map.of("$type", operand), "missing")), 1, 0))); + } + + String key = KEY_MAP.get(operator); + + if (key == null) { + throw getUnsupportedOperationException(operator); + } + Object value = expression.getExpression().accept(parser); return Map.of(key, value); } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java index cd6fcc7fd..f7eabcb3c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java @@ -1,6 +1,5 @@ package org.hypertrace.core.documentstore.mongo.query.transformer; -import static org.hypertrace.core.documentstore.expression.operators.AggregationOperator.COUNT; import static org.hypertrace.core.documentstore.expression.operators.AggregationOperator.DISTINCT_COUNT; import static org.hypertrace.core.documentstore.expression.operators.FunctionOperator.LENGTH; import static org.hypertrace.core.documentstore.mongo.MongoUtils.encodeKey; @@ -86,10 +85,11 @@ public Optional visit(final AggregateExpression expression) { final String encodedAlias = encodeKey(alias); final SelectTypeExpression pairingExpression; - if (expression.getAggregator() == DISTINCT_COUNT || expression.getAggregator() == COUNT) { - // Since MongoDB doesn't support $distinctCount and $count(optional_field) in aggregations, - // we convert them to $addToSet and $push functions respectively. - // So, we need to project $size(set) or $size(list) instead of just the alias in these cases. + if (expression.getAggregator() == DISTINCT_COUNT) { + // Since MongoDB doesn't support $distinctCount in aggregations, we convert it to $addToSet. + // So, we need to project $size(set) instead of just the alias in this case. + // (COUNT is implemented as $sum and already yields a scalar, so it falls into the else + // branch.) pairingExpression = FunctionExpression.builder() .operator(LENGTH) diff --git a/document-store/src/test/resources/mongo/pipeline/field_count.json b/document-store/src/test/resources/mongo/pipeline/field_count.json index bd7def235..2b10dfc88 100644 --- a/document-store/src/test/resources/mongo/pipeline/field_count.json +++ b/document-store/src/test/resources/mongo/pipeline/field_count.json @@ -3,15 +3,26 @@ "$group": { "_id": null, "total": { - "$push": "$path" + "$sum": { + "$cond": [ + { + "$ne": [ + { + "$type": "$path" + }, + "missing" + ] + }, + 1, + 0 + ] + } } } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } } ] diff --git a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json index ce2253af5..0a9b07f6d 100644 --- a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json +++ b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json @@ -2,16 +2,14 @@ { "$group": { "total": { - "$push": 1 + "$sum": 1 }, "_id": null } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } }, { diff --git a/document-store/src/test/resources/mongo/pipeline/simple.json b/document-store/src/test/resources/mongo/pipeline/simple.json index f07ec7a03..a1682b571 100644 --- a/document-store/src/test/resources/mongo/pipeline/simple.json +++ b/document-store/src/test/resources/mongo/pipeline/simple.json @@ -3,15 +3,13 @@ "$group": { "_id": null, "total": { - "$push": 1 + "$sum": 1 } } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } } ] diff --git a/document-store/src/test/resources/mongo/pipeline/with_projections.json b/document-store/src/test/resources/mongo/pipeline/with_projections.json index cbc11066d..23d9eb81a 100644 --- a/document-store/src/test/resources/mongo/pipeline/with_projections.json +++ b/document-store/src/test/resources/mongo/pipeline/with_projections.json @@ -3,16 +3,14 @@ "$group": { "_id": null, "total": { - "$push": 1 + "$sum": 1 } } }, { "$project": { "name": 1, - "total": { - "$size": "$total" - } + "total": "$total" } } ]