From 2a3af0eeb95f3a8ca2de009bd0e8a0594fc821b5 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 10 Jun 2026 13:22:39 +0530 Subject: [PATCH 1/2] Use $sum instead of $push to optimise Mongo group queries --- .../query/parser/MongoAggregateExpressionParser.java | 10 +++++++++- .../MongoSelectionsAddingTransformation.java | 10 +++++----- .../src/test/resources/mongo/pipeline/field_count.json | 6 ++---- ...e_sorts_simple_sort_with_aggregation_selection.json | 6 ++---- .../src/test/resources/mongo/pipeline/simple.json | 6 ++---- .../resources/mongo/pipeline/with_projections.json | 6 ++---- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java index e4ce0d0c9..16eaa64ed 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java @@ -30,7 +30,6 @@ final class MongoAggregateExpressionParser extends MongoSelectTypeExpressionPars put(SUM, "$sum"); put(MIN, "$min"); put(MAX, "$max"); - put(COUNT, "$push"); put(LAST, "$last"); } }); @@ -47,6 +46,15 @@ public Map visit(final AggregateExpression expression) { Map parse(final AggregateExpression expression) { AggregationOperator operator = expression.getAggregator(); + + // MongoDB has no native COUNT accumulator. Implement COUNT as $sum: 1, which increments a + // counter per document, instead of collecting every value into an array via $push (followed + // by $size). The $push approach materializes one array element per matching document, which is + // memory-intensive, can spill to disk, and fails with a BSON 16MB error for very large groups. 
+ if (operator == COUNT) { + return Map.of("$sum", 1); + } + String key = KEY_MAP.get(operator); if (key == null) { diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java index cd6fcc7fd..f7eabcb3c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/transformer/MongoSelectionsAddingTransformation.java @@ -1,6 +1,5 @@ package org.hypertrace.core.documentstore.mongo.query.transformer; -import static org.hypertrace.core.documentstore.expression.operators.AggregationOperator.COUNT; import static org.hypertrace.core.documentstore.expression.operators.AggregationOperator.DISTINCT_COUNT; import static org.hypertrace.core.documentstore.expression.operators.FunctionOperator.LENGTH; import static org.hypertrace.core.documentstore.mongo.MongoUtils.encodeKey; @@ -86,10 +85,11 @@ public Optional visit(final AggregateExpression expression) { final String encodedAlias = encodeKey(alias); final SelectTypeExpression pairingExpression; - if (expression.getAggregator() == DISTINCT_COUNT || expression.getAggregator() == COUNT) { - // Since MongoDB doesn't support $distinctCount and $count(optional_field) in aggregations, - // we convert them to $addToSet and $push functions respectively. - // So, we need to project $size(set) or $size(list) instead of just the alias in these cases. + if (expression.getAggregator() == DISTINCT_COUNT) { + // Since MongoDB doesn't support $distinctCount in aggregations, we convert it to $addToSet. + // So, we need to project $size(set) instead of just the alias in this case. 
+ // (COUNT is implemented as $sum and already yields a scalar, so it falls into the else + // branch.) pairingExpression = FunctionExpression.builder() .operator(LENGTH) diff --git a/document-store/src/test/resources/mongo/pipeline/field_count.json b/document-store/src/test/resources/mongo/pipeline/field_count.json index bd7def235..a1682b571 100644 --- a/document-store/src/test/resources/mongo/pipeline/field_count.json +++ b/document-store/src/test/resources/mongo/pipeline/field_count.json @@ -3,15 +3,13 @@ "$group": { "_id": null, "total": { - "$push": "$path" + "$sum": 1 } } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } } ] diff --git a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json index ce2253af5..0a9b07f6d 100644 --- a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json +++ b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json @@ -2,16 +2,14 @@ { "$group": { "total": { - "$push": 1 + "$sum": 1 }, "_id": null } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } }, { diff --git a/document-store/src/test/resources/mongo/pipeline/simple.json b/document-store/src/test/resources/mongo/pipeline/simple.json index f07ec7a03..a1682b571 100644 --- a/document-store/src/test/resources/mongo/pipeline/simple.json +++ b/document-store/src/test/resources/mongo/pipeline/simple.json @@ -3,15 +3,13 @@ "$group": { "_id": null, "total": { - "$push": 1 + "$sum": 1 } } }, { "$project": { - "total": { - "$size": "$total" - } + "total": "$total" } } ] diff --git a/document-store/src/test/resources/mongo/pipeline/with_projections.json b/document-store/src/test/resources/mongo/pipeline/with_projections.json index cbc11066d..23d9eb81a 100644 --- 
a/document-store/src/test/resources/mongo/pipeline/with_projections.json +++ b/document-store/src/test/resources/mongo/pipeline/with_projections.json @@ -3,16 +3,14 @@ "$group": { "_id": null, "total": { - "$push": 1 + "$sum": 1 } } }, { "$project": { "name": 1, - "total": { - "$size": "$total" - } + "total": "$total" } } ] From fb83995f8bd83e423ae6383351015013a29292eb Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 10 Jun 2026 13:58:00 +0530 Subject: [PATCH 2/2] WIP --- .../MongoAggregateExpressionParser.java | 35 +++++++++++++------ .../resources/mongo/pipeline/field_count.json | 15 +++++++- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java index 16eaa64ed..ae182796c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoAggregateExpressionParser.java @@ -12,9 +12,11 @@ import static org.hypertrace.core.documentstore.mongo.MongoUtils.getUnsupportedOperationException; import java.util.EnumMap; +import java.util.List; import java.util.Map; import lombok.NoArgsConstructor; import org.hypertrace.core.documentstore.expression.impl.AggregateExpression; +import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.operators.AggregationOperator; import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor; @@ -47,12 +49,29 @@ public Map visit(final AggregateExpression expression) { Map parse(final AggregateExpression expression) { AggregationOperator operator = expression.getAggregator(); - // MongoDB has no native COUNT accumulator. 
Implement COUNT as $sum: 1, which increments a - // counter per document, instead of collecting every value into an array via $push (followed - // by $size). The $push approach materializes one array element per matching document, which is - // memory-intensive, can spill to disk, and fails with a BSON 16MB error for very large groups. + SelectTypeExpressionVisitor parser = + new MongoIdentifierPrefixingParser( + new MongoIdentifierExpressionParser( + new MongoAggregateExpressionParser( + new MongoFunctionExpressionParser(new MongoConstantExpressionParser())))); + + // MongoDB has no native COUNT accumulator. Implement COUNT with $sum instead of collecting + // every value into an array via $push (followed by $size). The $push approach materializes one + // array element per matching document, which is memory-intensive and can spill to disk. + // + // The previous $push semantics are preserved: + // - COUNT(<constant>) counts every document in the group (i.e. COUNT(*)). + // - COUNT(<expression>) counts only documents where the operand is present (not missing), + // matching $push, which skips missing values. ($type returns "missing" for absent fields.) 
if (operator == COUNT) { - return Map.of("$sum", 1); + if (expression.getExpression() instanceof ConstantExpression) { + return Map.of("$sum", 1); + } + + Object operand = expression.getExpression().accept(parser); + return Map.of( + "$sum", + Map.of("$cond", List.of(Map.of("$ne", List.of(Map.of("$type", operand), "missing")), 1, 0))); } String key = KEY_MAP.get(operator); @@ -61,12 +80,6 @@ Map parse(final AggregateExpression expression) { throw getUnsupportedOperationException(operator); } - SelectTypeExpressionVisitor parser = - new MongoIdentifierPrefixingParser( - new MongoIdentifierExpressionParser( - new MongoAggregateExpressionParser( - new MongoFunctionExpressionParser(new MongoConstantExpressionParser())))); - Object value = expression.getExpression().accept(parser); return Map.of(key, value); } diff --git a/document-store/src/test/resources/mongo/pipeline/field_count.json b/document-store/src/test/resources/mongo/pipeline/field_count.json index a1682b571..2b10dfc88 100644 --- a/document-store/src/test/resources/mongo/pipeline/field_count.json +++ b/document-store/src/test/resources/mongo/pipeline/field_count.json @@ -3,7 +3,20 @@ "$group": { "_id": null, "total": { - "$sum": 1 + "$sum": { + "$cond": [ + { + "$ne": [ + { + "$type": "$path" + }, + "missing" + ] + }, + 1, + 0 + ] + } } } },