From dde06685d29690ecde14430ca3dee4090ef50bb2 Mon Sep 17 00:00:00 2001
From: patchwright
Date: Wed, 17 Jun 2026 19:41:49 +0200
Subject: [PATCH] Fix format_size() rounding rollover at unit boundaries

format_size() chooses the unit by comparing the raw byte count to each
divider, then rounds the mantissa with round_number() afterward. When the
mantissa rounds up to the base (e.g. 999999 bytes is 999.999 KB, which
rounds to '1000 KB'), the already-chosen unit is left stale:

    >>> format_size(999999)
    '1000 KB'  # expected '1 MB'
    >>> format_size(999999999)
    '1000 MB'  # expected '1 GB'
    >>> format_size(1024 ** 2 - 1, binary=True)
    '1024 KiB'  # expected '1 MiB'

Fix: after rounding, if the mantissa has reached the base and a larger unit
is available, carry into that next unit and re-render.

Added regression cases to test_format_size (they fail before this change,
pass after); all existing assertions are unchanged.
---
 humanfriendly/__init__.py | 23 ++++++++++++++++-------
 humanfriendly/tests.py    |  8 ++++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/humanfriendly/__init__.py b/humanfriendly/__init__.py
index 4c0a333..3be3074 100644
--- a/humanfriendly/__init__.py
+++ b/humanfriendly/__init__.py
@@ -185,13 +185,22 @@ def format_size(num_bytes, keep_width=False, binary=False):
     >>> format_size(1000 ** 3 * 4)
     '4 GB'
     """
-    for unit in reversed(disk_size_units):
-        if num_bytes >= unit.binary.divider and binary:
-            number = round_number(float(num_bytes) / unit.binary.divider, keep_width=keep_width)
-            return pluralize(number, unit.binary.symbol, unit.binary.symbol)
-        elif num_bytes >= unit.decimal.divider and not binary:
-            number = round_number(float(num_bytes) / unit.decimal.divider, keep_width=keep_width)
-            return pluralize(number, unit.decimal.symbol, unit.decimal.symbol)
+    base = 1024 if binary else 1000
+    ordered_units = list(disk_size_units)
+    for index in range(len(ordered_units) - 1, -1, -1):
+        unit = ordered_units[index]
+        side = unit.binary if binary else unit.decimal
+        if num_bytes >= side.divider:
+            number = round_number(float(num_bytes) / side.divider, keep_width=keep_width)
+            # The unit was chosen from the unrounded byte count, but rounding can
+            # push the mantissa up to `base` (e.g. 999999 bytes -> 999.999 KB,
+            # which rounds to "1000 KB"). When that happens, carry into the next
+            # larger unit so the result reads "1 MB" instead of "1000 KB".
+            if float(number) >= base and index < len(ordered_units) - 1:
+                side = (ordered_units[index + 1].binary if binary
+                        else ordered_units[index + 1].decimal)
+                number = round_number(float(num_bytes) / side.divider, keep_width=keep_width)
+            return pluralize(number, side.symbol, side.symbol)
     return pluralize(num_bytes, 'byte')
 
 
diff --git a/humanfriendly/tests.py b/humanfriendly/tests.py
index 72dad99..5406efe 100644
--- a/humanfriendly/tests.py
+++ b/humanfriendly/tests.py
@@ -513,6 +513,14 @@ def test_format_size(self):
         self.assertEqual('1 YiB', format_size(1024 ** 8, binary=True))
         self.assertEqual('45 KB', format_size(1000 * 45))
         self.assertEqual('2.9 TB', format_size(1000 ** 4 * 2.9))
+        # Rounding must not leave the mantissa at or above the base while a
+        # larger unit is available: 999999 bytes is 999.999 KB, which rounds to
+        # 1000 KB and should carry into 1 MB (not render as "1000 KB").
+        self.assertEqual('1 MB', format_size(999999))
+        self.assertEqual('1 GB', format_size(999999999))
+        self.assertEqual('1 TB', format_size(999999999999))
+        self.assertEqual('1 MiB', format_size(1024 ** 2 - 1, binary=True))
+        self.assertEqual('1 GiB', format_size(1024 ** 3 - 1, binary=True))
 
     def test_parse_size(self):
         """Test :func:`humanfriendly.parse_size()`."""