diff --git a/IronSoftware.Drawing/IronSoftware.Drawing.Common.Tests/UnitTests/AnyBitmapFunctionality.cs b/IronSoftware.Drawing/IronSoftware.Drawing.Common.Tests/UnitTests/AnyBitmapFunctionality.cs index ef91a05..7d8225f 100644 --- a/IronSoftware.Drawing/IronSoftware.Drawing.Common.Tests/UnitTests/AnyBitmapFunctionality.cs +++ b/IronSoftware.Drawing/IronSoftware.Drawing.Common.Tests/UnitTests/AnyBitmapFunctionality.cs @@ -579,6 +579,48 @@ public void Try_UnLoad_Tiff_Image() Assert.Equal(2, anyBitmap.FrameCount); } + [FactWithAutomaticDisplayName] + public void FromTiffFile_StreamsMultiPageTiff_MatchesFromFile() + { + string tiffPath = GetRelativeFilePath("IRON-274-39065.tif"); + + // Baseline: the standard in-memory loader. + var expected = AnyBitmap.FromFile(tiffPath); + + // Streaming loader: the path used automatically for TIFF files > ~2 GB. + var streamed = AnyBitmap.FromTiffFile(tiffPath); + + streamed.FrameCount.Should().Be(expected.FrameCount); + + var expectedFrames = expected.GetAllFrames.ToList(); + var streamedFrames = streamed.GetAllFrames.ToList(); + streamedFrames.Count.Should().Be(expectedFrames.Count); + for (int i = 0; i < expectedFrames.Count; i++) + { + streamedFrames[i].Width.Should().Be(expectedFrames[i].Width); + streamedFrames[i].Height.Should().Be(expectedFrames[i].Height); + } + } + + [FactWithAutomaticDisplayName] + public void FromTiffFile_StreamsEveryPage_OfMultiPageTiff() + { + string tiffPath = GetRelativeFilePath("test_dw_10.tif"); + + var expected = AnyBitmap.FromFile(tiffPath); + var streamed = AnyBitmap.FromTiffFile(tiffPath); + + streamed.FrameCount.Should().Be(expected.FrameCount); + streamed.FrameCount.Should().BeGreaterThan(1); + } + + [FactWithAutomaticDisplayName] + public void FromTiffFile_MissingFile_ThrowsFileNotFound() + { + Action act = () => AnyBitmap.FromTiffFile(GetRelativeFilePath("does-not-exist-DW39.tiff")); + act.Should().Throw(); + } + [FactWithAutomaticDisplayName] public void Create_Multi_page_Tiff() { diff --git a/IronSoftware.Drawing/IronSoftware.Drawing.Common/AnyBitmap.cs b/IronSoftware.Drawing/IronSoftware.Drawing.Common/AnyBitmap.cs index b323b3c..d652de4 100644 --- a/IronSoftware.Drawing/IronSoftware.Drawing.Common/AnyBitmap.cs +++ b/IronSoftware.Drawing/IronSoftware.Drawing.Common/AnyBitmap.cs @@ -706,6 +706,14 @@ public AnyBitmap(AnyBitmap original, int width, int height) LoadAndResizeImage(original, width, height); } + /// + /// Private parameterless constructor used by factory methods that populate + /// the image after construction (e.g. ). + /// + private AnyBitmap() + { + } + /// /// Construct a new Bitmap from a file. /// @@ -714,7 +722,7 @@ public AnyBitmap(AnyBitmap original, int width, int height) /// public AnyBitmap(string file) { - LoadImage(File.ReadAllBytes(file), true); + LoadImageFromFile(file, true); } /// @@ -726,7 +734,7 @@ public AnyBitmap(string file) /// public AnyBitmap(string file, bool preserveOriginalFormat) { - LoadImage(File.ReadAllBytes(file), preserveOriginalFormat); + LoadImageFromFile(file, preserveOriginalFormat); } /// @@ -876,6 +884,34 @@ public static AnyBitmap FromFile(string file, bool preserveOriginalFormat) } } + /// + /// Creates a new from a TIFF file by streaming it + /// from disk one page at a time. + /// + /// A fully qualified path to a TIFF file. + /// + /// Unlike , the file is never read into a + /// single byte[] buffer, so multi-page TIFF files larger than the + /// .NET ~2 GB single-array limit can be loaded natively without external + /// splitting. Each individual page must still fit within a single decode + /// buffer (see the page-size limit applied while decoding). + /// automatically falls back to this loader when a TIFF file exceeds the + /// in-memory size limit, so calling it explicitly is only required when you + /// want to force page-by-page streaming regardless of file size. + /// + /// + public static AnyBitmap FromTiffFile(string file) + { + if (!File.Exists(file)) + { + throw new FileNotFoundException($"TIFF file not found at path '{file}'.", file); + } + + AnyBitmap bitmap = new(); + bitmap.LoadLargeTiffFromFile(file); + return bitmap; + } + /// /// Construct a new Bitmap from a Uri. /// @@ -2845,59 +2881,208 @@ private Lazy> OpenTiffToImageSharp() } private IReadOnlyList InternalLoadTiff() + { + // Decode a TIFF that already lives in the in-memory Binary buffer. + using MemoryStream tiffStream = new(Binary); + + // Disable warning messages + Tiff.SetErrorHandler(new DisableErrorHandler()); + + // open a TIFF stored in the stream + using Tiff tiff = Tiff.ClientOpen("in-memory", "r", tiffStream, new TiffStream()); + if (tiff == null) + { + throw new NotSupportedException("Could not read image"); + } + + return ReadTiffFrames(tiff); + } + + /// + /// Reads every (non-thumbnail) directory of an already-open + /// into a list of ImageSharp images, decoding one page at a time. + /// + /// + /// The supplied may be backed by any stream - an + /// in-memory buffer or a . When it is backed by a + /// file, LibTiff seeks to each directory on demand, so the whole file is + /// never buffered. This is what allows multi-page TIFF files larger than + /// 2 GB to be loaded: total file size is unbounded, only an individual + /// page must fit within a single decode buffer. + /// + private List ReadTiffFrames(Tiff tiff) { int imageWidth = 0; int imageHeight = 0; double imageXResolution = 0; double imageYResolution = 0; - //IEnumerable images = new(); - // create a memory stream out of them - using MemoryStream tiffStream = new(Binary); + SetTiffCompression(tiff); - // Disable warning messages - Tiff.SetErrorHandler(new DisableErrorHandler()); List images = new(); - // open a TIFF stored in the stream - using (Tiff tiff = Tiff.ClientOpen("in-memory", "r", tiffStream, new TiffStream())) + short num = tiff.NumberOfDirectories(); + for (short i = 0; i < num; i++) { - SetTiffCompression(tiff); + _ = tiff.SetDirectory(i); - short num = tiff.NumberOfDirectories(); - for (short i = 0; i < num; i++) + if (IsThumbnail(tiff)) { - _ = tiff.SetDirectory(i); + continue; + } - if (IsThumbnail(tiff)) - { - continue; - } + var (width, height, horizontalResolution, verticalResolution) = SetWidthHeight(tiff, i, ref imageWidth, ref imageHeight, ref imageXResolution, ref imageYResolution); - var (width, height, horizontalResolution, verticalResolution) = SetWidthHeight(tiff, i, ref imageWidth, ref imageHeight, ref imageXResolution, ref imageYResolution); + // A single page is still decoded into one RGBA buffer, so its pixel + // count is bounded by the .NET single-array index limit. Multi-page + // files of any total size are fine as long as each page fits. + long pixelCount = (long)width * height; + if (pixelCount > MaxSingleFrameRasterPixels) + { + throw new NotSupportedException( + $"TIFF page {i} is {width}x{height} ({pixelCount:N0} pixels), which exceeds the maximum of " + + $"{MaxSingleFrameRasterPixels:N0} pixels that can be decoded into a single buffer. " + + "Split this page into smaller images before loading."); + } - // Read the image into the memory buffer - int[] raster = new int[height * width]; - if (!tiff.ReadRGBAImage(width, height, raster)) - { - throw new NotSupportedException("Could not read image"); - } + // Read the image into the memory buffer + int[] raster = new int[height * width]; + if (!tiff.ReadRGBAImage(width, height, raster)) + { + throw new NotSupportedException("Could not read image"); + } - var bits = PrepareByteArray(raster, width, height, 32); - - var image = Image.LoadPixelData(bits, width, height); + var bits = PrepareByteArray(raster, width, height, 32); - image.Metadata.HorizontalResolution = horizontalResolution; - image.Metadata.VerticalResolution = verticalResolution; - images.Add(image); + var image = Image.LoadPixelData(bits, width, height); - //Note1: it might be some case that the bytes of current Image is smaller/bigger than the original tiff - //Note2: 'yield return' make it super slow - } + image.Metadata.HorizontalResolution = horizontalResolution; + image.Metadata.VerticalResolution = verticalResolution; + images.Add(image); + //Note1: it might be some case that the bytes of current Image is smaller/bigger than the original tiff + //Note2: 'yield return' make it super slow } + return images; } + // .NET indexes arrays with a 32-bit integer, so a single byte[] (and + // therefore File.ReadAllBytes) cannot exceed ~2 GB. Files above this + // threshold are routed to a streaming loader for TIFF, or rejected with a + // clear message for formats that have no page-based streaming decoder. + private const long MaxInMemoryFileBytes = 2_000_000_000L; + + // An Rgba32 page is decoded into one byte[] of width*height*4 bytes; a + // single .NET array is capped at int.MaxValue bytes, so a page may hold at + // most ~536M pixels regardless of how large the overall file is. + private const long MaxSingleFrameRasterPixels = int.MaxValue / 4; + + /// + /// Loads an image from a file, transparently handling files that are too + /// large to fit in a single in-memory buffer. Large multi-page TIFF files + /// are streamed page-by-page from disk; other oversized formats raise a + /// clear, actionable exception instead of the opaque .NET array-size error. + /// + private void LoadImageFromFile(string file, bool preserveOriginalFormat) + { + long length; + try + { + length = new FileInfo(file).Length; + } + catch + { + // Surface the real access/IO error from the read below. + length = 0; + } + + if (length > MaxInMemoryFileBytes) + { + if (IsTiffFile(file)) + { + // Stream the TIFF page-by-page; never materialise the whole file. + LoadLargeTiffFromFile(file); + return; + } + + throw new NotSupportedException( + $"The image file '{file}' is {length:N0} bytes, which exceeds the ~2 GB limit for loading an " + + "image into a single memory buffer. Large multi-page TIFF files are supported via streaming; " + + "other formats must be split into smaller files before loading."); + } + + LoadImage(File.ReadAllBytes(file), preserveOriginalFormat); + } + + /// + /// Lightweight TIFF detection that reads only the 4-byte file header, + /// avoiding any full-file read. Recognises both classic TIFF (version 42, + /// 0x2A) and BigTIFF (version 43, 0x2B) in little-endian (II) and big-endian + /// (MM) byte order. BigTIFF detection is essential here because it is the + /// format typically used for the multi-gigabyte files this loader targets. + /// + private static bool IsTiffFile(string file) + { + try + { + using FileStream fs = new(file, FileMode.Open, FileAccess.Read, FileShare.Read); + byte[] header = new byte[4]; + int read = fs.Read(header, 0, 4); + return read == 4 && + ((header[0] == 0x49 && header[1] == 0x49 && header[2] == 0x2A && header[3] == 0x00) || // II classic + (header[0] == 0x4D && header[1] == 0x4D && header[2] == 0x00 && header[3] == 0x2A) || // MM classic + (header[0] == 0x49 && header[1] == 0x49 && header[2] == 0x2B && header[3] == 0x00) || // II BigTIFF + (header[0] == 0x4D && header[1] == 0x4D && header[2] == 0x00 && header[3] == 0x2B)); // MM BigTIFF + } + catch + { + return false; + } + } + + /// + /// Loads a TIFF that is too large to fit in a single in-memory buffer by + /// streaming it directly from disk. LibTiff reads one directory (page) at a + /// time through the underlying , so the entire file + /// is never allocated as one array, enabling TIFF files larger than 2 GB. + /// + private void LoadLargeTiffFromFile(string file) + { + Tiff.SetErrorHandler(new DisableErrorHandler()); + + List frames; + using (FileStream fileStream = new(file, FileMode.Open, FileAccess.Read, FileShare.Read, 1 << 20)) + using (Tiff tiff = Tiff.ClientOpen(file, "r", fileStream, new TiffStream())) + { + if (tiff == null) + { + throw new NotSupportedException( + $"Unable to open the TIFF file '{file}'. The file may be corrupted or in an unsupported format."); + } + + try + { + frames = ReadTiffFrames(tiff); + } + catch (DllNotFoundException e) + { + throw new DllNotFoundException("Please install BitMiracle.LibTiff.NET from NuGet.", e); + } + } + + if (frames.Count == 0) + { + throw new NotSupportedException( + $"The TIFF file '{file}' was opened but contained no decodable image pages."); + } + + // Hold the decoded pages directly. Binary is deliberately NOT set: the + // source file is larger than a single byte[] can hold, so it is + // re-encoded on demand if the raw bytes are ever requested. + _lazyImage = new Lazy>(() => frames); + ForceLoadLazyImage(); + } + private Lazy> OpenImageToImageSharp(bool preserveOriginalFormat, bool tryWithLibTiff = false) { return new Lazy>(() =>