From 97a191796c5386acba1dad6b8cef0b96fc5c7d79 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Mon, 8 Jun 2026 17:44:55 +0300 Subject: [PATCH 1/4] Add extractFontFamily method --- .../org/verapdf/pd/font/PDFontDescriptor.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java index 2c33d74d..18554e39 100644 --- a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java +++ b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java @@ -25,10 +25,11 @@ import org.verapdf.pd.PDObject; import org.verapdf.pd.font.stdmetrics.StandardFontMetrics; -import java.util.Iterator; -import java.util.Map; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Represents font descriptor. @@ -54,6 +55,12 @@ public class PDFontDescriptor extends PDObject { private static final Double DEFAULT_XHEIGHT = 0d; private static final Double DEFAULT_STEM_H = 0d; private static final Double DEFAULT_WIDTH = 0d; + private static final List STYLE_SUFFIXES = Arrays.asList( + "Semibold", "BoldSemiExt", "BoldItalic", "RomanSmallCaps", + "SmallCaps", "SemiboldIt", "BoldIt", "Regular", "Italic", + "Oblique", "Bold", "Light", "Medium", "Black", "Condensed", + "Extended", "Exp", "Roman", "Book", "Std", "MT", "PS", "It" + ); // values private String fontName; @@ -122,6 +129,33 @@ public String getFontFamily() { return fontFamily; } + public static String extractFontFamily(String fontName) { + if (fontName == null || fontName.isEmpty()) return null; + + String name = fontName.trim(); + name = name.replaceAll("\\*\\d+", ""); + + boolean changed; + do { + changed = false; + for (String suffix : STYLE_SUFFIXES) { + Pattern p = Pattern.compile("(-?)" + Pattern.quote(suffix) + "$", Pattern.CASE_INSENSITIVE); + Matcher m = p.matcher(name); + if (m.find()) { + name = name.substring(0, m.start()); + changed = true; + break; + } + } + } while (changed); + + name = name.replaceAll("-$", ""); + String spaced = name.replaceAll("([a-z])([A-Z])", "$1 $2"); + spaced = spaced.trim().replaceAll("\\s+", " "); + + return spaced.isEmpty() ? "sans-serif" : spaced; + } + /** * @return the font stretch value. */ From 343d6060af7f865cd7ca385756a7c66556c321f9 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Mon, 8 Jun 2026 18:27:41 +0300 Subject: [PATCH 2/4] Update STYLE_SUFFIXES map --- .../org/verapdf/pd/font/PDFontDescriptor.java | 72 +++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java index 18554e39..ef2d14ec 100644 --- a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java +++ b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java @@ -56,10 +56,74 @@ public class PDFontDescriptor extends PDObject { private static final Double DEFAULT_STEM_H = 0d; private static final Double DEFAULT_WIDTH = 0d; private static final List STYLE_SUFFIXES = Arrays.asList( - "Semibold", "BoldSemiExt", "BoldItalic", "RomanSmallCaps", - "SmallCaps", "SemiboldIt", "BoldIt", "Regular", "Italic", - "Oblique", "Bold", "Light", "Medium", "Black", "Condensed", - "Extended", "Exp", "Roman", "Book", "Std", "MT", "PS", "It" + // ---- Weight ---- + "Hairline", "Thin", + "ExtraLight", "ExtLt", "Extra Light", "Extra-Light", + "UltraLight", "UltLt", "Ultra Light", "Ultra-Light", + "Light", "Lt", + "Book", + "Normal", + "Regular", "Rg", "Roman", + "Medium", "Md", + "Demi", "DemiBold", "Demi Bold", "Demi-Bold", + "SemiBold", "Semibold", "SemiBd", "Semi Bold", "Semi-Bold", + "Bold", "Bd", + "ExtraBold", "ExtBd", "Extra Bold", "Extra-Bold", + "UltraBold", "UltBd", "Ultra Bold", "Ultra-Bold", + "Black", "Blk", + "Heavy", "Hv", + "Ultra", "Fat", "Poster", + + // ---- Slope ---- + "Italic", "Ita", "It", + "Oblique", "Obl", + "Backslant", + + // ---- Width ---- + "Compressed", + "ExtraCondensed", "UltraCondensed", + "Condensed", "Cond", "Cn", + "SemiCondensed", + "Narrow", + "SemiExpanded", + "Expanded", "Exp", "Extended", + "ExtraExpanded", "UltraExpanded", + "Wide", + + // ---- Optical size ---- + "Caption", "Text", "Subhead", "Deck", "Display", "Titling", + + // ---- Weight + Italic/Oblique (common combinations) ---- + "ThinItalic", "ThinIt", + "LightItalic", "LightOblique", "LightIt", + "BookItalic", "BookOblique", + "MediumItalic", "MediumOblique", "MediumIt", + "DemiItalic", "DemiOblique", + "SemiBoldItalic", "SemiboldItalic", "SemiBoldIt", "SemiboldIt", + "BoldItalic", "BoldIt", "BoldOblique", "BoldObl", + "ExtraBoldItalic", "ExtraBoldIt", + "BlackItalic", "BlackIt", "HeavyItalic", + + // ---- Width + Weight / Slope (common combos) ---- + "BoldCondensed", "BoldCond", "BoldCn", + "BoldExpanded", "BoldExp", "BoldExtended", + "BoldSemiExt", + "LightCondensed", "LightCond", + "MediumCondensed", "MediumCond", + "CondensedBold", "CondBold", + "CondensedLight", + "ExpandedBold", "ExtendedBold", + + // ---- SmallCaps & other variants ---- + "SmallCaps", "SC", "PetiteCaps", + "RomanSmallCaps", + "BoldSmallCaps", + "Inline", "Outline", "Shadow", + "Engraved", "Stencil", "Swash", + "Ornaments", "Symbols", "Icons", + + // ---- Foundry / vendor tags ---- + "Std", "MT", "PS", "LT", "Pro", "Com", "W1G", "EF", "CE" ); // values From d6cc2f32cc7bb1ffe94da4df11eb6f030317f165 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Tue, 9 Jun 2026 12:08:13 +0300 Subject: [PATCH 3/4] Create FontConstants --- .../org/verapdf/pd/font/PDFontDescriptor.java | 95 +++---------------- .../java/org/verapdf/tools/FontConstants.java | 78 +++++++++++++++ 2 files changed, 93 insertions(+), 80 deletions(-) create mode 100644 src/main/java/org/verapdf/tools/FontConstants.java diff --git a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java index ef2d14ec..340ec19a 100644 --- a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java +++ b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java @@ -24,12 +24,11 @@ import org.verapdf.cos.*; import org.verapdf.pd.PDObject; import org.verapdf.pd.font.stdmetrics.StandardFontMetrics; +import org.verapdf.tools.FontConstants; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Represents font descriptor. @@ -55,76 +54,6 @@ public class PDFontDescriptor extends PDObject { private static final Double DEFAULT_XHEIGHT = 0d; private static final Double DEFAULT_STEM_H = 0d; private static final Double DEFAULT_WIDTH = 0d; - private static final List STYLE_SUFFIXES = Arrays.asList( - // ---- Weight ---- - "Hairline", "Thin", - "ExtraLight", "ExtLt", "Extra Light", "Extra-Light", - "UltraLight", "UltLt", "Ultra Light", "Ultra-Light", - "Light", "Lt", - "Book", - "Normal", - "Regular", "Rg", "Roman", - "Medium", "Md", - "Demi", "DemiBold", "Demi Bold", "Demi-Bold", - "SemiBold", "Semibold", "SemiBd", "Semi Bold", "Semi-Bold", - "Bold", "Bd", - "ExtraBold", "ExtBd", "Extra Bold", "Extra-Bold", - "UltraBold", "UltBd", "Ultra Bold", "Ultra-Bold", - "Black", "Blk", - "Heavy", "Hv", - "Ultra", "Fat", "Poster", - - // ---- Slope ---- - "Italic", "Ita", "It", - "Oblique", "Obl", - "Backslant", - - // ---- Width ---- - "Compressed", - "ExtraCondensed", "UltraCondensed", - "Condensed", "Cond", "Cn", - "SemiCondensed", - "Narrow", - "SemiExpanded", - "Expanded", "Exp", "Extended", - "ExtraExpanded", "UltraExpanded", - "Wide", - - // ---- Optical size ---- - "Caption", "Text", "Subhead", "Deck", "Display", "Titling", - - // ---- Weight + Italic/Oblique (common combinations) ---- - "ThinItalic", "ThinIt", - "LightItalic", "LightOblique", "LightIt", - "BookItalic", "BookOblique", - "MediumItalic", "MediumOblique", "MediumIt", - "DemiItalic", "DemiOblique", - "SemiBoldItalic", "SemiboldItalic", "SemiBoldIt", "SemiboldIt", - "BoldItalic", "BoldIt", "BoldOblique", "BoldObl", - "ExtraBoldItalic", "ExtraBoldIt", - "BlackItalic", "BlackIt", "HeavyItalic", - - // ---- Width + Weight / Slope (common combos) ---- - "BoldCondensed", "BoldCond", "BoldCn", - "BoldExpanded", "BoldExp", "BoldExtended", - "BoldSemiExt", - "LightCondensed", "LightCond", - "MediumCondensed", "MediumCond", - "CondensedBold", "CondBold", - "CondensedLight", - "ExpandedBold", "ExtendedBold", - - // ---- SmallCaps & other variants ---- - "SmallCaps", "SC", "PetiteCaps", - "RomanSmallCaps", - "BoldSmallCaps", - "Inline", "Outline", "Shadow", - "Engraved", "Stencil", "Swash", - "Ornaments", "Symbols", "Icons", - - // ---- Foundry / vendor tags ---- - "Std", "MT", "PS", "LT", "Pro", "Com", "W1G", "EF", "CE" - ); // values private String fontName; @@ -193,20 +122,26 @@ public String getFontFamily() { return fontFamily; } - public static String extractFontFamily(String fontName) { - if (fontName == null || fontName.isEmpty()) return null; + public static String extractFontFamilyFromFontName(String fontNameWithoutSubset) { + if (fontNameWithoutSubset == null || fontNameWithoutSubset.isEmpty()) return null; - String name = fontName.trim(); + String name = fontNameWithoutSubset.trim(); name = name.replaceAll("\\*\\d+", ""); boolean changed; do { changed = false; - for (String suffix : STYLE_SUFFIXES) { - Pattern p = Pattern.compile("(-?)" + Pattern.quote(suffix) + "$", Pattern.CASE_INSENSITIVE); - Matcher m = p.matcher(name); - if (m.find()) { - name = name.substring(0, m.start()); + for (String suffix : FontConstants.STYLE_SUFFIXES) { + String lowerName = name.toLowerCase(); + String lowerSuffix = suffix.toLowerCase(); + + if (lowerName.endsWith("-" + lowerSuffix)) { + name = name.substring(0, name.length() - suffix.length() - 1); + changed = true; + break; + } + if (lowerName.endsWith(lowerSuffix)) { + name = name.substring(0, name.length() - suffix.length()); changed = true; break; } diff --git a/src/main/java/org/verapdf/tools/FontConstants.java b/src/main/java/org/verapdf/tools/FontConstants.java new file mode 100644 index 00000000..cd3d17a4 --- /dev/null +++ b/src/main/java/org/verapdf/tools/FontConstants.java @@ -0,0 +1,78 @@ +package org.verapdf.tools; + +import java.util.Arrays; +import java.util.List; + +public class FontConstants { + public static final List STYLE_SUFFIXES = Arrays.asList( + // ---- Weight ---- + "Hairline", "Thin", + "ExtraLight", "ExtLt", "Extra Light", "Extra-Light", + "UltraLight", "UltLt", "Ultra Light", "Ultra-Light", + "Light", "Lt", + "Book", + "Normal", + "Regular", "Rg", "Roman", + "Medium", "Md", + "Demi", "DemiBold", "Demi Bold", "Demi-Bold", + "SemiBold", "Semibold", "SemiBd", "Semi Bold", "Semi-Bold", + "Bold", "Bd", + "ExtraBold", "ExtBd", "Extra Bold", "Extra-Bold", + "UltraBold", "UltBd", "Ultra Bold", "Ultra-Bold", + "Black", "Blk", + "Heavy", "Hv", + "Ultra", "Fat", "Poster", + + // ---- Slope ---- + "Italic", "Ita", "It", + "Oblique", "Obl", "Caps", "CapsI", + "Backslant", + + // ---- Width ---- + "Compressed", + "ExtraCondensed", "UltraCondensed", + "Condensed", "Cond", "Cn", + "SemiCondensed", + "Narrow", + "SemiExpanded", + "Expanded", "Exp", "Extended", + "ExtraExpanded", "UltraExpanded", + "Wide", + + // ---- Optical size ---- + "Caption", "Text", "Subhead", "Deck", "Display", "Titling", + + // ---- Weight + Italic/Oblique (common combinations) ---- + "ThinItalic", "ThinIt", + "LightItalic", "LightOblique", "LightIt", + "BookItalic", "BookOblique", + "MediumItalic", "MediumOblique", "MediumIt", + "DemiItalic", "DemiOblique", + "SemiBoldItalic", "SemiboldItalic", "SemiBoldIt", "SemiboldIt", + "BoldItalic", "BoldIt", "BoldOblique", "BoldObl", + "ExtraBoldItalic", "ExtraBoldIt", + "BlackItalic", "BlackIt", "HeavyItalic", + + // ---- Width + Weight / Slope (common combos) ---- + "BoldCondensed", "BoldCond", "BoldCn", + "BoldExpanded", "BoldExp", "BoldExtended", + "BoldSemiExt", "SemiExt", + "LightCondensed", "LightCond", + "MediumCondensed", "MediumCond", + "CondensedBold", "CondBold", + "CondensedLight", + "ExpandedBold", "ExtendedBold", + + // ---- SmallCaps & other variants ---- + "SmallCaps", "SC", "PetiteCaps", + "RomanSmallCaps", + "BoldSmallCaps", + "Inline", "Outline", "Shadow", + "Engraved", "Stencil", "Swash", + "SuppSwashCaps", "SwashCaps", + "Ornaments", "Symbols", "Icons", "Supp", "Small", + + // ---- Foundry / vendor tags ---- + "Std", "MT", "PS", "LT", "Com", "W1G", "EF", "CE" + ); +} From f0efbca65aee24c4b6e35aa1e55d68621517ca79 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Tue, 9 Jun 2026 12:56:37 +0300 Subject: [PATCH 4/4] Update PDFontDescriptor.java --- .../org/verapdf/pd/font/PDFontDescriptor.java | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java index 340ec19a..4f3c250d 100644 --- a/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java +++ b/src/main/java/org/verapdf/pd/font/PDFontDescriptor.java @@ -128,31 +128,27 @@ public static String extractFontFamilyFromFontName(String fontNameWithoutSubset) String name = fontNameWithoutSubset.trim(); name = name.replaceAll("\\*\\d+", ""); - boolean changed; - do { + boolean changed = true; + while (changed) { changed = false; for (String suffix : FontConstants.STYLE_SUFFIXES) { String lowerName = name.toLowerCase(); String lowerSuffix = suffix.toLowerCase(); - - if (lowerName.endsWith("-" + lowerSuffix)) { - name = name.substring(0, name.length() - suffix.length() - 1); - changed = true; - break; - } if (lowerName.endsWith(lowerSuffix)) { name = name.substring(0, name.length() - suffix.length()); + if (name.endsWith("-")) { + name = name.substring(0, name.length() - 1); + } changed = true; break; } } - } while (changed); + } - name = name.replaceAll("-$", ""); String spaced = name.replaceAll("([a-z])([A-Z])", "$1 $2"); spaced = spaced.trim().replaceAll("\\s+", " "); - return spaced.isEmpty() ? "sans-serif" : spaced; + return spaced.isEmpty() ? null : spaced; } /**