Skip to content

Commit 85161e6

Browse files
wang0331 and obo authored
feat: add support for Ideographic Variation Sequences (IVS) in TrueType font (#1482)
* feat: add support for Ideographic Variation Sequences (IVS) in TrueType font * test: add ivs font unit tests --------- Co-authored-by: obo <[email protected]>
1 parent 6d0eaaf commit 85161e6

File tree

5 files changed

+217
-15
lines changed

5 files changed

+217
-15
lines changed

openpdf-core/src/main/java/org/openpdf/text/pdf/FontDetails.java

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -271,31 +271,81 @@ byte[] convertToBytes(String text, TextRenderingOptions options) {
271271
return b;
272272
}
273273

274-
private byte[] convertToBytesWithGlyphs(String text) {
274+
private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException {
275275
int len = text.length();
276-
int[] metrics = null;
277-
int[] glyph = new int[len];
276+
int[] glyph = new int[len * 2];
278277
int i = 0;
279-
for (int k = 0; k < len; ++k) {
280-
int val;
278+
int k = 0;
279+
280+
while (k < len) {
281+
int baseCp;
282+
int charCount;
283+
281284
if (Utilities.isSurrogatePair(text, k)) {
282-
val = Utilities.convertToUtf32(text, k);
283-
k++;
285+
baseCp = Utilities.convertToUtf32(text, k);
286+
charCount = 2;
284287
} else {
285-
val = text.charAt(k);
288+
baseCp = text.charAt(k);
289+
charCount = 1;
286290
}
287-
metrics = ttu.getMetricsTT(val);
288-
if (metrics == null) {
291+
292+
// try to process IVS
293+
IVSResult ivsResult = tryProcessIVS(text, k + charCount, baseCp);
294+
if (ivsResult.found) {
295+
glyph[i++] = ivsResult.glyphCode;
296+
k += charCount + ivsResult.vsCharCount;
289297
continue;
290298
}
291-
int m0 = metrics[0];
292-
int m1 = metrics[1];
293-
longTag.computeIfAbsent(m0, key -> new int[]{m0, m1, val});
294-
glyph[i++] = m0;
299+
// common glyph searching
300+
int[] metrics = ttu.getMetricsTT(baseCp);
301+
if (metrics != null) {
302+
int m0 = metrics[0];
303+
longTag.computeIfAbsent(m0, key -> new int[]{m0, metrics[1], baseCp});
304+
glyph[i++] = m0;
305+
}
306+
307+
k += charCount;
295308
}
309+
296310
return getCJKEncodingBytes(glyph, i);
297311
}
298312

313+
private IVSResult tryProcessIVS(String text, int vsStartIndex, int baseCp) {
314+
if (vsStartIndex >= text.length()) {
315+
return IVSResult.NOT_FOUND;
316+
}
317+
318+
int vsCp;
319+
int vsCharCount;
320+
321+
if (Utilities.isSurrogatePair(text, vsStartIndex)) {
322+
vsCp = Utilities.convertToUtf32(text, vsStartIndex);
323+
vsCharCount = 2;
324+
} else {
325+
vsCp = text.charAt(vsStartIndex);
326+
vsCharCount = 1;
327+
}
328+
329+
if (!isVariationSelector(vsCp)) {
330+
return IVSResult.NOT_FOUND;
331+
}
332+
333+
int[] format14Metrics = ttu.getFormat14MetricsTT(baseCp, vsCp);
334+
if (format14Metrics == null) {
335+
return IVSResult.NOT_FOUND;
336+
}
337+
338+
int glyphCode = format14Metrics[0];
339+
Integer gl = glyphCode;
340+
longTag.computeIfAbsent(gl, k -> new int[]{glyphCode, format14Metrics[1], baseCp, vsCp});
341+
return new IVSResult(true, glyphCode, vsCharCount);
342+
}
343+
344+
private static boolean isVariationSelector(int codePoint) {
345+
return (codePoint >= 0xFE00 && codePoint <= 0xFE0F) ||
346+
(codePoint >= 0xE0100 && codePoint <= 0xE01EF);
347+
}
348+
299349
private byte[] getCJKEncodingBytes(int[] glyph, int size) {
300350
byte[] result = new byte[size * 2];
301351
for (int i = 0; i < size; i++) {
@@ -433,4 +483,18 @@ public boolean isSubset() {
433483
public void setSubset(boolean subset) {
434484
this.subset = subset;
435485
}
486+
487+
private static class IVSResult {
488+
static final IVSResult NOT_FOUND = new IVSResult(false, 0, 0);
489+
490+
final boolean found;
491+
final int glyphCode;
492+
final int vsCharCount;
493+
494+
IVSResult(boolean found, int glyphCode, int vsCharCount) {
495+
this.found = found;
496+
this.glyphCode = glyphCode;
497+
this.vsCharCount = vsCharCount;
498+
}
499+
}
436500
}

openpdf-core/src/main/java/org/openpdf/text/pdf/TrueTypeFont.java

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ class TrueTypeFont extends BaseFont {
206206

207207
protected HashMap<Integer, int[]> cmapExt;
208208

209+
protected HashMap<String, int[]> cmap05;
210+
209211
/**
210212
* The map containing the kerning information. It represents the content of table 'kern'. The key is an
211213
* <CODE>Integer</CODE> where the top 16 bits are the glyph number for the first character and the lower 16 bits
@@ -797,6 +799,7 @@ void readCMaps() throws DocumentException, IOException {
797799
int map31 = 0;
798800
int map30 = 0;
799801
int mapExt = 0;
802+
int map05 = 0;
800803
for (int k = 0; k < num_tables; ++k) {
801804
int platId = rf.readUnsignedShort();
802805
int platSpecId = rf.readUnsignedShort();
@@ -808,6 +811,8 @@ void readCMaps() throws DocumentException, IOException {
808811
map31 = offset;
809812
} else if (platId == 3 && platSpecId == 10) {
810813
mapExt = offset;
814+
} else if (platId == 0 && platSpecId == 5) {
815+
map05 = offset;
811816
}
812817
if (platId == 1 && platSpecId == 0) {
813818
map10 = offset;
@@ -860,6 +865,83 @@ void readCMaps() throws DocumentException, IOException {
860865
break;
861866
}
862867
}
868+
if (map05 > 0) {
869+
int format14Location = table_location[0] + map05;
870+
this.rf.seek((long) format14Location);
871+
int format = this.rf.readUnsignedShort();
872+
if (format == 14) {
873+
this.cmap05 = this.readFormat14(format14Location);
874+
}
875+
}
876+
}
877+
878+
HashMap<String, int[]> readFormat14(int format14Location) throws IOException {
879+
HashMap<String, int[]> result = new HashMap<>();
880+
this.rf.getFilePointer(); //startPosition unused
881+
882+
this.rf.readInt(); // byteLength unused
883+
int numVarSelectorRecords = this.rf.readInt();
884+
885+
if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) {
886+
throw new IOException("Invalid numVarSelectorRecords: " + numVarSelectorRecords);
887+
}
888+
889+
Map<Integer, Integer> nonDefaultOffsetMap = new HashMap<>();
890+
891+
for (int i = 0; i < numVarSelectorRecords; ++i) {
892+
byte[] input = new byte[3];
893+
this.rf.read(input);
894+
int selectorUnicodeValue = this.byte2int(input, 3);
895+
this.rf.readInt(); // defaultUVSOffset unused
896+
int nonDefaultUVSOffset = this.rf.readInt();
897+
898+
if (nonDefaultUVSOffset > 0) {
899+
nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset);
900+
}
901+
}
902+
903+
for (Map.Entry<Integer, Integer> entry : nonDefaultOffsetMap.entrySet()) {
904+
Integer selectorUnicodeValue = entry.getKey();
905+
int nonDefaultUVSOffset = entry.getValue();
906+
907+
this.rf.seek((long) (format14Location + nonDefaultUVSOffset));
908+
int mappingNums = this.rf.readInt();
909+
910+
if (mappingNums < 0 || mappingNums > 10000) {
911+
continue;
912+
}
913+
914+
for (int i = 0; i < mappingNums; ++i) {
915+
byte[] input = new byte[3];
916+
this.rf.read(input);
917+
int unicodeValue = this.byte2int(input, 3);
918+
int glyphId = this.rf.readUnsignedShort();
919+
result.put(unicodeValue + "_" + selectorUnicodeValue,
920+
new int[]{glyphId, this.getGlyphWidth(glyphId)});
921+
}
922+
}
923+
return result;
924+
}
925+
926+
/**
927+
* convert(Big-Endian)byte Array to unsigned int
928+
*/
929+
public int byte2int(byte[] data, int n) {
930+
if (data == null || n <= 0 || n > 4 || data.length < n) {
931+
return 0;
932+
}
933+
int result = 0;
934+
for (int i = 0; i < n; i++) {
935+
result = (result << 8) | (data[i] & 0xFF); // & 0xFF 确保无符号
936+
}
937+
return result;
938+
}
939+
940+
public int[] getFormat14MetricsTT(int char1, int char2) {
941+
if (this.cmap05 != null) {
942+
return this.cmap05.get(char1 + "_" + char2);
943+
}
944+
return null;
863945
}
864946

865947
HashMap<Integer, int[]> readFormat12() throws IOException {
@@ -1419,6 +1501,9 @@ public int[] getMetricsTT(int c) {
14191501
if (cmap10 != null) {
14201502
return cmap10.get(c);
14211503
}
1504+
if (cmap05 != null) {
1505+
return cmap05.get(c);
1506+
}
14221507
return null;
14231508
}
14241509

openpdf-core/src/main/java/org/openpdf/text/pdf/TrueTypeFontUnicode.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,9 @@ private PdfStream getToUnicode(int[][] metrics) {
290290
--size;
291291
int[] metric = metrics[k];
292292
String fromTo = toHex(metric[0]);
293-
buf.append(fromTo).append(fromTo).append(toHex(metric[2])).append('\n');
293+
String hexString;
294+
hexString = metric.length == 4 ? toHex(metric[2], metric[3]) : toHex(metric[2]);
295+
buf.append(fromTo).append(fromTo).append(hexString).append('\n');
294296
}
295297
buf.append(
296298
"endbfrange\n" +
@@ -625,4 +627,37 @@ public int[] getCharBBox(int c) {
625627
return bboxes[m[0]];
626628
}
627629

630+
/**
631+
* Converts two Unicode code points to a combined hex string in the format [<hex1hex2>]
632+
*
633+
* @param codePoint1 The first Unicode code point to convert
634+
* @param codePoint2 The second Unicode code point to convert
635+
* @return Combined hex string with specified format
636+
*/
637+
private String toHex(int codePoint1, int codePoint2) {
638+
String hexStr1 = convertCodePointToHex(codePoint1);
639+
String hexStr2 = convertCodePointToHex(codePoint2);
640+
return "[<" + hexStr1 + hexStr2 + ">]";
641+
}
642+
643+
/**
644+
* Converts a single Unicode code point to 4-digit hex string
645+
* Handles both BMP (Basic Multilingual Plane) and supplementary plane characters
646+
*/
647+
private String convertCodePointToHex(int codePoint) {
648+
final int BMP_MAX_CODE_POINT = 65536;
649+
final int SURROGATE_DIVISOR = 1024;
650+
final char HIGH_SURROGATE_BASE = '\ud800';
651+
final char LOW_SURROGATE_BASE = '\udc00';
652+
653+
if (codePoint < BMP_MAX_CODE_POINT) {
654+
return toHex4(codePoint);
655+
} else {
656+
int adjustedCodePoint = codePoint - BMP_MAX_CODE_POINT;
657+
int highSurrogate = adjustedCodePoint / SURROGATE_DIVISOR + HIGH_SURROGATE_BASE;
658+
int lowSurrogate = adjustedCodePoint % SURROGATE_DIVISOR + LOW_SURROGATE_BASE;
659+
return toHex4(highSurrogate) + toHex4(lowSurrogate);
660+
}
661+
}
662+
628663
}

openpdf-core/src/test/java/org/openpdf/text/pdf/FontDetailsTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,22 @@ void testFillerCMapLiberationIsNotNull() throws IOException {
4444
assertThat(fontDetails.getFillerCmap()).hasSize(1);
4545
}
4646

47+
@Test
48+
void testIvsTextConversion() throws IOException {
49+
String filename = "src/test/resources/fonts/ivs/Hei_MSCS.ttf";
50+
BaseFont baseFont = BaseFont.createFont(filename, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
51+
FontDetails fontDetails = new FontDetails(null, null, baseFont);
52+
TextRenderingOptions options = new TextRenderingOptions();
53+
options.setGlyphSubstitutionEnabled(false);
54+
String text = "㛇\uDB40\uDD01\uDB40\uDD02";
55+
byte[] bytes = fontDetails.convertToBytes(text, options);
56+
57+
assertThat(bytes).isNotNull().isNotEmpty();
58+
assertThat(fontDetails.longTag).isNotNull().isNotEmpty();
59+
// unicode kept
60+
assertThat(bytes).hasSize(4);
61+
// convert to 2 glyphs
62+
assertThat(fontDetails.longTag).hasSize(2);
63+
}
64+
4765
}
123 KB
Binary file not shown.

0 commit comments

Comments
 (0)