mirror of
https://github.com/signalapp/Signal-Android.git
synced 2026-03-03 15:58:40 +00:00
Account for grapheme cluster when trimming to fit a specific length.
Fixes #10076
This commit is contained in:
committed by
Alex Hart
parent
da4be5c1cf
commit
f06817f00d
@@ -0,0 +1,124 @@
|
||||
package org.thoughtcrime.securesms.util;
|
||||
|
||||
import android.os.Build;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Iterates over a string treating a surrogate pair and a grapheme cluster a single character.
|
||||
*/
|
||||
public final class CharacterIterable implements Iterable<String> {
|
||||
|
||||
private final String string;
|
||||
|
||||
public CharacterIterable(@NonNull String string) {
|
||||
this.string = string;
|
||||
}
|
||||
|
||||
@Override
|
||||
public @NonNull Iterator<String> iterator() {
|
||||
return new CharacterIterator();
|
||||
}
|
||||
|
||||
private class CharacterIterator implements Iterator<String> {
|
||||
private static final int UNINITIALIZED = -2;
|
||||
|
||||
private final BreakIteratorCompat breakIterator;
|
||||
|
||||
private int lastIndex = UNINITIALIZED;
|
||||
|
||||
CharacterIterator() {
|
||||
this.breakIterator = Build.VERSION.SDK_INT >= 24 ? new AndroidIcuBreakIterator(string)
|
||||
: new FallbackBreakIterator(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (lastIndex == UNINITIALIZED) {
|
||||
lastIndex = breakIterator.first();
|
||||
}
|
||||
return !breakIterator.isDone(lastIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
int firstIndex = lastIndex;
|
||||
lastIndex = breakIterator.next();
|
||||
return string.substring(firstIndex, lastIndex);
|
||||
}
|
||||
}
|
||||
|
||||
private interface BreakIteratorCompat {
|
||||
int first();
|
||||
|
||||
int next();
|
||||
|
||||
boolean isDone(int index);
|
||||
}
|
||||
|
||||
/**
|
||||
* An BreakIteratorCompat implementation that delegates calls to `android.icu.text.BreakIterator`.
|
||||
* This class handles grapheme clusters fine but requires Android API >= 24.
|
||||
*/
|
||||
@RequiresApi(24)
|
||||
private static class AndroidIcuBreakIterator implements BreakIteratorCompat {
|
||||
private final android.icu.text.BreakIterator breakIterator = android.icu.text.BreakIterator.getCharacterInstance();
|
||||
|
||||
public AndroidIcuBreakIterator(@NonNull String string) {
|
||||
breakIterator.setText(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int first() {
|
||||
return breakIterator.first();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
return breakIterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDone(int index) {
|
||||
return index == android.icu.text.BreakIterator.DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An BreakIteratorCompat implementation that delegates calls to `java.text.BreakIterator`.
|
||||
* This class may or may not handle grapheme clusters well depending on the underlying implementation.
|
||||
* In the emulator, API 23 implements ICU version of the BreakIterator so that it handles grapheme
|
||||
* clusters fine. But API 21 implements RuleBasedIterator which does not handle grapheme clusters.
|
||||
* <p>
|
||||
* If it doesn't handle grapheme clusters correctly, in most cases the combined characters are
|
||||
* broken up into pieces when the code tries to trim a string. For example, an emoji that is
|
||||
* a combination of a person, gender and skin tone, trimming the character using this class may result
|
||||
* in trimming the parts of the character, e.g. a dark skin frowning woman emoji may result in
|
||||
* a neutral skin frowning woman emoji.
|
||||
*/
|
||||
private static class FallbackBreakIterator implements BreakIteratorCompat {
|
||||
private final java.text.BreakIterator breakIterator = java.text.BreakIterator.getCharacterInstance();
|
||||
|
||||
public FallbackBreakIterator(@NonNull String string) {
|
||||
breakIterator.setText(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int first() {
|
||||
return breakIterator.first();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
return breakIterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDone(int index) {
|
||||
return index == java.text.BreakIterator.DONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,13 @@
|
||||
package org.thoughtcrime.securesms.util;
|
||||
|
||||
import android.text.TextUtils;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.core.text.BidiFormatter;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Set;
|
||||
|
||||
@@ -39,21 +43,33 @@ public final class StringUtil {
|
||||
|
||||
/**
|
||||
* Trims a name string to fit into the byte length requirement.
|
||||
* <p>
|
||||
* This method treats a surrogate pair and a grapheme cluster as a single character.
|
||||
* See examples in tests defined in StringUtilTest_trimToFit.
|
||||
*/
|
||||
public static @NonNull String trimToFit(@Nullable String name, int maxLength) {
|
||||
if (name == null) return "";
|
||||
|
||||
// At least one byte per char, so shorten string to reduce loop
|
||||
if (name.length() > maxLength) {
|
||||
name = name.substring(0, maxLength);
|
||||
public static @NonNull String trimToFit(@Nullable String name, int maxByteLength) {
|
||||
if (TextUtils.isEmpty(name)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Remove one char at a time until fits in byte allowance
|
||||
while (name.getBytes(StandardCharsets.UTF_8).length > maxLength) {
|
||||
name = name.substring(0, name.length() - 1);
|
||||
if (name.getBytes(StandardCharsets.UTF_8).length <= maxByteLength) {
|
||||
return name;
|
||||
}
|
||||
|
||||
return name;
|
||||
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
|
||||
for (String graphemeCharacter : new CharacterIterable(name)) {
|
||||
byte[] bytes = graphemeCharacter.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
if (stream.size() + bytes.length <= maxByteLength) {
|
||||
stream.write(bytes);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return stream.toString();
|
||||
} catch (IOException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user