|
@@ -11,6 +11,7 @@ import org.apache.commons.lang3.CharSetUtils;
|
|
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
import java.nio.charset.Charset;
|
|
|
+import java.nio.charset.StandardCharsets;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.Iterator;
|
|
|
|
|
@@ -43,27 +44,31 @@ public class Test {
|
|
|
}
|
|
|
|
|
|
|
|
|
- /* public static void main(String[] args) {
|
|
|
- String str = "---prediction: \"\\346\\210\\221\"";
|
|
|
- String substring = str.substring(16, str.length() - 1);
|
|
|
- System.out.println(unicodeToChinese(substring));
|
|
|
+ /*public static void main(String[] args) {
|
|
|
+ String encoded = "\\345\\216\\214\\346\\260\\247\\346\\261\\240\\347\\241\\235\\351\\205\\270\\347\\233\\220\\346\\260\\256\\346\\230\\257\\345\\220\\246\\345\\244\\247\\344\\272";
|
|
|
+ String decoded = decodeOctalToUtf8(encoded);
|
|
|
+ System.out.println(decoded);
|
|
|
}*/
|
|
|
|
|
|
- // 将 Unicode 编码的字符串转换为中文 (返回的结果里没有u 不是常规的Unicode编码)
|
|
|
- // 将 Unicode 编码的字符串转换为中文
|
|
|
- public static String unicodeToChinese(String unicodeStr) {
|
|
|
- StringBuilder chineseStr = new StringBuilder();
|
|
|
- String[] hex = unicodeStr.split("\\\\");
|
|
|
|
|
|
- for (String hexChar : hex) {
|
|
|
- if (!hexChar.isEmpty()) {
|
|
|
- // 将每个 Unicode 编码转换为字符并添加到中文字符串中
|
|
|
- int data = Integer.parseInt(hexChar, 8);
|
|
|
- chineseStr.append((char) data);
|
|
|
- }
|
|
|
+ public static String decodeOctalToUtf8(String encoded) {
|
|
|
+ // 移除反斜杠
|
|
|
+ String octalSequence = encoded.replaceAll("\\\\", "");
|
|
|
+
|
|
|
+ // 检查长度是否是3的倍数
|
|
|
+ if (octalSequence.length() % 3 != 0) {
|
|
|
+ throw new IllegalArgumentException("Encoded string length is not a multiple of 3");
|
|
|
}
|
|
|
|
|
|
- return chineseStr.toString();
|
|
|
+ byte[] bytes = new byte[octalSequence.length() / 3];
|
|
|
+ for (int i = 0, j = 0; i < octalSequence.length(); i += 3, j++) {
|
|
|
+ // 提取每三个字符的八进制数
|
|
|
+ String octal = octalSequence.substring(i, i + 3);
|
|
|
+ // 将八进制数转换为字节
|
|
|
+ bytes[j] = (byte) Integer.parseInt(octal, 8);
|
|
|
+ }
|
|
|
+ // 将字节序列转换为UTF-8编码的字符串
|
|
|
+ return new String(bytes, StandardCharsets.UTF_8);
|
|
|
}
|
|
|
|
|
|
|