首页 > 编程知识 正文

python gbk转utf8,文件utf8转gbk

时间:2023-05-05 06:35:31 阅读:175910 作者:2039

下面的 Java 代码提供了两种实现方式:

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Helpers for turning Java strings into UTF-8 bytes and for converting between
 * plain text and its backslash-u-XXXX escaped form.
 *
 * <p>Despite the "GBK" in two method names, no GBK decoding happens here: a
 * Java {@code String} is already a sequence of UTF-16 code units, so these
 * methods only encode that text as UTF-8.
 */
public class EncodingUtil {

    /**
     * Round-trips the given text through UTF-8 bytes and back into a string.
     *
     * @param gbkStr the text to convert; must not be {@code null}
     * @return the string decoded from the UTF-8 encoding of {@code gbkStr}
     * @throws NullPointerException if {@code gbkStr} is {@code null}
     */
    public static String getUTF8StringFromGBKString(String gbkStr) {
        return new String(getUTF8BytesFromGBKString(gbkStr), StandardCharsets.UTF_8);
    }

    /**
     * Encodes the given text as UTF-8.
     *
     * <p>Uses the JDK encoder so that characters in U+0080..U+07FF get the
     * two-byte form and surrogate pairs get the four-byte form; emitting three
     * bytes for every non-ASCII UTF-16 unit would produce invalid UTF-8 for
     * both of those ranges.
     *
     * @param gbkStr the text to encode; must not be {@code null}
     * @return a new array holding the UTF-8 encoding of {@code gbkStr}
     * @throws NullPointerException if {@code gbkStr} is {@code null}
     */
    public static byte[] getUTF8BytesFromGBKString(String gbkStr) {
        return gbkStr.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Decodes backslash escapes in the given text.
     *
     * <p>A backslash followed by {@code u} and four hexadecimal digits becomes
     * the UTF-16 code unit with that value. A backslash followed by {@code t},
     * {@code r}, {@code n} or {@code f} becomes tab, carriage return, line feed
     * or form feed. A backslash followed by any other character yields that
     * character with the backslash dropped. All other characters are copied
     * unchanged.
     *
     * @param theString the text to decode; must not be {@code null}
     * @return the decoded text
     * @throws IllegalArgumentException if an escape is truncated or contains a
     *     character that is not an ASCII hexadecimal digit
     * @throws NullPointerException if {@code theString} is {@code null}
     */
    public static String unicodeToUtf8(String theString) {
        int len = theString.length();
        StringBuilder out = new StringBuilder(len);
        int pos = 0;
        while (pos < len) {
            char current = theString.charAt(pos++);
            if (current != '\\') {
                out.append(current);
                continue;
            }
            if (pos >= len) {
                // A backslash with nothing after it cannot be decoded.
                throw new IllegalArgumentException("Dangling backslash at end of input.");
            }
            char escape = theString.charAt(pos++);
            if (escape == 'u') {
                if (len - pos < 4) {
                    // Fewer than four characters remain for the hex digits.
                    throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                }
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    value = (value << 4) + hexDigitValue(theString.charAt(pos++));
                }
                out.append((char) value);
            } else {
                out.append(unescapeControl(escape));
            }
        }
        return out.toString();
    }

    /** Returns the numeric value of an ASCII hexadecimal digit, rejecting anything else. */
    private static int hexDigitValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
    }

    /** Maps the letter of a control escape to its control character; other characters pass through. */
    private static char unescapeControl(char c) {
        switch (c) {
            case 't':
                return '\t';
            case 'r':
                return '\r';
            case 'n':
                return '\n';
            case 'f':
                return '\f';
            default:
                return c;
        }
    }

    /**
     * Escapes every UTF-16 code unit of the given text as a backslash, the
     * letter {@code u}, and four lower-case, zero-padded hexadecimal digits.
     * ASCII characters are escaped as well.
     *
     * <p>(Original header note: updated 2021/11/23 10:23.)
     *
     * @param str the text to escape; {@code null} is treated as the empty string
     * @return the escaped text, six characters per input code unit
     */
    public static String converToUnicode(String str) {
        String text = (str == null) ? "" : str;
        StringBuilder sb = new StringBuilder(text.length() * 6);
        for (int i = 0; i < text.length(); i++) {
            String hex = Integer.toHexString(text.charAt(i));
            sb.append("\\u");
            // Left-pad to exactly four digits (high byte first, then low byte).
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex);
        }
        return sb.toString();
    }

    /**
     * Escapes the text with {@link #converToUnicode(String)} and immediately
     * decodes it again with {@link #unicodeToUtf8(String)}.
     *
     * @param str the text to round-trip; {@code null} is treated as the empty string
     * @return the decoded result of the round trip
     */
    public static String convertToUnicodeToUtf8(String str) {
        return unicodeToUtf8(converToUnicode(str));
    }

    /**
     * Small demo: escapes and decodes a two-character Chinese word, then
     * decodes a longer pre-escaped sentence, printing both results.
     *
     * @param args unused
     * @throws IOException declared for compatibility; not thrown by this body
     */
    public static void main(String[] args) throws IOException {
        // The word is written with compile-time escapes (U+4E2D U+56FD) so that
        // this file compiles identically under any source encoding.
        String s = converToUnicode("\u4e2d\u56fd");
        System.out.println(unicodeToUtf8(s));
        // Doubled backslashes keep the escapes in the runtime string so that
        // the decoder, not the compiler, is what turns them into characters.
        System.out.println(unicodeToUtf8("\\u60a8\\u597d\\uff0c\\u60a8\\u6709\\u5f85\\u529e\\u8bf7\\u5904\\u7406\\uff08\\u5982\\u5df2\\u5904\\u7406\\uff0c\\u8bf7\\u5ffd\\u7565\\uff09\\uff0c\\u8c22\\u8c22\\u3002"));
    }
}

版权声明:该文观点仅代表作者本人。处理文章:请发送邮件至 三1五14八八95#扣扣.com 举报,一经查实,本站将立刻删除。