码迷,mamicode.com
首页 > 其他好文 > 详细

Punycode转中文

时间:2016-08-09 00:18:03      阅读:221      评论:0      收藏:0      [点我收藏+]

标签:

package cn.cnnic.ops.udf;

/**
 * Decodes Punycode (RFC 3492) encoded domain names, such as
 * {@code xn--fiqs8s}, back into their Unicode form.
 *
 * <p>The numeric constants below are the Punycode parameter values defined by
 * RFC 3492, section 5. All decoding state is local to each call.
 */
public class GetChineseFromPunycode {

    /** Minimum threshold for a digit position. */
    static final int TMIN = 1;
    /** Maximum threshold for a digit position. */
    static final int TMAX = 26;
    /** Number of distinct digit values (a-z and 0-9). */
    static final int BASE = 36;
    /** First code point that is not a basic (ASCII) code point. */
    static final int INITIAL_N = 128;
    /** Bias used before the first delta has been decoded. */
    static final int INITIAL_BIAS = 72;
    /** Damping divisor applied to the very first delta. */
    static final int DAMP = 700;
    /** Skew term of the bias adaptation formula. */
    static final int SKEW = 38;
    /** Separates the literal basic code points from the encoded deltas. */
    static final char DELIMITER = '-';
    /** Prefix marking a label as Punycode encoded. */
    static final String PUNY_PREFIX = "xn--";
    /** Label separator of a domain name. */
    static final char DOT = '.';
    /** Regular expression matching a single label separator. */
    static final String SPLIT_DOT = "\\.";

    /**
     * Small demo: decodes a sample domain name and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "xn--fiq7iz9az60bsyah94knxag3d.xn--fiqs8s";
        GetChineseFromPunycode gpfc = new GetChineseFromPunycode();
        System.out.println(gpfc.evaluate(str));
    }

    /**
     * Best-effort decoding of a (possibly Punycode encoded) domain name.
     *
     * @param txt the domain name; surrounding whitespace is ignored
     * @return the decoded name; the trimmed input if it cannot be decoded;
     *         {@code null} if {@code txt} is {@code null}
     */
    public String evaluate(String txt) {
        if (txt == null) {
            // Nothing to decode; previously this raised a NullPointerException.
            return null;
        }
        String trimmed = txt.trim();
        try {
            return fromPunycodeToChinese(trimmed);
        } catch (Exception e) {
            // Deliberate best effort: report the problem and fall back to the input.
            e.printStackTrace();
            return trimmed;
        }
    }

    /**
     * Decodes a single Punycode label body, i.e. the label with the
     * {@code xn--} prefix already removed.
     *
     * <p>Decoding is done on Unicode code points, so characters outside the
     * Basic Multilingual Plane are produced as proper surrogate pairs.
     *
     * @param input the label body; must not be {@code null}
     * @return the decoded label
     * @throws Exception with message {@code BAD_INPUT} if the input is not
     *         valid Punycode, or {@code OVERFLOW} if a decoded value does not
     *         fit into an {@code int}
     */
    public static String fromPunycodeToChineseUnit(String input) throws Exception {
        int n = INITIAL_N;
        int i = 0;
        int bias = INITIAL_BIAS;
        final int end = input.length();
        // Every output code point consumes at least one input character,
        // so the input length is an upper bound for the output size.
        int[] codePoints = new int[end];
        int count = 0;

        // Everything before the last delimiter is copied literally.
        int pos = input.lastIndexOf(DELIMITER);
        if (pos > 0) {
            for (int j = 0; j < pos; j++) {
                char c = input.charAt(j);
                if (!isBasic(c)) {
                    throw new Exception("BAD_INPUT");
                }
                codePoints[count++] = c;
            }
            pos++;
        } else {
            // No basic code points were copied: decoding starts at the beginning.
            pos = 0;
        }

        while (pos < end) {
            int oldi = i;
            int w = 1;
            // Decode one generalized variable-length integer into i.
            for (int k = BASE;; k += BASE) {
                if (pos == end) {
                    throw new Exception("BAD_INPUT");
                }
                int digit = codepoint2digit(input.charAt(pos++));
                if (digit > (Integer.MAX_VALUE - i) / w) {
                    throw new Exception("OVERFLOW");
                }
                i += digit * w;
                int t = threshold(k, bias);
                if (digit < t) {
                    break;
                }
                if (w > Integer.MAX_VALUE / (BASE - t)) {
                    throw new Exception("OVERFLOW");
                }
                w *= BASE - t;
            }

            int newCount = count + 1;
            bias = adapt(i - oldi, newCount, oldi == 0);
            if (i / newCount > Integer.MAX_VALUE - n) {
                throw new Exception("OVERFLOW");
            }
            n += i / newCount;
            if (n > Character.MAX_CODE_POINT) {
                // Not a Unicode code point; a char cast would silently truncate it.
                throw new Exception("BAD_INPUT");
            }
            i %= newCount;

            // Insert n at code point position i.
            System.arraycopy(codePoints, i, codePoints, i + 1, count - i);
            codePoints[i] = n;
            count = newCount;
            i++;
        }
        return new String(codePoints, 0, count);
    }

    /**
     * Returns the threshold for digit position {@code k} under the current
     * bias, clamped to the range {@code TMIN..TMAX}.
     */
    private static int threshold(int k, int bias) {
        if (k <= bias) {
            return TMIN;
        }
        if (k >= bias + TMAX) {
            return TMAX;
        }
        return k - bias;
    }

    /**
     * Bias adaptation function of RFC 3492, section 6.1.
     *
     * @param delta the delta that has just been decoded
     * @param numpoints number of code points decoded so far, including the
     *        current one; must be positive
     * @param first {@code true} if this is the first delta of the label
     * @return the bias to use for the next delta
     */
    public static int adapt(int delta, int numpoints, boolean first) {
        if (first) {
            delta = delta / DAMP;
        } else {
            delta = delta / 2;
        }
        delta = delta + (delta / numpoints);
        int k = 0;
        while (delta > ((BASE - TMIN) * TMAX) / 2) {
            delta = delta / (BASE - TMIN);
            k = k + BASE;
        }
        return k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
    }

    /**
     * Tells whether a character is a basic (ASCII) code point.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is below 0x80
     */
    public static boolean isBasic(char c) {
        return c < 0x80;
    }

    /**
     * Maps a digit value to the character that represents it.
     *
     * @param d the digit value, 0..35
     * @return the code point: 'a'..'z' for 0..25, '0'..'9' for 26..35
     * @throws Exception with message {@code BAD_INPUT} if {@code d} is
     *         outside 0..35
     */
    public static int digit2codepoint(int d) throws Exception {
        if (d < 0) {
            throw new Exception("BAD_INPUT");
        } else if (d < 26) {
            // 0..25 : 'a'..'z'
            return d + 'a';
        } else if (d < 36) {
            // 26..35 : '0'..'9'
            return d - 26 + '0';
        } else {
            throw new Exception("BAD_INPUT");
        }
    }

    /**
     * Maps a character to the digit value it represents. Letters are accepted
     * in both cases, as RFC 3492 requires of decoders.
     *
     * @param c the code point to convert
     * @return 0..25 for a letter, 26..35 for a decimal digit
     * @throws Exception with message {@code BAD_INPUT} if {@code c} is not an
     *         ASCII letter or decimal digit
     */
    public static int codepoint2digit(int c) throws Exception {
        if (c >= '0' && c <= '9') {
            // '0'..'9' : 26..35
            return c - '0' + 26;
        } else if (c >= 'a' && c <= 'z') {
            // 'a'..'z' : 0..25
            return c - 'a';
        } else if (c >= 'A' && c <= 'Z') {
            // 'A'..'Z' : 0..25
            return c - 'A';
        } else {
            throw new Exception("BAD_INPUT");
        }
    }

    /**
     * Decodes every Punycode label of a domain name. Labels without the
     * {@code xn--} prefix (compared case-insensitively) are kept unchanged,
     * and so are empty labels, which means leading, trailing and repeated
     * dots survive.
     *
     * @param input the domain name; may be {@code null}
     * @return the decoded name, or the empty string for {@code null} or
     *         empty input
     * @throws Exception if a label with the prefix is not valid Punycode
     */
    public static String fromPunycodeToChinese(String input) throws Exception {
        if (input == null || input.isEmpty()) {
            return "";
        }
        if (input.indexOf(DOT) < 0) {
            return decodeLabel(input);
        }
        // Limit -1 keeps trailing empty labels, so "example." keeps its dot.
        String[] labels = input.split(SPLIT_DOT, -1);
        StringBuilder result = new StringBuilder(input.length());
        for (int index = 0; index < labels.length; index++) {
            if (index > 0) {
                result.append(DOT);
            }
            result.append(decodeLabel(labels[index]));
        }
        return result.toString();
    }

    /**
     * Decodes one label if it carries the Punycode prefix, otherwise returns
     * it unchanged.
     */
    private static String decodeLabel(String label) throws Exception {
        int prefixLength = PUNY_PREFIX.length();
        if (label.regionMatches(true, 0, PUNY_PREFIX, 0, prefixLength)) {
            return fromPunycodeToChineseUnit(label.substring(prefixLength));
        }
        return label;
    }
}

 

【参考】http://blog.csdn.net/a19881029/article/details/18262671

Punycode转中文

标签:

原文地址:http://www.cnblogs.com/zhzhang/p/5751436.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!