`
oywl2008
  • 浏览: 998312 次
  • 性别: Icon_minigender_1
  • 来自: 广州
社区版块
存档分类
最新评论

中文、英文和中英文混合排序

 
阅读更多

对Search进行排序

SearchComparator.java中的实现方法compare已不能满足需要

其中涉及到中文、英文或者中英文混合排序,所以这里使用开源的 pinyin4j 对其排序

SearchComparator.java调用sortListByType排序,其中调用了PinyinComparator

SearchComparator.java

 

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.clx.webmail.util.PinyinComparator;

 

public class SearchComparator implements Comparator<Search> {

 

 /**简单,不能用来对纯中文、纯英文或者中英文混合进行排序**/
 public int compare(Search s1, Search s2) {
  return s1.getSearchName().compareToIgnoreCase(s2.getSearchName());
 } 
 
 public List<Search> sortListByType(List<Search> list)
 {


  List engList=new ArrayList();
  List chaList=new ArrayList();


  if(list!=null&&list.size()>0)
  {


    for(int i=0;i<list.size();i++)
    {


       Search search=(Search)list.get(i);
       String name=search.getSearchName();
       /**
          *  如果英文优先,则这里的IF条件为:
          *  isContainsHanyu(name.substring(0,1))&&isContainsHanyu(name)
         **/
      if(isContainsHanyu(name))
      {
           chaList.add(search);
      }
      else
      {
           engList.add(search);
      }


    }


  }


  if(chaList.size()>0)
  {
       PinyinComparator pinyin=new PinyinComparator();
       Collections.sort(chaList,pinyin);
  }


  if(engList.size()>0)
  {
      Collections.sort(engList,this);
  }


  list=new ArrayList();
  list=copy(list,engList);
  list=copy(list,chaList);


  return list;
 }
 
 //把一个集合中的元素复制到另一个集合中
 public List copy(List sourceList,List copyList)
 {
    if(copyList!=null)
   {
      for(int i=0;i<copyList.size();i++)
      {
          sourceList.add(copyList.get(i));
      }
   }
   return sourceList;
 }
   
 
 //检查字符串是否包含中文
 public boolean isContainsHanyu(String str)
 {
        boolean flag=false;
        Pattern  pattern   =   Pattern.compile("[//u4E00-//u9FA5]+",Pattern.CANON_EQ);  
        Matcher  matcher   =   pattern.matcher(str);  


       if(matcher.find())  
       {  
            flag=true;
       }  

 
     return flag;
  
 }
 
 
     public static void main(String[] args)
    {
       String s = "test测试";
       SearchComparator comparator=new SearchComparator();
       comparator.isContainsHanyu(s);
   }

 

 

}

 

 

 

PinyinComparator.java

 

 

import java.util.Arrays;
import java.util.Comparator;

import com.clx.webmail.models.Search;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

public class PinyinComparator implements Comparator<Search>{
 
 public int compare(Search s1,Search s2) {


  String o1=s1.getSearchName();


  String o2=s2.getSearchName();


  for (int i = 0; i < o1.length() && i < o2.length(); i++) {

 

            int codePoint1 = o1.charAt(i);


            int codePoint2 = o2.charAt(i);

 

            if (Character.isSupplementaryCodePoint(codePoint1)|| Character.isSupplementaryCodePoint(codePoint2)) {
                i++;
            }

 

            if (codePoint1 != codePoint2)
            {
                if (Character.isSupplementaryCodePoint(codePoint1)|| Character.isSupplementaryCodePoint(codePoint2))

                {
                    return codePoint1 - codePoint2;
                }

                String pinyin1 = pinyin((char) codePoint1);
                String pinyin2 = pinyin((char) codePoint2);

                if (pinyin1 != null && pinyin2 != null)
                {
                 // 两个字符都是汉字
                    if (!pinyin1.equals(pinyin2))
                    {
                        return pinyin1.compareTo(pinyin2);
                    }
                }
                else
                {
                    return codePoint1 - codePoint2;
                }
            }
     }
        
     return  o1.length() -  o2.length();


}

 /**对中英文排序**/
    private String pinyin(char c) {
     
     if(String.valueOf(c)==null||String.valueOf(c).length()==0)
     {
      return "";
     }
     
     HanyuPinyinOutputFormat format = new  HanyuPinyinOutputFormat();
     format.setCaseType(HanyuPinyinCaseType.LOWERCASE);   
     format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);    
     format.setVCharType(HanyuPinyinVCharType.WITH_V);
     String output="";
     try
     {            
       if (java.lang.Character.toString(c).matches("[//u4E00-//u9FA5]+"))
       {                  
        String[] temp = PinyinHelper.toHanyuPinyinStringArray(c,format);
        if(temp!=null&&temp.length>0)
        {
         output += temp[0];
        }
       }
       else
       {
        output += java.lang.Character.toString(c);
       }
     }catch(BadHanyuPinyinOutputFormatCombination e)
     {
      e.printStackTrace();
     }
     
     return output;     
    }   
  
}

 

此外,增加一个类,此类用来得到汉语拼音,可能在排序中也用得到

 

 

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import java.io.UnsupportedEncodingException;

/**
* 拼音工具
*
* @author zhouhang 2010-01-25

*/
public class PinyinToolkit {

        /**
         * 获取汉字串拼音首字母,英文字符不变
         *
         * @param chinese 汉字串
         * @return 汉语拼音首字母
         */
        public static String cn2FirstSpell(String chinese) {
                StringBuffer pybf = new StringBuffer();
                char[] arr = chinese.toCharArray();
                HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
                defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
                defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
                defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
                for (int i = 0; i < arr.length; i++) {
                    if (arr[i] > 128)
                    {
                        try {
                                String[] _t = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
                                if (_t != null) {
                                      pybf.append(_t[0].charAt(0));
                                }
                        } catch (BadHanyuPinyinOutputFormatCombination e) {
                                e.printStackTrace();
                        }
                    } else {
                            pybf.append(arr[i]);
                    }
                }
                return pybf.toString().replaceAll("//W", "").trim();
        }

        /**
         * 获取汉字串拼音,英文字符不变
         *
         * @param chinese 汉字串
         * @return 汉语拼音
         */
        public static String cn2Spell(String chinese) {
                StringBuffer pybf = new StringBuffer();
                char[] arr = chinese.toCharArray();
                HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
                defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
                defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
                defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
                for (int i = 0; i < arr.length; i++) {
                    if (arr[i] > 128) {
                        try {
                                pybf.append(PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat)[0]);
                        } catch (BadHanyuPinyinOutputFormatCombination e) {
                                e.printStackTrace();
                        }
                    } else {
                            pybf.append(arr[i]);
                    }
                }
                return pybf.toString();
        }

        public static void main(String[] args) throws UnsupportedEncodingException {
                String x = "嘅囧誰說壞學生來勼髮視頻裆児";
                System.out.println(cn2FirstSpell(x));
                System.out.println(cn2Spell(x));
        }
}

 

 

转自: http://blog.csdn.net/shenzhen_mydream/article/details/5253048

 

 

 

 

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics