「cpdetector, free java code page detection.」,這是另一套編碼偵測的解決方案(Java)~ 同時也包含了Mozilla's chardet (jchardet)~
另外，根據「Shared Development: Character encoding detection」針對「cpdetector」所做的編碼測試，它的偵測成果的確相當顯著，有需要的人可以試用看看。
範例程式
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;
import cpdetector.io.CodepageDetectorProxy;
import cpdetector.io.HTMLCodepageDetector;
import cpdetector.io.JChardetFacade;
/**
 * Sample wrapper around cpdetector's {@link CodepageDetectorProxy} that reports the
 * character encoding detected for a local file or for the resource behind a URL.
 *
 * <p>A single shared proxy is configured once, when the class is loaded, with an
 * {@link HTMLCodepageDetector} followed by the {@link JChardetFacade} (jchardet) detector.
 */
public class CPdetector
{
    /** Shared detection proxy; populated once in the static initializer below. */
    private static final CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();

    static
    {
        // NOTE(review): the boolean argument is presumably a "verbose" switch -- confirm
        // against the cpdetector documentation for the version in use.
        detector.add(new HTMLCodepageDetector(false));
        detector.add(JChardetFacade.getInstance());
    }

    /**
     * Detects the encoding of a local file by converting it to a {@code file:} URL and
     * delegating to {@link #getEncoding(URL)}.
     *
     * @param f the file to inspect; must not be {@code null}
     * @return the name of the detected charset, or {@code null} if the detector returned none
     * @throws IOException if the file cannot be expressed as a URL or the detector fails
     *         while reading it
     * @throws NullPointerException if {@code f} is {@code null}
     */
    public String getEncoding(File f) throws IOException
    {
        if (f == null)
        {
            throw new NullPointerException("f must not be null");
        }
        return getEncoding(f.toURI().toURL());
    }

    /**
     * Detects the encoding of the resource behind the given URL.
     *
     * @param url the location to inspect; must not be {@code null}
     * @return the name of the detected charset, or {@code null} if the detector returned none
     * @throws IOException if the detector fails while reading from the URL
     * @throws NullPointerException if {@code url} is {@code null}
     */
    public String getEncoding(URL url) throws IOException
    {
        if (url == null)
        {
            throw new NullPointerException("url must not be null");
        }
        Charset charset = detector.detectCodepage(url);
        return (charset != null) ? charset.name() : null;
    }

    /**
     * Demo entry point: prints the encoding detected for the local file {@code Big5.txt}
     * and for {@code http://www.google.com.tw}.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        // Named differently from the static "detector" field to avoid shadowing it.
        CPdetector cpDetector = new CPdetector();
        try
        {
            String encoding = cpDetector.getEncoding(new File("Big5.txt"));
            System.out.println("encoding:" + encoding);
            encoding = cpDetector.getEncoding(new URL("http://www.google.com.tw"));
            System.out.println("encoding:" + encoding);
        }
        catch (Exception e)
        {
            // Top-level boundary of a sample program: report the failure and exit normally.
            e.printStackTrace();
        }
    }
}
範例結果:
encoding:UTF-8
encoding:Big5
