Detect non-ASCII characters in a String. Tag(s): Internationalization, String/Number
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharacterCodingException;

/**
 * String helpers; not instantiable.
 */
public class StringUtils {

  private StringUtils() {}

  /**
   * Tells whether every character of {@code v} is a US-ASCII character.
   *
   * <p>The string is first encoded to UTF-8 with a strict encoder and the
   * resulting bytes are then decoded as US-ASCII. Any non-ASCII character
   * yields bytes the US-ASCII decoder rejects, and an unpaired surrogate is
   * rejected by the UTF-8 encoder, so both cases return {@code false}.
   *
   * @param v the string to test; must not be {@code null}
   * @return {@code true} if {@code v} contains only US-ASCII characters
   *         (an empty string counts as pure ASCII), {@code false} otherwise
   * @throws NullPointerException if {@code v} is {@code null}
   */
  public static boolean isPureAscii(String v) {
    try {
      // Do not use v.getBytes(): it encodes with the platform default charset
      // and silently replaces unmappable characters with '?', which would make
      // a non-ASCII string look like pure ASCII on some platforms.
      ByteBuffer utf8Bytes =
          Charset.forName("UTF-8").newEncoder().encode(CharBuffer.wrap(v));
      CharsetDecoder asciiDecoder = Charset.forName("US-ASCII").newDecoder();
      asciiDecoder.decode(utf8Bytes);
    } catch (CharacterCodingException e) {
      // Either the input was not well-formed UTF-16 or a byte was not ASCII.
      return false;
    }
    return true;
  }

  /** Small demonstration of {@link #isPureAscii(String)}. */
  public static void main (String args[]) throws Exception {
    String test = "Réal";
    System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
    test = "Real";
    System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
    /*
     * output :
     * Réal isPureAscii() : false
     * Real isPureAscii() : true
     */
  }
}
import java.nio.charset.Charset;

/**
 * String helpers; not instantiable.
 */
public class StringUtils {

  private StringUtils() {}

  /**
   * Tells whether every character of {@code v} can be encoded as US-ASCII.
   *
   * @param v the string to test
   * @return {@code true} if a US-ASCII encoder can encode the whole string
   *         (an empty string counts as pure ASCII), {@code false} otherwise
   */
  public static boolean isPureAscii(String v) {
    // Alternatives:
    //   - use "ISO-8859-1" instead of "US-ASCII" to test for ISO Latin 1
    //   - use StandardCharsets.US_ASCII with JDK1.7+
    return Charset.forName("US-ASCII").newEncoder().canEncode(v);
  }

  /** Small demonstration of {@link #isPureAscii(String)}. */
  public static void main (String args[]) throws Exception {
    String test = "Réal";
    System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
    test = "Real";
    System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
    /*
     * output :
     * Réal isPureAscii() : false
     * Real isPureAscii() : true
     */
  }
}
Another way is to use a regular expression; see this JavaScript HowTo for a hint.
To simply strip any non-ASCII characters from a string:
/**
 * Demonstrates removing every non-ASCII character from a string with a
 * regular expression.
 */
public class Test {

  /**
   * Strips the non-ASCII characters from a sample string and prints the
   * result to standard output.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args[]) {
    // Matches any single character outside the POSIX ASCII class.
    final String nonAsciiPattern = "[^\\p{ASCII}]";
    final String input = "eéaà";
    final String asciiOnly = input.replaceAll(nonAsciiPattern, "");
    System.out.println(asciiOnly);
    /*
     * output : ea
     */
  }
}
See also Unaccent letters.