Detect non-ASCII characters in a String (Tags: Internationalization, String/Number)
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharacterCodingException;
/**
 * String helpers for detecting non-ASCII content.
 */
public class StringUtils {
    /** Not instantiable: this class only holds static helpers. */
    private StringUtils() {}

    /**
     * Tells whether every character of {@code v} is a 7-bit US-ASCII character.
     *
     * <p>The string is encoded strictly as UTF-8 and the resulting bytes are then
     * decoded as US-ASCII. In UTF-8 every ASCII character is a single byte below
     * 0x80 and every other character produces bytes of 0x80 and above, which the
     * US-ASCII decoder rejects. The charset is named explicitly because the
     * no-argument {@code String.getBytes()} uses the platform default charset and
     * substitutes {@code '?'} for anything it cannot encode, which would make
     * such input look like plain ASCII.
     *
     * @param v the string to inspect; must not be {@code null}
     * @return {@code true} if {@code v} contains only US-ASCII characters
     *         (including the empty string), {@code false} otherwise
     * @throws NullPointerException if {@code v} is {@code null}
     */
    public static boolean isPureAscii(String v) {
        CharsetDecoder asciiDecoder = Charset.forName("US-ASCII").newDecoder();
        try {
            // A fresh encoder reports malformed input (e.g. a lone surrogate)
            // instead of replacing it, so such input ends up in the catch below.
            ByteBuffer utf8Bytes =
                Charset.forName("UTF-8").newEncoder().encode(CharBuffer.wrap(v));
            // Only the success/failure of the decode matters; the decoded
            // characters themselves are not needed.
            asciiDecoder.decode(utf8Bytes);
        } catch (CharacterCodingException e) {
            // Either the input was not well-formed UTF-16 or it contained a
            // byte outside the US-ASCII range: not pure ASCII in both cases.
            return false;
        }
        return true;
    }

    /**
     * Small demonstration of {@link #isPureAscii(String)}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        String test = "Réal";
        System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
        test = "Real";
        System.out.println(test + " isPureAscii() : " + StringUtils.isPureAscii(test));
        /*
         * output :
         * Réal isPureAscii() : false
         * Real isPureAscii() : true
         */
    }
}
import java.nio.charset.Charset;
/**
 * Variant of the ASCII check that relies on a charset encoder instead of a
 * decoder.
 */
public class StringUtils {
    /**
     * Reports whether the given string can be encoded entirely in US-ASCII.
     *
     * @param v the string to check
     * @return {@code true} when a US-ASCII encoder can encode {@code v},
     *         {@code false} otherwise
     */
    public static boolean isPureAscii(String v) {
        // Alternatives: "ISO-8859-1" to test against ISO Latin 1 instead, or
        // StandardCharsets.US_ASCII in place of the lookup with JDK1.7+.
        final Charset ascii = Charset.forName("US-ASCII");
        return ascii.newEncoder().canEncode(v);
    }

    /**
     * Prints the check result for one accented and one plain sample.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        final String[] samples = { "Réal", "Real" };
        for (String sample : samples) {
            System.out.println(sample + " isPureAscii() : " + isPureAscii(sample));
        }
        /*
         * output :
         * Réal isPureAscii() : false
         * Real isPureAscii() : true
         */
    }
}
Another way is to use a regular expression; see this JavaScript HowTo for a hint!
To simply strip any non-ASCII characters from a string:
/**
 * Demonstrates removing every non-ASCII character from a string with a
 * regular expression.
 */
public class Test {
    /**
     * Strips the accented letters from a sample string and prints what is left.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Matches any single character outside the POSIX ASCII class.
        final String nonAscii = "[^\\p{ASCII}]";
        final String sample = "eéaà";
        System.out.println(sample.replaceAll(nonAscii, ""));
        /*
         * output : ea
         */
    }
}
See also Unaccent letters.
mail_outline
Send comment, question or suggestion to howto@rgagnon.com
Send comment, question or suggestion to howto@rgagnon.com