i believe the following is what strips non english words from tags. if anyone knows how to translate it that would be great.
Code:
var subject = document.vbform.subject.value.replace(/[^a-zA-Z 0-9]+/gi,"");
subject = subject.replace(/(\b(\w{1,2})\b(\s|$))|(\b(\w{19,})\b(\s|$))/g,"");
var newtaglist = filterText(subject);
newtaglist = trim(newtaglist);
newtaglist = newtaglist.replace(/\s+/g," ");
document.vbform.taglist.value = newtaglist.replace(/ /gi,",");