Friday, September 30, 2011

Xuất ra file unicode

Xuất ra file Unicode trong java


import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * File-writing helper.
 */
public class Util {

    /**
     * Writes {@code content} to the file at {@code filepath}, creating the file first
     * when it does not exist yet.
     *
     * <p>The text is encoded as UTF-16 and then handed to
     * {@code UnicodeUtil.convert(..., "UTF-8")}, whose result is written to the file.
     * Failures are reported with {@code printStackTrace()} and are not propagated
     * to the caller.
     *
     * @param filepath path of the file to (over)write
     * @param content  text to store
     */
    public static void writeUnicode(String filepath, String content) {
        // Check file exist
        File target = new File(filepath);
        if (!target.exists()) {
            try {
                // createNewFile() returns false when the file appeared in the meantime;
                // only announce a creation that actually happened.
                if (target.createNewFile()) {
                    System.out.println("New file " + filepath + " has been created ");
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        FileOutputStream fos = null;
        try {
            byte[] out = UnicodeUtil.convert(content.getBytes("UTF-16"), "UTF-8");
            fos = new FileOutputStream(filepath);
            fos.write(out);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception ex) {
            // UnicodeUtil.convert is declared to throw Exception.
            ex.printStackTrace();
        } finally {
            // Release the file handle even when write() fails.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

File UnicodeUtil.java


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

/**
 * Helpers for re-encoding byte buffers between character encodings with explicit
 * byte-order-mark (BOM) handling.
 */
public class UnicodeUtil {

    /**
     * The byte order mark character (U+FEFF). Writing it through an encoder for a
     * Unicode encoding produces that encoding's BOM byte sequence.
     */
    private static final String BOM_CHARACTER = "\uFEFF";

    /**
     * Re-encodes {@code bytes} into the {@code encout} encoding.
     *
     * <p>The input encoding is detected from a leading BOM; input without a BOM is
     * decoded as ASCII. The input BOM itself is not copied. A BOM is written at the
     * start of the output whenever {@link #getBOM(String)} knows one for
     * {@code encout}.
     *
     * @param bytes  source bytes, optionally starting with a BOM
     * @param encout name of the target encoding
     * @return the re-encoded bytes
     * @throws Exception if an encoding name is not supported or the conversion fails
     */
    public static byte[] convert(byte[] bytes, String encout) throws Exception {
        // Workaround for bug that will not be fixed by SUN
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4508058
        UnicodeInputStream uis = new UnicodeInputStream(new ByteArrayInputStream(bytes), "ASCII");
        String enc = uis.getEncoding();
        int dataStart = uis.getBOMOffset(); // number of BOM bytes to skip in the input
        uis.close();

        // null when the output encoding has no BOM (e.g. ASCII): don't write one then,
        // as the OutputStreamWriter will not process it correctly.
        String bom = getBOM(encout);

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BufferedReader br = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(bytes, dataStart, bytes.length - dataStart), enc));
        Writer w = new BufferedWriter(new OutputStreamWriter(out, encout));

        if (bom != null) {
            w.write(bom);
        }

        char[] buffer = new char[4096];
        int len;
        while ((len = br.read(buffer)) != -1) {
            w.write(buffer, 0, len);
        }

        br.close(); // Close the input.
        w.close(); // Flush and close output.
        return out.toByteArray();
    }

    /**
     * Returns the BOM for an encoding as a string.
     *
     * @param enc encoding name; recognised values are UTF-8, UTF-16BE, UTF-16LE,
     *            UTF-32BE and UTF-32LE
     * @return the BOM character (U+FEFF) for a recognised encoding, otherwise {@code null}
     * @throws UnsupportedEncodingException never thrown by this implementation; kept in
     *         the signature so existing callers keep compiling
     */
    public static String getBOM(String enc) throws UnsupportedEncodingException {
        if ("UTF-8".equals(enc)
                || "UTF-16BE".equals(enc)
                || "UTF-16LE".equals(enc)
                || "UTF-32BE".equals(enc)
                || "UTF-32LE".equals(enc)) {
            // The mark is the same character in every Unicode encoding; only its byte
            // form differs, and that is produced by the encoder doing the writing.
            return BOM_CHARACTER;
        }
        return null;
    }

    /**
     * Input stream wrapper that detects a leading Unicode BOM, reports the matching
     * encoding and leaves the stream positioned just after the BOM.
     */
    public static class UnicodeInputStream extends InputStream {
        private PushbackInputStream internalIn;

        private boolean isInited = false;

        // Length in bytes of the detected BOM; -1 until init() has run.
        private int BOMOffset = -1;

        private String defaultEnc;

        private String encoding;

        public static final int BOM_SIZE = 4;

        /**
         * @param in         stream to inspect
         * @param defaultEnc encoding reported when no BOM is found
         */
        public UnicodeInputStream(InputStream in, String defaultEnc) {
            internalIn = new PushbackInputStream(in, BOM_SIZE);
            this.defaultEnc = defaultEnc;
        }

        /** Returns the encoding that is reported when the stream has no BOM. */
        public String getDefaultEncoding() {
            return defaultEnc;
        }

        /**
         * Returns the encoding detected from the BOM, or the default encoding when
         * there is none. Runs the detection on first use.
         *
         * @throws IllegalStateException if reading the stream head fails; the
         *         underlying IOException is attached as the cause
         */
        public String getEncoding() {
            if (!isInited) {
                try {
                    init();
                } catch (IOException ex) {
                    throw new IllegalStateException("Init method failed.", ex);
                }
            }
            return encoding;
        }

        /**
         * Read-ahead four bytes and check for BOM marks. Extra bytes are unread
         * back to the stream, only BOM bytes are skipped.
         */
        protected void init() throws IOException {
            if (isInited) {
                return;
            }

            byte[] bom = new byte[BOM_SIZE];
            // Fill the look-ahead buffer; a single read() may legally return fewer
            // bytes than requested, and the stream may hold fewer than four bytes.
            int n = 0;
            while (n < bom.length) {
                int count = internalIn.read(bom, n, bom.length - n);
                if (count <= 0) {
                    break;
                }
                n += count;
            }

            // Only bytes that were actually read may take part in a match, otherwise
            // the zero-filled tail of the buffer can fake a UTF-32 mark.
            int bomLength;
            if (n >= 4 && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00)
                    && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
                encoding = "UTF-32BE";
                bomLength = 4;
            } else if (n >= 4 && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)
                    && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
                encoding = "UTF-32LE";
                bomLength = 4;
            } else if (n >= 3 && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB)
                    && (bom[2] == (byte) 0xBF)) {
                encoding = "UTF-8";
                bomLength = 3;
            } else if (n >= 2 && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
                encoding = "UTF-16BE";
                bomLength = 2;
            } else if (n >= 2 && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
                encoding = "UTF-16LE";
                bomLength = 2;
            } else {
                // Unicode BOM mark not found, unread all bytes
                encoding = defaultEnc;
                bomLength = 0;
            }

            BOMOffset = bomLength;
            int unread = n - bomLength;
            if (unread > 0) {
                internalIn.unread(bom, bomLength, unread);
            }

            isInited = true;
        }

        /**
         * Closes the wrapped stream and marks this stream as initialised without
         * running BOM detection.
         */
        @Override
        public void close() throws IOException {
            // init();
            isInited = true;
            internalIn.close();
        }

        /**
         * Reads the next byte from the wrapped stream.
         *
         * <p>NOTE(review): this marks the stream as initialised without running BOM
         * detection, so when it is called before {@link #getEncoding()} the BOM bytes
         * are returned as data and the encoding stays unset - confirm this is intended.
         */
        @Override
        public int read() throws IOException {
            // init();
            isInited = true;
            return internalIn.read();
        }

        /** Returns the length in bytes of the detected BOM, or -1 before detection has run. */
        public int getBOMOffset() {
            return BOMOffset;
        }
    }
}

Nguồn: Internet

Tuesday, September 27, 2011

Common expressions to validate/format a string

Date

/^\d{1,2}(\-|\/|\.)\d{1,2}\1\d{4}$/   mm/dd/yyyy   
US zip code 
  /(^\d{5}$)|(^\d{5}-\d{4}$)/   99999 or 99999-9999
Canadian postal code
 /^\D{1}\d{1}\D{1}\-?\d{1}\D{1}\d{1}$/  Z5Z-5Z5 or Z5Z5Z5
Time   
/^([1-9]|1[0-2]):[0-5]\d(:[0-5]\d(\.\d{1,3})?)?$/ 
HH:MM or HH:MM:SS or HH:MM:SS.mmm
IP Address (no check for valid values (0-255))
/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ 
999.999.999.999
Dollar Amount
/^((\$\d*)|(\$\d*\.\d{2})|(\d*)|(\d*\.\d{2}))$/ 
100, 100.00, $100 or $100.00
Social Security Number
/^\d{3}\-?\d{2}\-?\d{4}$/   999-99-9999 or 999999999
Canadian Social Insurance Number
   /^\d{9}$/ 999999999
/****************************************************************
FILE: RegExpValidate.js

DESCRIPTION: This file contains a library of validation functions
using javascript regular expressions. Library also contains
functions that reformat fields for display or for storage.


VALIDATION FUNCTIONS:

validateEmail - checks format of email address
validateUSPhone - checks format of US phone number
validateNumeric - checks for valid numeric value
validateInteger - checks for valid integer value
validateNotEmpty - checks for blank form field
validateUSZip - checks for valid US zip code
validateUSDate - checks for valid date in US format
validateValue - checks a string against supplied pattern

FORMAT FUNCTIONS:

rightTrim - removes trailing spaces from a string
leftTrim - removes leading spaces from a string
trimAll - removes leading and trailing spaces from a string
removeCurrency - removes currency formatting characters (), $
addCurrency - inserts currency formatting characters
removeCommas - removes comma separators from a number
addCommas - adds comma separators to a number
removeCharacters - removes characters from a string that match
passed pattern


AUTHOR: Karen Gayda

DATE: 03/24/2000
*******************************************************************/

function validateEmail( strValue ) {
    /************************************************
    DESCRIPTION: Validates that a string contains a
      valid email pattern.

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if valid, otherwise false.

    REMARKS: Accounts for email with country appended.
      Does not validate that email contains valid URL
      type (.com, .gov, etc.) or valid country suffix.
      The pattern only accepts letters, '_' and '.' in
      the local part and domain, and a 3-letter domain
      type optionally followed by 2-letter suffixes.
    *************************************************/
    // A regular expression literal must stay on a single line; wrapping it
    // across lines is a syntax error.
    var objRegExp = /(^[a-z]([a-z_\.]*)@([a-z_\.]*)([.][a-z]{3})$)|(^[a-z]([a-z_\.]*)@([a-z_\.]*)(\.[a-z]{3})(\.[a-z]{2})*$)/i;

    //check for valid email
    return objRegExp.test(strValue);
}
function validateUSPhone( strValue ) {
    /************************************************
    DESCRIPTION: Tests a string against the US phone
      layout "(AAA) PPP-NNNN"; the single whitespace
      after the area code is optional and the area
      code may not start with 0.
      Ex. (999) 999-9999 or (999)999-9999

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if the string matches, otherwise false.
    *************************************************/
    return /^\([1-9]\d{2}\)\s?\d{3}\-\d{4}$/.test(strValue);
}

function validateNumeric( strValue ) {
    /*****************************************************************
    DESCRIPTION: Tests whether a string is a plain decimal number:
      an optional leading minus sign, then digits with an optional
      decimal point ("12", "12.", "12.5" and ".5" all qualify).

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if valid, otherwise false.
    ******************************************************************/
    var numericPattern = /(^-?\d\d*\.\d*$)|(^-?\d\d*$)|(^-?\.\d\d*$)/;
    var isNumeric = numericPattern.test(strValue);
    return isNumeric;
}

function validateInteger( strValue ) {
    /************************************************
    DESCRIPTION: Tests whether a string is a whole
      number: one or more digits with an optional
      leading minus sign.

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if valid, otherwise false.
    **************************************************/
    return /(^-?\d\d*$)/.test(strValue);
}

function validateNotEmpty( strValue ) {
    /************************************************
    DESCRIPTION: Reports whether anything is left of
      a string once trimAll() has been applied to it.

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if the trimmed string has at least one
      character, otherwise false.
    *************************************************/
    return trimAll(strValue).length > 0;
}
function validateUSZip( strValue ) {
    /************************************************
    DESCRIPTION: Tests whether a string is a United
      States zip code, either the 5 digit form or
      the zip+4 form. 99999 or 99999-9999

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if valid, otherwise false.
    *************************************************/
    var zipPattern = /(^\d{5}$)|(^\d{5}-\d{4}$)/;
    var isZip = zipPattern.test(strValue);
    return isZip;
}

function validateUSDate( strValue ) {
    /************************************************
    DESCRIPTION: Validates that a string contains a
      real calendar date with 1 or 2 digit month,
      1 or 2 digit day and 4 digit year. Date
      separator can be ., -, or / but must be the
      same in both positions.
      Ex. mm/dd/yyyy or mm-dd-yyyy or mm.dd.yyyy

    PARAMETERS:
      strValue - String to be tested for validity

    RETURNS:
      True if valid, otherwise false.

    REMARKS:
      Avoids some of the limitations of the Date.parse()
      method such as the date separator character.
      Leap years follow the Gregorian rule: divisible
      by 4 and not by 100, or divisible by 400.
    *************************************************/
    // The parts are captured by the pattern itself, so one-digit months and
    // days are read correctly instead of assuming the separator sits at index 2.
    var objRegExp = /^(\d{1,2})(\-|\/|\.)(\d{1,2})\2(\d{4})$/;
    var arrayParts = objRegExp.exec(strValue);

    //check to see if in correct format
    if (arrayParts === null) {
        return false; //doesn't match pattern, bad date
    }

    // Always parse with radix 10 so leading zeros are never read as octal.
    var intMonth = parseInt(arrayParts[1], 10);
    var intDay = parseInt(arrayParts[3], 10);
    var intYear = parseInt(arrayParts[4], 10);

    if (intMonth < 1 || intMonth > 12 || intDay < 1) {
        return false;
    }

    // Days per month, January first; February is adjusted for leap years below.
    var arrayDaysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    var intMaxDay = arrayDaysInMonth[intMonth - 1];

    if (intMonth == 2 &&
        ((intYear % 4 == 0 && intYear % 100 != 0) || intYear % 400 == 0)) {
        intMaxDay = 29;
    }

    return intDay <= intMaxDay;
}
function validateValue( strValue, strMatchPattern ) {
    /************************************************
    DESCRIPTION: Tests a string against a regular
      expression supplied as pattern text.

    PARAMETERS:
      strValue - String to be tested for validity
      strMatchPattern - String containing a valid
        regular expression match pattern.

    RETURNS:
      True if the pattern matches, otherwise false.
    *************************************************/
    return new RegExp(strMatchPattern).test(strValue);
}


function rightTrim( strValue ) {
    /************************************************
    DESCRIPTION: Trims trailing whitespace chars.

    PARAMETERS:
      strValue - String to be trimmed.

    RETURNS:
      Source string with right whitespaces removed.

    REMARKS: Matches only the trailing whitespace run,
      so text ending in punctuation ("done!  ") and
      all-whitespace strings are trimmed as well.
    *************************************************/
    return strValue.replace(/\s+$/, '');
}

function leftTrim( strValue ) {
    /************************************************
    DESCRIPTION: Trims leading whitespace chars.

    PARAMETERS:
      strValue - String to be trimmed

    RETURNS:
      Source string with left whitespaces removed.

    REMARKS: Matches only the leading whitespace run,
      so text starting with punctuation ("  (x)") and
      all-whitespace strings are trimmed as well.
    *************************************************/
    return strValue.replace(/^\s+/, '');
}

function trimAll( strValue ) {
    /************************************************
    DESCRIPTION: Removes leading and trailing spaces.

    PARAMETERS:
      strValue - Source string from which spaces will
        be removed;

    RETURNS: Source string with leading and trailing
      whitespace removed; whitespace inside the text
      is kept. An all-whitespace string yields ''.

    REMARKS: Matches the whitespace runs at both ends
      directly, so text that starts or ends with
      punctuation ("  hi!  ") is trimmed as well.
    *************************************************/
    return strValue.replace(/^\s+|\s+$/g, '');
}
function removeCurrency( strValue ) {
    /************************************************
    DESCRIPTION: Removes currency formatting characters
      ( ) , and $ from source string. A value wrapped in
      parentheses is treated as negative and returned
      with a leading minus sign.

    PARAMETERS:
      strValue - Source string from which currency formatting
        will be removed;

    RETURNS: Source string with currency formatting removed.
    *************************************************/
    var objRegExp = /\(/;
    var strMinus = '';

    //check if negative
    if(objRegExp.test(strValue)){
        strMinus = '-';
    }

    objRegExp = /\)|\(|[,]/g;
    strValue = strValue.replace(objRegExp,'');

    // Remove the dollar sign itself rather than whatever character happens
    // to be first in the string.
    strValue = strValue.replace('$', '');

    return strMinus + strValue;
}

function addCurrency( strValue ) {
    /************************************************
    DESCRIPTION: Formats a number as currency: adds
      comma grouping via addCommas(), a leading $ and
      wraps negative amounts in parentheses.
      Ex. 1234.50 -> $1,234.50 ; -1234.50 -> $(1,234.50)

    PARAMETERS:
      strValue - Source string to be formatted

    RETURNS: Formatted string, or the original value
      when it is not a number with 2 decimal places.

    REMARKS: Assumes number passed is a valid
      numeric value rounded to 2 decimal places.
      If not, returns original value.
    *************************************************/
    // Anchored at both ends so that text which merely ends in a number
    // ("abc12.34") is returned unchanged, as the remarks promise.
    var objRegExp = /^-?[0-9]+\.[0-9]{2}$/;

    if( !objRegExp.test(strValue) ) {
        return strValue;
    }

    // A separate pattern replaces the deprecated RegExp.prototype.compile().
    var objNegative = /^-/;

    strValue = addCommas(strValue);
    if (objNegative.test(strValue)){
        strValue = '(' + strValue.replace(objNegative,'') + ')';
    }
    return '$' + strValue;
}

function removeCommas( strValue ) {
    /************************************************
    DESCRIPTION: Strips every comma from the source
      string.

    PARAMETERS:
      strValue - Source string from which commas will
        be removed;

    RETURNS: Source string with commas removed.
    *************************************************/
    // the g flag makes replace() act on all commas, not just the first
    return strValue.replace(/,/g, '');
}

function addCommas( strValue ) {
    /************************************************
    DESCRIPTION: Inserts comma separators into a
      numeric string, three digits per group.

    PARAMETERS:
      strValue - source string containing the number.

    RETURNS: String with comma grouping applied; a
      string holding no run of four or more digits is
      returned as it was passed in.

    REMARKS: Used with integers or numbers with
      2 or less decimal places.
    *************************************************/
    var groupPattern = new RegExp('(-?[0-9]+)([0-9]{3})');
    var strGrouped = strValue;

    // Each pass splits the last three digits off the leftmost ungrouped
    // digit run; repeat until no run of four or more digits is left.
    for (; groupPattern.test(strGrouped); ) {
        strGrouped = strGrouped.replace(groupPattern, '$1,$2');
    }
    return strGrouped;
}

function removeCharacters( strValue, strMatchPattern ) {
    /************************************************
    DESCRIPTION: Deletes every part of the source string
      that matches the supplied pattern. Matching is
      global and case-insensitive.

    PARAMETERS:
      strValue - source string to be cleaned.
      strMatchPattern - regular expression pattern text
        describing what to remove.

    RETURNS: String modified with characters
      matching search pattern removed

    USAGE: strNoSpaces = removeCharacters( ' sfdf dfd',
      '\\s*')
    *************************************************/
    return strValue.replace(new RegExp( strMatchPattern, 'gi' ), '');
}