TextEncodedStringSizeTerminated.java (Jaudiotagger 2.0.4)

File	Doc	Category	Size	Date	Package
TextEncodedStringSizeTerminated.java	API Doc	Jaudiotagger 2.0.4	14446	Mon Sep 26 13:06:22 BST 2011	org.jaudiotagger.tag.datatype
TextEncodedStringSizeTerminated.java

package org.jaudiotagger.tag.datatype;

import org.jaudiotagger.tag.InvalidDataTypeException;
import org.jaudiotagger.tag.TagOptionSingleton;
import org.jaudiotagger.tag.id3.AbstractTagFrameBody;
import org.jaudiotagger.tag.id3.valuepair.TextEncoding;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Represents a String which is not delimited by a null character.
 * <p/>
 * This type of String will usually only be used when it is the last field within a frame. When reading, the
 * remainder of the byte array is read; when writing, the frame accommodates the required size for the String. The
 * String will be encoded based upon the text encoding of the frame that it belongs to.
 * <p/>
 * All TextInformation frames support multiple strings, stored as a null separated list, where null is represented by
 * the termination code for the character encoding. This functionality is only officially supported in ID3v24.
 *
 * Most applications will ignore any but the first value, but some, such as Foobar 2000, will decode them properly.
 *
 * iTunes writes null terminator characters after the String even though it only writes a single value.
 *
 *
 */
public class TextEncodedStringSizeTerminated extends AbstractString
{

    /**
     * Creates a new empty TextEncodedStringSizeTerminated datatype.
     * <p/>
     * Both arguments are handed straight to the superclass constructor; no other state is initialised here.
     *
     * @param identifier identifies the frame type
     * @param frameBody  the frame body this datatype is created for (its text encoding is later read via
     *                   {@link #getTextEncodingCharSet()})
     */
    public TextEncodedStringSizeTerminated(String identifier, AbstractTagFrameBody frameBody)
    {
        super(identifier, frameBody);
    }

    /**
     * Copy constructor.
     * <p/>
     * Delegates entirely to the superclass copy constructor.
     *
     * @param object the instance to copy
     */
    public TextEncodedStringSizeTerminated(TextEncodedStringSizeTerminated object)
    {
        super(object);
    }

    /**
     * Compares this datatype with another object.
     *
     * @param obj the object to compare against
     * @return true if obj is this very instance, or if obj is a TextEncodedStringSizeTerminated that the
     *         superclass comparison also accepts as equal
     */
    public boolean equals(Object obj)
    {
        return this == obj
                || (obj instanceof TextEncodedStringSizeTerminated && super.equals(obj));
    }

    /**
     * Reads every byte from offset to the end of the array and decodes it into the String value.
     * <p/>
     * Because the field has no delimiter it consumes the remainder of the array, so it cannot be used when
     * other fields follow it in the frame.
     * <p/>
     * The bytes are decoded with the charset returned by {@link #getTextEncodingCharSet()}. ID3 text frames
     * often hold multiple strings separated by the null character appropriate for the encoding; these are
     * kept in the decoded value as-is.
     *
     * @param arr    this is the buffer for the frame
     * @param offset this is where to start reading in the buffer for this field
     * @throws InvalidDataTypeException  declared by the signature; not thrown explicitly by this implementation
     * @throws NullPointerException      if arr is null
     * @throws IndexOutOfBoundsException if offset does not lie within arr (non-Android path)
     */
    public void readByteArray(byte[] arr, int offset) throws InvalidDataTypeException
    {
        logger.finest("Reading from array from offset:" + offset);

        //Decoder for the charset dictated by the owning frame body
        final String charSetName = getTextEncodingCharSet();
        final CharsetDecoder decoder = Charset.forName(charSetName).newDecoder();
        decoder.reset();

        //Build an input buffer positioned at zero that covers only the bytes belonging to this field
        final boolean android = TagOptionSingleton.getInstance().isAndroid();
        final ByteBuffer inBuffer;
        if (android)
        {
            //#302 [dallen] copy the tail manually since decoder.decode() does not honor the offset in the in buffer
            final byte[] tail = new byte[arr.length - offset];
            System.arraycopy(arr, offset, tail, 0, tail.length);
            inBuffer = ByteBuffer.wrap(tail);
        }
        else
        {
            inBuffer = ByteBuffer.wrap(arr, offset, arr.length - offset).slice();
        }

        //One char per input byte is the capacity used for the decoded text
        final int byteCount = arr.length - offset;
        final CharBuffer outBuffer = CharBuffer.allocate(byteCount);
        final CoderResult coderResult = decoder.decode(inBuffer, outBuffer, true);
        if (coderResult.isError())
        {
            //Decoding problems are only logged; whatever was decoded so far is still used
            logger.warning("Decoding error:" + coderResult.toString());
        }
        decoder.flush(outBuffer);
        outBuffer.flip();

        String decoded = outBuffer.toString();
        if (charSetName.equals(TextEncoding.CHARSET_UTF_16))
        {
            //With UTF16 plus BOM each of multiple values may carry its own BOM (big or little endian),
            //so strip any that survived decoding
            decoded = decoded.replace("\ufeff","").replace("\ufffe","");
        }
        value = decoded;

        //SetSize, important this is correct for finding the next datatype
        setSize(byteCount);
        logger.config("Read SizeTerminatedString:" + value + " size:" + size);

    }

    /**
     * Encodes a single value with the supplied encoder.
     *
     * When called once per value of a multi-value field, every value except the last is written with a
     * trailing null character so that the values end up null separated.
     *
     * @param encoder    the encoder to use
     * @param next       the value to encode
     * @param i          zero-based index of this value
     * @param noOfValues total number of values being written
     * @return buffer positioned at zero holding the encoded bytes
     * @throws CharacterCodingException if the value cannot be encoded
     */
    private ByteBuffer writeString( CharsetEncoder encoder, String next, int i, int noOfValues)
            throws CharacterCodingException
    {
        final boolean lastValue = (i + 1) == noOfValues;
        final String text = lastValue ? next : next + '\0';
        final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
        encoded.rewind();
        return encoded;
    }

    /**
     * Encodes a single value as little endian UTF-16 preceded by a BOM.
     *
     * When called once per value of a multi-value field, every value except the last is written with a
     * trailing null character.
     *
     * The BOM-less little endian charset is used and the BOM character is prepended by hand, because the
     * charset that writes a BOM itself does not write it the way we need.
     *
     * @param next       the value to encode
     * @param i          zero-based index of this value
     * @param noOfValues total number of values being written
     * @return buffer positioned at zero holding the encoded bytes
     * @throws CharacterCodingException if the value cannot be encoded
     */
    private ByteBuffer writeStringUTF16LEBOM( String next, int i, int noOfValues)
            throws CharacterCodingException
    {
        final CharsetEncoder encoder =
                Charset.forName(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT).newEncoder();
        //The BOM character is U+FEFF; the little endian encoder emits it as bytes ff fe
        final String withBom = '\ufeff' + next;
        final boolean lastValue = (i + 1) == noOfValues;
        final String text = lastValue ? withBom : withBom + '\0';
        final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
        encoded.rewind();
        return encoded;
    }

    /**
     * Encodes a single value as big endian UTF-16 preceded by a BOM.
     *
     * When called once per value of a multi-value field, every value except the last is written with a
     * trailing null character.
     *
     * @param next       the value to encode
     * @param i          zero-based index of this value
     * @param noOfValues total number of values being written
     * @return buffer positioned at zero holding the encoded bytes
     * @throws CharacterCodingException if the value cannot be encoded
     */
    private ByteBuffer writeStringUTF16BEBOM( String next, int i, int noOfValues)
            throws CharacterCodingException
    {
        final CharsetEncoder encoder =
                Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT).newEncoder();
        //Prepend the BOM character by hand
        final String withBom = '\ufeff' + next;
        final boolean lastValue = (i + 1) == noOfValues;
        final String text = lastValue ? withBom : withBom + '\0';
        final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
        encoded.rewind();
        return encoded;
    }

    /**
     * Removes a single trailing null from the end of the value when the RemoveTrailingTerminatorOnWrite
     * option is set. The null should not be there, but some applications continue to write this
     * unnecessary null char.
     */
    private void stripTrailingNull()
    {
        if (!TagOptionSingleton.getInstance().isRemoveTrailingTerminatorOnWrite())
        {
            return;
        }

        final String current = (String) value;
        final int lastIndex = current.length() - 1;
        //lastIndex is -1 for the empty String, in which case there is nothing to strip
        if (lastIndex >= 0 && current.charAt(lastIndex) == '\0')
        {
            value = current.substring(0, lastIndex);
        }
    }

    /**
     * Restores a trailing null that was lost when the value was split.
     *
     * Splitting drops trailing nulls, so when the RemoveTrailingTerminatorOnWrite option is not set and
     * the original value ended with a null, the null is appended to the last entry of the list again.
     *
     * @param values      the split values, modified in place
     * @param stringValue the original unsplit value
     */
    private void checkTrailingNull( List<String> values, String stringValue)
    {
        if (TagOptionSingleton.getInstance().isRemoveTrailingTerminatorOnWrite())
        {
            return;
        }

        final int lastCharIndex = stringValue.length() - 1;
        if (lastCharIndex < 0 || stringValue.charAt(lastCharIndex) != '\0')
        {
            return;
        }

        final int lastValueIndex = values.size() - 1;
        values.set(lastValueIndex, values.get(lastValueIndex) + '\0');
    }

    /**
     * Write String into byte array
     * <p/>
     * It will remove a trailing null terminator if exists if the option
     * RemoveTrailingTerminatorOnWrite has been set.
     * <p/>
     * The value is split on null characters and each part is encoded separately, so that for UTF-16 with
     * BOM every part is written with its own BOM. The size of this datatype is set to the number of bytes
     * produced.
     *
     * @return the data as a byte array in format to write to file
     * @throws RuntimeException wrapping the CharacterCodingException if the value cannot be encoded
     */
    public byte[] writeByteArray()
    {
        byte[] data;
        //Try and write using the CharSet defined by getTextEncodingCharSet()
        try
        {
            stripTrailingNull();

            String stringValue = (String) value;
            String charSetName = getTextEncodingCharSet();

            //Special Handling because there is no UTF16 BOM LE charset
            boolean utf16WithBom = charSetName.equals(TextEncoding.CHARSET_UTF_16);
            boolean littleEndian = utf16WithBom
                    && TagOptionSingleton.getInstance().isEncodeUTF16BomAsLittleEndian();

            //Ensure each string (if multiple values) is written with BOM by writing separately
            List<String> values = splitByNullSeperator(stringValue);
            checkTrailingNull(values, stringValue);

            //Encode every value first and size the result from what was actually produced. A fixed
            //estimate based on the String length is too small for UTF-16 with BOM when the value holds
            //many empty entries (each entry costs a BOM plus a terminator), which overflowed the buffer.
            List<ByteBuffer> encodedValues = new ArrayList<ByteBuffer>(values.size());
            int totalSize = 0;
            for (int i = 0; i < values.size(); i++)
            {
                String next = values.get(i);
                ByteBuffer encoded;
                if (!utf16WithBom)
                {
                    encoded = writeString(Charset.forName(charSetName).newEncoder(), next, i, values.size());
                }
                else if (littleEndian)
                {
                    encoded = writeStringUTF16LEBOM(next, i, values.size());
                }
                else
                {
                    encoded = writeStringUTF16BEBOM(next, i, values.size());
                }
                encodedValues.add(encoded);
                totalSize += encoded.remaining();
            }

            //Concatenate the encoded values in order
            data = new byte[totalSize];
            int position = 0;
            for (ByteBuffer encoded : encodedValues)
            {
                int length = encoded.remaining();
                encoded.get(data, position, length);
                position += length;
            }
            setSize(data.length);
        }
        //Should never happen so if does throw a RuntimeException
        catch (CharacterCodingException ce)
        {
            logger.severe(ce.getMessage());
            throw new RuntimeException(ce);
        }
        return data;
    }

    /**
     * Looks up the name of the charset this field is encoded with.
     * <p/>
     * The text encoding id is taken from the frame body this field belongs to and translated into a
     * charset name through {@link TextEncoding}.
     *
     * @return the text encoding charset
     */
    protected String getTextEncodingCharSet()
    {
        final byte encodingId = getBody().getTextEncoding();
        final String name = TextEncoding.getInstanceOf().getValueForId(encodingId);
        logger.finest("text encoding:" + encodingId + " charset:" + name);
        return name;
    }

    /**
     * Split the values separated by null character
     * <p/>
     * Trailing empty values are discarded, so "a\0" yields the single value "a". A value that is empty or
     * consists solely of null characters yields a single empty String.
     *
     * @param value the raw value
     * @return a new modifiable list of values, guaranteed to be at least one value
     * @throws NullPointerException if value is null
     */
    public static List<String> splitByNullSeperator(String value)
    {
        //split() drops trailing empty strings, so a value made up only of nulls produces an empty array
        String[] valuesarray = value.split("\\u0000");

        //Copy into an ArrayList instead of returning the fixed-size Arrays.asList() view, so the returned
        //list supports add/remove regardless of which case applies
        List<String> values = new ArrayList<String>(Arrays.asList(valuesarray));
        if (values.isEmpty())
        {
            values.add("");
        }
        return values;
    }

    /**
     * Add an additional String to the current String value
     * <p/>
     * The new value is appended after a null separator. If no value is held yet, the new value simply
     * becomes the value; concatenating onto a null reference would otherwise produce the literal text
     * "null" as a spurious first value.
     *
     * @param value the String to add
     */
    public void addValue(String value)
    {
        if (this.value == null)
        {
            setValue(value);
        }
        else
        {
            setValue(this.value + "\u0000" + value);
        }
    }

    /**
     * Counts the values held, where values are separated from each other by a null character.
     *
     * @return number of values held, usually this will be one.
     */
    public int getNumberOfValues()
    {
        final List<String> parts = splitByNullSeperator((String) value);
        return parts.size();
    }

    /**
     * Get the nth value
     *
     * @param index zero-based position within the null separated values
     * @return the nth value
     * @throws IndexOutOfBoundsException if value does not exist
     */
    public String getValueAtIndex(int index)
    {
        //Split into the separate components and pick the requested one
        return splitByNullSeperator((String) value).get(index);
    }

    /**
     * Splits the held value on null characters.
     *
     * @return list of all values
     */
    public List<String> getValues()
    {
        final String raw = (String) value;
        return splitByNullSeperator(raw);
    }

    /**
     * Get value(s) whilst removing any trailing nulls
     *
     * The value is split on null characters (which discards trailing nulls) and the parts are joined
     * back together with a single null character between them.
     *
     * @return the null separated values without trailing nulls
     */
    public String getValueWithoutTrailingNull()
    {
        final StringBuilder joined = new StringBuilder();
        //Empty before the first part, a null character before every later one
        String separator = "";
        for (String part : splitByNullSeperator((String) value))
        {
            joined.append(separator).append(part);
            separator = "\u0000";
        }
        return joined.toString();
    }
}