FileDocCategorySizeDatePackage
GsmAlphabet.javaAPI DocAndroid 1.5 API25893Wed May 06 22:42:02 BST 2009com.android.internal.telephony.gsm

GsmAlphabet.java

/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.internal.telephony.gsm;

import android.telephony.gsm.SmsMessage;
import android.util.SparseIntArray;

import android.util.Log;

/**
 * This class implements the character set mapping between
 * the GSM SMS 7-bit alphabet specifed in TS 23.038 6.2.1
 * and UTF-16
 *
 * {@hide}
 */
public class GsmAlphabet
{
    static final String LOG_TAG = "GSM";
    


    //***** Constants

    /**
     * This escapes extended characters, and when present indicates that the 
     * following character should
     * be looked up in the "extended" table
     *
     * gsmToChar(GSM_EXTENDED_ESCAPE) returns 0xffff
     */

    public static final byte GSM_EXTENDED_ESCAPE = 0x1B;


    /**
     * char to GSM alphabet char
     * Returns ' ' in GSM alphabet if there's no possible match
     * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table
     * In this case, you must call charToGsmExtended() for the value that
     * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string
     */
    public static int
    charToGsm(char c)
    {
        try {
            return charToGsm(c, false);
        } catch (EncodeException ex) {
            // this should never happen
            return sGsmSpaceChar;
        }
    }

    /**
     * char to GSM alphabet char
     * @param throwException If true, throws EncodeException on invalid char. 
     *   If false, returns GSM alphabet ' ' char.
     *
     * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table
     * In this case, you must call charToGsmExtended() for the value that
     * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string
     */

    public static int
    charToGsm(char c, boolean throwException) throws EncodeException
    {
        int ret;
        
        ret = charToGsm.get(c, -1);

        if (ret == -1) {
            ret = charToGsmExtended.get(c, -1);

            if (ret == -1) {
                if (throwException) {
                    throw new EncodeException(c);
                } else {
                    return sGsmSpaceChar;
                }
            } else {
                return GSM_EXTENDED_ESCAPE;
            }
        }

        return ret;

    }
    

    /**
     * char to extended GSM alphabet char
     *
     * Extended chars should be escaped with GSM_EXTENDED_ESCAPE
     *
     * Returns ' ' in GSM alphabet if there's no possible match
     *
     */
    public static int
    charToGsmExtended(char c)
    {
        int ret;
        
        ret = charToGsmExtended.get(c, -1);

        if (ret == -1) {
            return sGsmSpaceChar;
        }

        return ret;
    }

    /**
     * Converts a character in the GSM alphabet into a char 
     *
     * if GSM_EXTENDED_ESCAPE is passed, 0xffff is returned. In this case,
     * the following character in the stream should be decoded with 
     * gsmExtendedToChar()
     *
     * If an unmappable value is passed (one greater than 127), ' ' is returned
     */

    public static char
    gsmToChar(int gsmChar)
    {
        return (char)gsmToChar.get(gsmChar, ' ');
    }
    
    /**
     * Converts a character in the extended GSM alphabet into a char 
     *
     * if GSM_EXTENDED_ESCAPE is passed, ' ' is returned since no second
     * extension page has yet been defined (see Note 1 in table 6.2.1.1 of
     * TS 23.038 v7.00)
     *     
     * If an unmappable value is passed , ' ' is returned
     */

    public static char
    gsmExtendedToChar(int gsmChar)
    {
        int ret;

        ret = gsmExtendedToChar.get(gsmChar, -1);

        if (ret == -1) {
            return ' ';
        }

        return (char)ret;
    }

    /**
     * Converts a String into a byte array containing the 7-bit packed
     * GSM Alphabet representation of the string. If a header is provided,
     * this is included in the returned byte array and padded to a septet
     * boundary.
     *
     * Unencodable chars are encoded as spaces
     *
     * Byte 0 in the returned byte array is the count of septets used,
     * including the header and header padding. The returned byte array is
     * the minimum size required to store the packed septets. The returned
     * array cannot contain more than 255 septets.
     *
     * @param data The text string to encode.
     * @param header Optional header (includeing length byte) that precedes
     * the encoded data, padded to septet boundary.
     * @return Byte array containing header and encoded data.
     */
    public static byte[] stringToGsm7BitPackedWithHeader(String data, byte[] header)
            throws EncodeException {

        if (header == null || header.length == 0) {
            return stringToGsm7BitPacked(data);
        }

        int headerBits = header.length * 8;
        int headerSeptets = headerBits / 7;
        headerSeptets += (headerBits % 7) > 0 ? 1 : 0;

        int sz = data.length();
        int septetCount;
        septetCount = countGsmSeptets(data, true) + headerSeptets;

        byte[] ret = stringToGsm7BitPacked(data, 0, septetCount,
                (headerSeptets*7), true);

        // Paste in the header
        System.arraycopy(header, 0, ret, 1, header.length);
        return ret;
    }

    /**
     * Converts a String into a byte array containing 
     * the 7-bit packed GSM Alphabet representation of the string.
     *
     * Unencodable chars are encoded as spaces
     *
     * Byte 0 in the returned byte array is the count of septets used
     * The returned byte array is the minimum size required to store
     * the packed septets. The returned array cannot contain more than 255
     * septets.
     *
     * @param data the data string to endcode
     * @throws EncodeException if String is too large to encode
     */
    public static byte[] stringToGsm7BitPacked(String data)
            throws EncodeException {
        return stringToGsm7BitPacked(data, 0, -1, 0, true);
    }

    /**
     * Converts a String into a byte array containing 
     * the 7-bit packed GSM Alphabet representation of the string.
     *
     * Byte 0 in the returned byte array is the count of septets used
     * The returned byte array is the minimum size required to store
     * the packed septets. The returned array cannot contain more than 255
     * septets.
     *
     * @param data the text to convert to septets
     * @param dataOffset the character offset in data to start the encoding from
     * @param maxSeptets the maximum number of septets to convert, or -1 for no
     *  enforced maximum.
     * @param startingBitOffset the number of padding bits to put before
     *  the start of the first septet at the begining of the array
     * @param throwException If true, throws EncodeException on invalid char. 
     *   If false, replaces unencodable char with GSM alphabet space char.
     *
     * @throws EncodeException if String is too large to encode
     */
    public static byte[] stringToGsm7BitPacked(String data, int dataOffset,
            int maxSeptets, int startingBitOffset, boolean throwException)
            throws EncodeException {

        int sz = data.length();
        int septetCount;
        if (maxSeptets == -1) {
            septetCount = countGsmSeptets(data, true);
        } else {
            septetCount = maxSeptets;
        }

        if(septetCount > 0xff) {
            throw new EncodeException("Payload cannot exceed " + Short.MAX_VALUE
                    + " septets");
        }

        // Enough for all the septets and the length 2 byte prefix
        byte[] ret = new byte[1 + (((septetCount * 7) + 7) / 8)];

        int bitOffset = startingBitOffset;
        int septets = startingBitOffset/7;
        for (int i = dataOffset; i < sz && septets < septetCount; i++, bitOffset += 7) {
            char c = data.charAt(i);

            int v = GsmAlphabet.charToGsm(c, throwException);
            if (v == GSM_EXTENDED_ESCAPE) {
                // Lookup the extended char
                v = GsmAlphabet.charToGsmExtended(c);

                packSmsChar(ret, bitOffset, GSM_EXTENDED_ESCAPE);
                bitOffset += 7;
                septets++;
            }

            packSmsChar(ret, bitOffset, v);
            septets++;
        }

        // See check for > 0xff above
        ret[0] = (byte)septets;

        return ret;
    }

    /**
     * Pack a 7-bit char into its appropirate place in a byte array
     *
     * @param bitOffset the bit offset that the septet should be packed at
     *                  (septet index * 7)
     */
    private static void 
    packSmsChar(byte[] packedChars, int bitOffset, int value)
    {
        int byteOffset = bitOffset / 8;
        int shift = bitOffset % 8;

        packedChars[++byteOffset] |= value << shift;

        if (shift > 1) {
        	packedChars[++byteOffset] = (byte)(value >> (8 - shift));
        }    	
    }

    /**
     * Convert a GSM alphabet 7 bit packed string (SMS string) into a 
     * {@link java.lang.String}.
     *
     * See TS 23.038 6.1.2.1 for SMS Character Packing
     *
     * @param pdu the raw data from the pdu
     * @param offset the byte offset of 
     * @param lengthSeptets string length in septets, not bytes
     * @return String representation or null on decoding exception
     */
    public static String gsm7BitPackedToString(byte[] pdu, int offset,
            int lengthSeptets) {
        return gsm7BitPackedToString(pdu, offset, lengthSeptets, 0);
    }

    /**
     * Convert a GSM alphabet 7 bit packed string (SMS string) into a 
     * {@link java.lang.String}.
     *
     * See TS 23.038 6.1.2.1 for SMS Character Packing
     *
     * @param pdu the raw data from the pdu
     * @param offset the byte offset of 
     * @param lengthSeptets string length in septets, not bytes
     * @param numPaddingBits the number of padding bits before the start of the
     *  string in the first byte
     * @return String representation or null on decoding exception
     */
    public static String gsm7BitPackedToString(byte[] pdu, int offset,
            int lengthSeptets, int numPaddingBits)
    {
        StringBuilder ret = new StringBuilder(lengthSeptets);
        boolean prevCharWasEscape;
        
        try {
            prevCharWasEscape = false;
            
            for (int i = 0 ; i < lengthSeptets ; i++) {
                int bitOffset = (7 * i) + numPaddingBits;

                int byteOffset = bitOffset / 8;
                int shift = bitOffset % 8;
                int gsmVal;

                gsmVal = (0x7f & (pdu[offset + byteOffset] >> shift));

                // if it crosses a byte boundry
                if (shift > 1) {
                    // set msb bits to 0
                    gsmVal &= 0x7f >> (shift - 1);

                    gsmVal |= 0x7f & (pdu[offset + byteOffset + 1] << (8 - shift));
                }

                if (prevCharWasEscape) {
                    ret.append(GsmAlphabet.gsmExtendedToChar(gsmVal));
                    prevCharWasEscape = false;
                } else if (gsmVal == GSM_EXTENDED_ESCAPE) {
                    prevCharWasEscape = true;
                } else {
                    ret.append(GsmAlphabet.gsmToChar(gsmVal));
                }
            }
        } catch (RuntimeException ex) {
            Log.e(LOG_TAG, "Error GSM 7 bit packed: ", ex);
            return null;
        }

        return ret.toString();
    }


    /**
     * Convert a GSM alphabet string that's stored in 8-bit unpacked 
     * format (as it often appears in SIM records) into a String
     *
     * Field may be padded with trailing 0xff's. The decode stops
     * at the first 0xff encountered.
     */
    public static String
    gsm8BitUnpackedToString(byte[] data, int offset, int length)
    {
        boolean prevWasEscape;
        StringBuilder ret = new StringBuilder(length);

        prevWasEscape = false;
        for (int i = offset ; i < offset + length ; i++) {
            // Never underestimate the pain that can be caused
            // by signed bytes
            int c = data[i] & 0xff;

            if (c == 0xff) {
                break;
            } else if (c == GSM_EXTENDED_ESCAPE) {
                if (prevWasEscape) {
                    // Two escape chars in a row
                    // We treat this as a space
                    // See Note 1 in table 6.2.1.1 of TS 23.038 v7.00
                    ret.append(' ');
                    prevWasEscape = false;
                } else {
                    prevWasEscape = true;
                }
            } else {
                if (prevWasEscape) {
                    ret.append((char)gsmExtendedToChar.get(c, ' '));
                } else {
                    ret.append((char)gsmToChar.get(c, ' '));
                }
                prevWasEscape = false;
            }
        }
        
        return ret.toString();    
    }

    /**
     * Convert a string into an 8-bit unpacked GSM alphabet byte
     * array
     */
    public static byte[]
    stringToGsm8BitPacked(String s)
    {
        byte[] ret;

        int septets = 0;

        septets = countGsmSeptets(s);

        // Enough for all the septets and the length byte prefix
        ret = new byte[septets];

        stringToGsm8BitUnpackedField(s, ret, 0, ret.length);

        return ret;
    }


    /**
     * Write a String into a GSM 8-bit unpacked field of
     * @param length size at @param offset in @param dest
     *
     * Field is padded with 0xff's, string is truncated if necessary
     */
     
    public static void
    stringToGsm8BitUnpackedField(String s, byte dest[], int offset, int length)
    {
        int outByteIndex = offset;

        // Septets are stored in byte-aligned octets
        for (int i = 0, sz = s.length()
                ; i < sz && (outByteIndex - offset) < length 
                ; i++
        ) {
            char c = s.charAt(i);

            int v = GsmAlphabet.charToGsm(c);

            if (v == GSM_EXTENDED_ESCAPE) {
                // make sure we can fit an escaped char
                if (! (outByteIndex + 1 - offset < length)) {
                    break;
                }

                dest[outByteIndex++] = GSM_EXTENDED_ESCAPE;

        		v = GsmAlphabet.charToGsmExtended(c);
            }

            dest[outByteIndex++] = (byte)v;
        }

        // pad with 0xff's
        while((outByteIndex - offset) < length) {
            dest[outByteIndex++] = (byte)0xff;
        }
    }

    /**
     * Returns the count of 7-bit GSM alphabet characters
     * needed to represent this character. Counts unencodable char as 1 septet.
     */
    public static int
    countGsmSeptets(char c)
    {
        try {
            return countGsmSeptets(c, false);
        } catch (EncodeException ex) {
            // This should never happen.
            return 0;
        }
    }

    /**
     * Returns the count of 7-bit GSM alphabet characters
     * needed to represent this character
     * @param throwsException If true, throws EncodeException if unencodable
     * char. Otherwise, counts invalid char as 1 septet
     */
    public static int
    countGsmSeptets(char c, boolean throwsException) throws EncodeException
    {
    	if (charToGsm.get(c, -1) != -1) {
    		return 1;
    	}
    	
    	if (charToGsmExtended.get(c, -1) != -1) {
    		return 2;
    	}

        if (throwsException) {
            throw new EncodeException(c);
        } else {    
        	// count as a space char
        	return 1;
        }
    }

    /**
     * Returns the count of 7-bit GSM alphabet characters
     * needed to represent this string. Counts unencodable char as 1 septet.
     */
    public static int
    countGsmSeptets(CharSequence s)
    {
        try {
            return countGsmSeptets(s, false);
        } catch (EncodeException ex) {
            // this should never happen
            return 0;
        }
    }

    /**
     * Returns the count of 7-bit GSM alphabet characters
     * needed to represent this string.
     * @param throwsException If true, throws EncodeException if unencodable
     * char. Otherwise, counts invalid char as 1 septet
     */
    public static int
    countGsmSeptets(CharSequence s, boolean throwsException) throws EncodeException
    {
        int charIndex = 0;
        int sz = s.length();
        int count = 0;

        while (charIndex < sz) {
            count += countGsmSeptets(s.charAt(charIndex), throwsException);
            charIndex++;
        }
        
        return count;
    }    

    /**
     * Returns the index into <code>s</code> of the first character
     * after <code>limit</code> septets have been reached, starting at
     * index <code>start</code>.  This is used when dividing messages
     * into units within the SMS message size limit.
     *
     * @param s source string
     * @param start index of where to start counting septets
     * @param limit maximum septets to include,
     *   e.g. <code>MAX_USER_DATA_SEPTETS</code>
     * @return index of first character that won't fit, or the length
     *   of the entire string if everything fits
     */
    public static int
    findGsmSeptetLimitIndex(String s, int start, int limit) {
        int accumulator = 0;
        int size = s.length();

        for (int i = start; i < size; i++) {
            accumulator += countGsmSeptets(s.charAt(i));
            if (accumulator > limit) {
                return i;
            }
        }
        return size;
    }

    /**
     * Returns the index into <code>s</code> of the first character
     * after <code>limit</code> octets have been reached, starting at
     * index <code>start</code>.  This is used when dividing messages
     * in UCS2 encoding into units within the SMS message size limit.
     *
     * @param s source string
     * @param start index of where to start counting septets
     * @param limit maximum septets to include,
     *   e.g. <code>MAX_USER_DATA_BYTES</code>
     * @return index of first character that won't fit, or the length
     *   of the entire string if everything fits
     */
    public static int
    findUCS2LimitIndex(String s, int start, int limit) {
        int numCharToBeEncoded = s.length() - start;
        return ((numCharToBeEncoded*2 > limit)? limit/2: numCharToBeEncoded) + start;
    }

    /**
     * Returns the index into <code>s</code> of the first character
     * after <code>limit</code> septets/octets have been reached
     * according to the <code>encodingType</code>, starting at
     * index <code>start</code>.  This is used when dividing messages
     * units within the SMS message size limit.
     *
     * @param s source string
     * @param start index of where to start counting septets
     * @param limit maximum septets to include,
     *   e.g. <code>MAX_USER_DATA_BYTES</code>
     * @return index of first character that won't fit, or the length
     *   of the entire string if everything fits
     */
    public static int 
    findLimitIndex(String s, int start, int limit, int encodingType) throws EncodeException {
        if (encodingType == SmsMessage.ENCODING_7BIT) {
            return findGsmSeptetLimitIndex(s, start, limit);
        }
        else if (encodingType == SmsMessage.ENCODING_16BIT) {
            return findUCS2LimitIndex(s, start, limit);
        }
        else {
            throw new EncodeException("Unsupported encoding type: " + encodingType);
        }
    }

    // Set in the static initializer
    private static int sGsmSpaceChar;

    private static final SparseIntArray charToGsm = new SparseIntArray();
    private static final SparseIntArray gsmToChar = new SparseIntArray();
    private static final SparseIntArray charToGsmExtended = new SparseIntArray();
    private static final SparseIntArray gsmExtendedToChar = new SparseIntArray();
    
    static {
        int i = 0;
        
        charToGsm.put('@', i++);
        charToGsm.put('\u00a3', i++);
        charToGsm.put('$', i++);
        charToGsm.put('\u00a5', i++);
        charToGsm.put('\u00e8', i++);
        charToGsm.put('\u00e9', i++);
        charToGsm.put('\u00f9', i++);
        charToGsm.put('\u00ec', i++);
        charToGsm.put('\u00f2', i++);
        charToGsm.put('\u00c7', i++);
        charToGsm.put('\n', i++);
        charToGsm.put('\u00d8', i++);
        charToGsm.put('\u00f8', i++);
        charToGsm.put('\r', i++);
        charToGsm.put('\u00c5', i++);
        charToGsm.put('\u00e5', i++);
        
        charToGsm.put('\u0394', i++);
        charToGsm.put('_', i++);
        charToGsm.put('\u03a6', i++);
        charToGsm.put('\u0393', i++);
        charToGsm.put('\u039b', i++);
        charToGsm.put('\u03a9', i++);
        charToGsm.put('\u03a0', i++);
        charToGsm.put('\u03a8', i++);
        charToGsm.put('\u03a3', i++);
        charToGsm.put('\u0398', i++);
        charToGsm.put('\u039e', i++);
        charToGsm.put('\uffff', i++);
        charToGsm.put('\u00c6', i++);
        charToGsm.put('\u00e6', i++);
        charToGsm.put('\u00df', i++);
        charToGsm.put('\u00c9', i++);
        
        charToGsm.put(' ', i++);
        charToGsm.put('!', i++);
        charToGsm.put('"', i++);
        charToGsm.put('#', i++);
        charToGsm.put('\u00a4', i++);
        charToGsm.put('%', i++);
        charToGsm.put('&', i++);
        charToGsm.put('\'', i++);
        charToGsm.put('(', i++);
        charToGsm.put(')', i++);
        charToGsm.put('*', i++);
        charToGsm.put('+', i++);
        charToGsm.put(',', i++);
        charToGsm.put('-', i++);
        charToGsm.put('.', i++);
        charToGsm.put('/', i++);
        
        charToGsm.put('0', i++);
        charToGsm.put('1', i++);
        charToGsm.put('2', i++);
        charToGsm.put('3', i++);
        charToGsm.put('4', i++);
        charToGsm.put('5', i++);
        charToGsm.put('6', i++);
        charToGsm.put('7', i++);
        charToGsm.put('8', i++);
        charToGsm.put('9', i++);
        charToGsm.put(':', i++);
        charToGsm.put(';', i++);
        charToGsm.put('<', i++);
        charToGsm.put('=', i++);
        charToGsm.put('>', i++);
        charToGsm.put('?', i++);
        
        charToGsm.put('\u00a1', i++);
        charToGsm.put('A', i++);
        charToGsm.put('B', i++);
        charToGsm.put('C', i++);
        charToGsm.put('D', i++);
        charToGsm.put('E', i++);
        charToGsm.put('F', i++);
        charToGsm.put('G', i++);
        charToGsm.put('H', i++);
        charToGsm.put('I', i++);
        charToGsm.put('J', i++);
        charToGsm.put('K', i++);
        charToGsm.put('L', i++);
        charToGsm.put('M', i++);
        charToGsm.put('N', i++);
        charToGsm.put('O', i++);
        
        charToGsm.put('P', i++);
        charToGsm.put('Q', i++);
        charToGsm.put('R', i++);
        charToGsm.put('S', i++);
        charToGsm.put('T', i++);
        charToGsm.put('U', i++);
        charToGsm.put('V', i++);
        charToGsm.put('W', i++);
        charToGsm.put('X', i++);
        charToGsm.put('Y', i++);
        charToGsm.put('Z', i++);
        charToGsm.put('\u00c4', i++);
        charToGsm.put('\u00d6', i++);
        charToGsm.put('\u0147', i++);
        charToGsm.put('\u00dc', i++);
        charToGsm.put('\u00a7', i++);
        
        charToGsm.put('\u00bf', i++);
        charToGsm.put('a', i++);
        charToGsm.put('b', i++);
        charToGsm.put('c', i++);
        charToGsm.put('d', i++);
        charToGsm.put('e', i++);
        charToGsm.put('f', i++);
        charToGsm.put('g', i++);
        charToGsm.put('h', i++);
        charToGsm.put('i', i++);
        charToGsm.put('j', i++);
        charToGsm.put('k', i++);
        charToGsm.put('l', i++);
        charToGsm.put('m', i++);
        charToGsm.put('n', i++);
        charToGsm.put('o', i++);
        
        charToGsm.put('p', i++);
        charToGsm.put('q', i++);
        charToGsm.put('r', i++);
        charToGsm.put('s', i++);
        charToGsm.put('t', i++);
        charToGsm.put('u', i++);
        charToGsm.put('v', i++);
        charToGsm.put('w', i++);
        charToGsm.put('x', i++);
        charToGsm.put('y', i++);
        charToGsm.put('z', i++);
        charToGsm.put('\u00e4', i++);
        charToGsm.put('\u00f6', i++);
        charToGsm.put('\u00f1', i++);
        charToGsm.put('\u00fc', i++);
        charToGsm.put('\u00e0', i++);
        
        
        charToGsmExtended.put('\f', 10);
        charToGsmExtended.put('^', 20);
        charToGsmExtended.put('{', 40);
        charToGsmExtended.put('}', 41);
        charToGsmExtended.put('\\', 47);
        charToGsmExtended.put('[', 60);
        charToGsmExtended.put('~', 61);
        charToGsmExtended.put(']', 62);
        charToGsmExtended.put('|', 64);
        charToGsmExtended.put('\u20ac', 101);
                
        int size = charToGsm.size();
        for (int j=0; j<size; j++) {
            gsmToChar.put(charToGsm.valueAt(j), charToGsm.keyAt(j));
        }
 
        size = charToGsmExtended.size();
        for (int j=0; j<size; j++) {
            gsmExtendedToChar.put(charToGsmExtended.valueAt(j), charToGsmExtended.keyAt(j));
        }


        sGsmSpaceChar = charToGsm.get(' ');
    }
    

}