/*-------------------------------------------------------------------------------------------------
 _______ __   _ _______ _______ ______  ______
 |_____| | \  |    |    |______ |     \ |_____]
 |     | |  \_|    |    ______| |_____/ |_____]

 Copyright (c) 2016, antsdb.com and/or its affiliates. All rights reserved. *-xguo0<@

 This program is free software: you can redistribute it and/or modify it under the terms of the
 GNU Lesser General Public License, version 3, as published by the Free Software Foundation.

 You should have received a copy of the GNU Affero General Public License along with this program.
 If not, see <https://www.gnu.org/licenses/lgpl-3.0.en.html>
-------------------------------------------------------------------------------------------------*/
package com.antsdb.saltedfish.charset;

import java.nio.ByteBuffer;
import java.util.function.IntConsumer;
import java.util.function.IntSupplier;

import com.antsdb.saltedfish.cpp.Unsafe;

/**
 * UTF-8 decoder with companion encoding helpers.
 *
 * <p>Decoding is available from a {@link ByteBuffer}, from an {@link IntSupplier} of bytes (where
 * {@code -1} marks the end of input) and directly from native memory through {@code Unsafe}.
 * Encoding pushes the produced bytes into an {@link IntConsumer}.
 *
 * @author wgu0
 */
public final class Utf8 implements Decoder {

    /**
     * Decodes one code point starting at the current position of {@code buf}.
     *
     * <p>If the lead byte announces a multi-byte sequence but the buffer does not hold enough
     * bytes, or if the lead byte is not a valid UTF-8 lead byte, only that single byte is consumed
     * and its unsigned value is returned as is.
     *
     * @param buf buffer positioned at the first byte of a sequence
     * @return the decoded code point, or the raw unsigned lead byte as described above
     */
    @Override
    public int get(ByteBuffer buf) {
        int ch = buf.get() & 0xff;
        if ((ch & 0x80) == 0) {
            // single byte (ASCII), nothing to combine
        }
        else if ((ch & 0xe0) == 0xc0) {
            // 2 bytes utf
            if (buf.remaining() >= 1) {
                ch = (ch & 0x1f) << 6;
                ch = ch | (buf.get() & 0x3f);
            }
        }
        else if ((ch & 0xf0) == 0xe0) {
            // 3 bytes utf
            if (buf.remaining() >= 2) {
                ch = (ch & 0xf) << 12;
                ch = ch | ((buf.get() & 0x3f) << 6);
                ch = ch | (buf.get() & 0x3f);
            }
        }
        else if ((ch & 0xf8) == 0xf0) {
            // 4 bytes utf
            if (buf.remaining() >= 3) {
                ch = (ch & 0x7) << 18;
                ch = ch | ((buf.get() & 0x3f) << 12);
                ch = ch | ((buf.get() & 0x3f) << 6);
                ch = ch | (buf.get() & 0x3f);
            }
        }
        return ch;
    }

    /**
     * Decodes one code point from a stream of bytes.
     *
     * @param supplier source of bytes; {@code -1} signals the end of input
     * @return the decoded code point, or {@code -1} if the input is exhausted before a new
     *         sequence starts
     * @throws IllegalArgumentException if the lead byte is not a valid UTF-8 lead byte or the
     *         input ends in the middle of a multi-byte sequence
     */
    public int get(IntSupplier supplier) {
        int ch = supplier.getAsInt();
        if (ch == -1) {
            return -1;
        }
        ch = ch & 0xff;
        if ((ch & 0x80) == 0) {
            return ch;
        }
        if ((ch & 0xe0) == 0xc0) {
            // 2 bytes utf
            int result = (ch & 0x1f) << 6;
            result |= continuation(supplier);
            return result;
        }
        if ((ch & 0xf0) == 0xe0) {
            // 3 bytes utf
            int result = (ch & 0xf) << 12;
            result |= continuation(supplier) << 6;
            result |= continuation(supplier);
            return result;
        }
        if ((ch & 0xf8) == 0xf0) {
            // 4 bytes utf
            int result = (ch & 0x7) << 18;
            result |= continuation(supplier) << 12;
            result |= continuation(supplier) << 6;
            result |= continuation(supplier);
            return result;
        }
        throw new IllegalArgumentException("invalid UTF-8 lead byte: 0x" + Integer.toHexString(ch));
    }

    /**
     * Reads the next byte of a multi-byte sequence and returns its six payload bits.
     *
     * @throws IllegalArgumentException if the supplier reports the end of input
     */
    private static int continuation(IntSupplier supplier) {
        int next = supplier.getAsInt();
        if (next == -1) {
            // without this check the -1 marker would be folded into the code point as 0x3f
            throw new IllegalArgumentException("truncated UTF-8 sequence");
        }
        return next & 0x3f;
    }

    /**
     * Wraps a supplier of UTF-8 bytes into a supplier of decoded code points.
     *
     * @param supplier source of bytes; {@code -1} signals the end of input
     * @return a supplier that yields one code point per call, see {@link #get(IntSupplier)}
     */
    public IntSupplier mapDecode(IntSupplier supplier) {
        return () -> get(supplier);
    }

    /**
     * Decodes all bytes delivered by {@code supplier} into a string.
     *
     * @param supplier source of bytes; {@code -1} signals the end of input
     * @return the decoded text
     * @throws IllegalArgumentException see {@link #get(IntSupplier)}
     */
    public String decode(IntSupplier supplier) {
        StringBuilder buf = new StringBuilder();
        IntSupplier output = mapDecode(supplier);
        for (int ch = output.getAsInt(); ch != -1; ch = output.getAsInt()) {
            if (Character.isSupplementaryCodePoint(ch)) {
                // code points above U+FFFF do not fit in one char; emit a surrogate pair
                buf.appendCodePoint(ch);
            }
            else {
                buf.append((char) ch);
            }
        }
        return buf.toString();
    }

    /**
     * Encodes every value delivered by {@code supplier} until it returns {@code -1}.
     *
     * @param supplier source of code points; {@code -1} signals the end of input
     * @param consumer receives the encoded bytes, see {@link #encode(int, IntConsumer)}
     */
    public static void encode(IntSupplier supplier, IntConsumer consumer) {
        for (int ch = supplier.getAsInt(); ch != -1; ch = supplier.getAsInt()) {
            encode(ch, consumer);
        }
    }

    /**
     * Encodes a string one {@code char} at a time.
     *
     * <p>Each UTF-16 code unit is encoded on its own, so a surrogate pair is written as two
     * three-byte sequences rather than one four-byte sequence.
     * NOTE(review): kept as is so that the bytes produced for existing data do not change;
     * confirm whether callers rely on this form.
     *
     * @param s text to encode
     * @param consumer receives the encoded bytes, see {@link #encode(int, IntConsumer)}
     */
    public static void encode(String s, IntConsumer consumer) {
        for (int i = 0; i < s.length(); i++) {
            encode(s.charAt(i), consumer);
        }
    }

    /**
     * Encodes a single code point.
     *
     * <p>Values up to {@code 0x7F} are passed to the consumer unchanged. Every byte of a
     * multi-byte sequence is passed after a cast to {@code byte}, i.e. as a negative int.
     *
     * @see org.apache.hadoop.io.UTF8
     * @param ch code point to encode
     * @param consumer receives the encoded bytes in order
     */
    public static void encode(int ch, IntConsumer consumer) {
        if (ch <= 0x7F) {
            consumer.accept(ch);
        }
        else if (ch <= 0x07FF) {
            consumer.accept((byte) (0xC0 | ((ch >> 6) & 0x1F)));
            consumer.accept((byte) (0x80 | (ch & 0x3F)));
        }
        else if (ch <= 0xFFFF) {
            consumer.accept((byte) (0xE0 | ((ch >> 12) & 0x0F)));
            consumer.accept((byte) (0x80 | ((ch >> 6) & 0x3F)));
            consumer.accept((byte) (0x80 | (ch & 0x3F)));
        }
        else {
            // above U+FFFF the three-byte form would silently drop the high bits
            consumer.accept((byte) (0xF0 | ((ch >> 18) & 0x07)));
            consumer.accept((byte) (0x80 | ((ch >> 12) & 0x3F)));
            consumer.accept((byte) (0x80 | ((ch >> 6) & 0x3F)));
            consumer.accept((byte) (0x80 | (ch & 0x3F)));
        }
    }

    /**
     * Decodes {@code size} bytes of UTF-8 text stored in native memory.
     *
     * <p>A byte that is not a valid lead byte, or a lead byte whose sequence would extend past
     * {@code size}, is copied to the result as a single char holding its unsigned value; memory
     * beyond {@code pBytes + size} is never read.
     *
     * @param pBytes address of the first byte
     * @param size number of bytes to decode
     * @return the decoded text
     */
    public static String decode(long pBytes, int size) {
        // every sequence yields at most as many chars as it has bytes (4 bytes -> surrogate pair)
        char[] buf = new char[size];
        int j = 0;
        int i = 0;
        while (i < size) {
            int remaining = size - i;
            if (remaining >= 8) {
                // fast path: 8 ASCII bytes at once; the lowest-order byte of the loaded long is
                // treated as the first character (presumably a little-endian load - confirm)
                long value = Unsafe.getLong(pBytes + i);
                if (isAscii(value)) {
                    buf[j++] = (char)(value & 0x7f);
                    buf[j++] = (char)((value >> 8) & 0x7f);
                    buf[j++] = (char)((value >> 16) & 0x7f);
                    buf[j++] = (char)((value >> 24) & 0x7f);
                    buf[j++] = (char)((value >> 32) & 0x7f);
                    buf[j++] = (char)((value >> 40) & 0x7f);
                    buf[j++] = (char)((value >> 48) & 0x7f);
                    buf[j++] = (char)((value >> 56) & 0x7f);
                    i += 8;
                    continue;
                }
            }
            // mask to avoid sign extension leaking into the char in the fallback branch
            int ch = Unsafe.getByte(pBytes + i) & 0xff;
            if ((ch & 0x80) == 0) {
                buf[j++] = (char)ch;
                i += 1;
            }
            else if ((ch & 0xe0) == 0xc0 && remaining >= 2) {
                // 2 bytes utf
                int next = Unsafe.getByte(pBytes + i + 1);
                ch = (ch & 0x1f) << 6;
                ch = ch | (next & 0x3f);
                buf[j++] = (char)ch;
                i += 2;
            }
            else if ((ch & 0xf0) == 0xe0 && remaining >= 3) {
                // 3 bytes utf
                int next = Unsafe.getByte(pBytes + i + 1);
                int nextnext = Unsafe.getByte(pBytes + i + 2);
                ch = (ch & 0xf) << 12;
                ch = ch | ((next & 0x3f) << 6);
                ch = ch | (nextnext & 0x3f);
                buf[j++] = (char)ch;
                i += 3;
            }
            else if ((ch & 0xf8) == 0xf0 && remaining >= 4) {
                // 4 bytes utf
                int next = Unsafe.getByte(pBytes + i + 1);
                int nextnext = Unsafe.getByte(pBytes + i + 2);
                int nextnextnext = Unsafe.getByte(pBytes + i + 3);
                ch = (ch & 0x7) << 18;
                ch = ch | ((next & 0x3f) << 12);
                ch = ch | ((nextnext & 0x3f) << 6);
                ch = ch | (nextnextnext & 0x3f);
                if (Character.isSupplementaryCodePoint(ch)) {
                    // does not fit in one char; write high and low surrogate
                    j += Character.toChars(ch, buf, j);
                }
                else {
                    buf[j++] = (char)ch;
                }
                i += 4;
            }
            else {
                // invalid lead byte or sequence truncated by the end of the input
                buf[j++] = (char)ch;
                i += 1;
            }
        }
        return String.valueOf(buf, 0, j);
    }

    /**
     * Tells whether none of the eight bytes packed in {@code value} has its high bit set.
     */
    private static boolean isAscii(long value) {
        return (value & 0x8080808080808080L) == 0;
    }

    /**
     * Decodes the sequence starting at {@code addr} and returns it together with its length.
     *
     * <p>The decoded value is OR-ed with the number of bytes consumed shifted left by 16
     * ({@code 0x10000} for one byte up to {@code 0x40000} for four bytes). A byte that is not a
     * valid lead byte is reported as a one-byte sequence holding its unsigned value.
     * NOTE(review): a four-byte sequence can decode to a value that itself uses bits 16 and up,
     * which then overlap the length marker - confirm how callers separate the two.
     *
     * @param addr address of the first byte of the sequence
     * @return decoded value combined with the length marker as described above
     */
    @Override
    public int getChar(long addr) {
        int ch = Unsafe.getByte(addr) & 0xff;
        if ((ch & 0x80) == 0) {
            ch = ch | 0x10000;
        }
        else if ((ch & 0xe0) == 0xc0) {
            // 2 bytes utf
            int next = Unsafe.getByte(addr + 1);
            ch = (ch & 0x1f) << 6;
            ch = ch | (next & 0x3f);
            ch = ch | 0x20000;
        }
        else if ((ch & 0xf0) == 0xe0) {
            // 3 bytes utf
            int next = Unsafe.getByte(addr + 1);
            int nextnext = Unsafe.getByte(addr + 2);
            ch = (ch & 0xf) << 12;
            ch = ch | ((next & 0x3f) << 6);
            ch = ch | (nextnext & 0x3f);
            ch = ch | 0x30000;
        }
        else if ((ch & 0xf8) == 0xf0) {
            // 4 bytes utf
            int next = Unsafe.getByte(addr + 1);
            int nextnext = Unsafe.getByte(addr + 2);
            int nextnextnext = Unsafe.getByte(addr + 3);
            ch = (ch & 0x7) << 18;
            ch = ch | ((next & 0x3f) << 12);
            ch = ch | ((nextnext & 0x3f) << 6);
            ch = ch | (nextnextnext & 0x3f);
            ch = ch | 0x40000;
        }
        else {
            ch = ch | 0x10000;
        }
        return ch;
    }

}