/* KallistiOS ##version##

   sd.c
   Copyright (C) 2012, 2013 Lawrence Sebald
*/

/* The code contained herein is basically directly implementing what is
   documented here: http://elm-chan.org/docs/mmc/mmc_e.html */

#include <arch/types.h>
#include <dc/scif.h>
#include <dc/sd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* For CRC16-CCITT */
#include <kos/net.h>

#include <kos/blockdev.h>
#include <kos/dbglog.h>

/* Upper bounds on polling loops. Each is a count of loop iterations (one SPI
   byte exchange or one command attempt per iteration), not a time value.

   MAX_RETRIES   - waiting for the card to answer 0xFF before a command and at
                   shutdown; also the number of ACMD41/CMD1 attempts at init.
   READ_RETRIES  - waiting for the start-of-data token in read_data().
   WRITE_RETRIES - waiting for the card to answer 0xFF before a data block or
                   the end-of-transfer token is written. */
#define MAX_RETRIES     500000
#define READ_RETRIES    50000
#define WRITE_RETRIES   150000

/* Build the first byte of a command packet by OR-ing 0x40 into the command
   index n. */
#define CMD(n) ((n) | 0x40)

/* Non-zero when block numbers must be converted to byte addresses (shifted
   left by 9) before being sent to the card. Set by sd_init(). */
static int byte_mode = 0;
/* Non-zero when sd_init() found that ACMD41 failed but CMD1 succeeded. */
static int is_mmc = 0;
/* Non-zero between a successful sd_init() and the next sd_shutdown(). */
static int initted = 0;

/* The type of the dev_data in the block device structure. One instance is
   allocated per block device by sd_blockdev_for_partition() and released by
   the device's shutdown callback. */
typedef struct sd_devdata {
    uint64_t block_count;   /* Partition length, read from offset 0x0C of the
                               MBR partition record */
    uint64_t start_block;   /* First block of the partition, read from offset
                               0x08 of the MBR partition record; added to every
                               block number passed to the read/write callbacks */
} sd_devdata_t;

/* Lookup table for sd_crc7(), which indexes it with (running crc ^ input
   byte) to process one input byte per step.

   Table/algorithm generated by pycrc. I really wanted to have a much smaller
   table here, but unfortunately, the code pycrc generated just did not work. */
static const uint8 crc7_table[256] = {
    0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
    0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
    0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
    0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
    0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
    0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
    0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
    0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
    0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
    0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
    0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
    0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
    0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
    0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
    0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
    0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
    0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
    0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
    0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
    0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
    0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
    0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
    0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
    0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
    0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
    0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
    0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
    0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
    0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
    0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
    0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
    0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
};

/* Compute the table-driven CRC7 that protects command packets.

   data - bytes to run through the CRC
   size - number of bytes at data
   crc  - running value from a previous call, or 0 to start a new CRC

   The result is masked with 0xfe, so the 7-bit CRC sits in bits 7..1 and
   bit 0 is always clear (sd_send_cmd() ORs 0x01 into it). */
uint8 sd_crc7(const uint8 *data, int size, uint8 crc) {
    const uint8 *cur = data;

    for(; size != 0; --size, ++cur) {
        int idx = (crc ^ *cur) & 0xff;

        crc = (uint8)((crc7_table[idx] ^ (crc << 7)) & 0xfe);
    }

    return (uint8)(crc & 0xfe);
}

/* Send one six-byte command packet and return the card's response byte.

   cmd  - first packet byte, normally built with CMD(n)
   arg  - 32-bit argument, sent most-significant byte first
   slow - non-zero to move every byte with scif_spi_slow_rw_byte(), zero to
          use scif_spi_rw_byte()

   Returns the response byte (bit 7 clear, so 0..0x7F) or -1 if the card never
   answered 0xFF beforehand or never produced a response within 20 bytes.
   Chip select is left entirely to the caller. */
static int sd_send_cmd(uint8 cmd, uint32 arg, int slow) {
    uint8 (*xfer)(uint8 data);
    uint8 frame[6];
    uint8 resp = 0;
    int n;

    if(slow)
        xfer = &scif_spi_slow_rw_byte;
    else
        xfer = &scif_spi_rw_byte;

    /* Throw one byte away, then poll until the card answers 0xFF, meaning it
       is ready to take a command. */
    xfer(0xFF);

    for(n = 0; n < MAX_RETRIES; ++n) {
        resp = xfer(0xFF);

        if(resp == 0xFF)
            break;
    }

    if(resp != 0xFF)
        return -1;

    /* Assemble the packet: command, big-endian argument, CRC7 with the low
       bit forced to 1. */
    frame[0] = cmd;

    for(n = 1; n <= 4; ++n) {
        frame[n] = (uint8)(arg >> (8 * (4 - n)));
    }

    frame[5] = sd_crc7(frame, 5, 0) | 0x01;

    /* Clock the packet out */
    for(n = 0; n < 6; ++n) {
        xfer(frame[n]);
    }

    /* The byte immediately following a CMD12 is to be ignored */
    if(cmd == CMD(12))
        xfer(0xFF);

    /* A response is the first byte with bit 7 clear; give up after 20 */
    for(n = 0; n < 20; ++n) {
        resp = xfer(0xFF);

        if(!(resp & 0x80))
            break;
    }

    if(resp & 0x80)
        return -1;

    return (int)resp;
}

/* Repeatedly issue ACMD41 (CMD55 followed by CMD41) until the card leaves the
   idle state.

   arg - argument passed to CMD41

   Returns 0 as soon as CMD41 answers with a response byte of 0, -1 if either
   command answers with a response byte greater than 1, and -2 if the card is
   still not ready after MAX_RETRIES attempts.

   A negative result from sd_send_cmd() (no usable response) is not treated as
   fatal here; the loop simply tries again. */
static int acmd41_loop(uint32 arg) {
    int i, rv;

    /* Try to send ACMD41 for a while. It could take up to 1 second to come
       back to us, but will likely take much less. */
    for(i = 0; i < MAX_RETRIES; ++i) {
        if(sd_send_cmd(CMD(55), 0, 1) > 1)
            return -1;

        if((rv = sd_send_cmd(CMD(41), arg, 1)) > 1)
            return -1;

        /* Returning from inside the loop keeps "ready on the last attempt"
           distinct from "ran out of attempts". */
        if(rv == 0)
            return 0;
    }

    /* If we get this far, it isn't going to come back. */
    return -2;
}

/* Repeatedly issue CMD1 until the card leaves the idle state.

   Returns 0 as soon as CMD1 answers with a response byte of 0, -1 if it
   answers with a response byte greater than 1, and -2 if the card is still
   not ready after MAX_RETRIES attempts.

   A negative result from sd_send_cmd() (no usable response) is not treated as
   fatal here; the loop simply tries again. */
static int cmd1_loop(void) {
    int i, rv;

    /* Try to send CMD1 for a while. It could take up to 1 second to come
       back to us, but will likely take much less. */
    for(i = 0; i < MAX_RETRIES; ++i) {
        if((rv = sd_send_cmd(CMD(1), 0, 1)) > 1)
            return -1;

        /* Returning from inside the loop keeps "ready on the last attempt"
           distinct from "ran out of attempts". */
        if(rv == 0)
            return 0;
    }

    /* If we get this far, it isn't going to come back. */
    return -2;
}

/* Bring up the SPI link and take the card through its initialization
   sequence, recording in byte_mode / is_mmc what kind of card was found.

   Returns 0 on success (or if already initialized), -2 if the card answers
   CMD8 but the four bytes that follow do not carry the expected 0x01 / 0xAA
   values, and -1 on any other failure. On failure the card is deselected and
   the driver stays uninitialized. */
int sd_init(void) {
    uint8 trail[4];
    int n;
    int err = -1;

    if(initted)
        return 0;

    is_mmc = 0;
    byte_mode = 0;

    if(scif_spi_init() != 0)
        return -1;

    /* The card needs at least 74 clock cycles before it is spoken to; ten
       idle bytes supply 80. Only after that is the card selected. */
    for(n = 10; n > 0; --n) {
        scif_spi_slow_rw_byte(0xFF);
    }

    scif_spi_set_cs(0);

    /* CMD0: reset the card into its idle state (response byte must be 1) */
    if(sd_send_cmd(CMD(0), 0, 1) != 1)
        goto fail;

    if(sd_send_cmd(CMD(8), 0x000001AA, 1) != 1) {
        /* CMD8 not accepted: not a v2 SD card. Try ACMD41 first (SDv1) and
           fall back to CMD1, which identifies the card as MMC. */
        if(acmd41_loop(0) != 0) {
            if(cmd1_loop() != 0)
                goto fail;

            is_mmc = 1;
        }

        /* CMD16: fix the block length at 512 bytes */
        if(sd_send_cmd(CMD(16), 512, 1) != 0)
            goto fail;

        /* v1 cards always use byte addressing. */
        byte_mode = 1;
    }
    else {
        /* v2 SD card: CMD8 is followed by four more bytes, the last two of
           which must match what we sent. */
        for(n = 0; n < 4; ++n) {
            trail[n] = scif_spi_slow_rw_byte(0xFF);
        }

        if((trail[2] & 0x0F) != 0x01 || trail[3] != 0xAA) {
            err = -2;
            goto fail;
        }

        /* ACMD41 until the card is ready */
        if(acmd41_loop(0x40000000) != 0)
            goto fail;

        /* CMD58 tells us whether the card is byte or block addressed: bit
           0x40 of the first trailing byte clear means byte addressing. */
        if(sd_send_cmd(CMD(58), 0, 1) != 0)
            goto fail;

        for(n = 0; n < 4; ++n) {
            trail[n] = scif_spi_slow_rw_byte(0xFF);
        }

        if((trail[0] & 0x40) == 0)
            byte_mode = 1;
    }

    /* CMD59: turn CRC checking back on */
    if(sd_send_cmd(CMD(59), 1, 1) != 0)
        goto fail;

    /* Deselect and clock one more byte so the card lets go of the data line */
    scif_spi_set_cs(1);
    scif_spi_slow_rw_byte(0xFF);
    initted = 1;

    return 0;

fail:
    scif_spi_set_cs(1);
    return err;
}

/* Quiesce the card and shut the SPI link down.

   Returns -1 if the driver was not initialized, 0 otherwise. The result of
   the ready-wait is not checked: shutdown proceeds even if the card never
   answers 0xFF. */
int sd_shutdown(void) {
    int tries;

    if(!initted)
        return -1;

    /* Select the card, discard one byte, then poll (at most MAX_RETRIES
       times) for it to answer 0xFF. */
    scif_spi_set_cs(0);
    scif_spi_slow_rw_byte(0xFF);

    for(tries = 0; tries < MAX_RETRIES; ++tries) {
        if(scif_spi_slow_rw_byte(0xFF) == 0xFF)
            break;
    }

    /* Deselect, and clock one more byte so the card releases the data line */
    scif_spi_set_cs(1);
    scif_spi_slow_rw_byte(0xFF);

    scif_spi_shutdown();
    initted = 0;

    return 0;
}

/* Receive one data packet from the card: start token, payload, 16-bit CRC.

   bytes - payload length to read
   buf   - destination for the payload (at least bytes long)

   Returns 0 if the payload was received and its CRC matches, 1 if the CRC
   does not match, and -1 if the 0xFE start token was not seen. Callers treat
   any non-zero value as failure. */
static int read_data(size_t bytes, uint8 *buf) {
    uint8 *ptr = buf;
    size_t cnt = bytes;
    uint8 byte, crc_hi, crc_lo;
    uint16 crc;
    int i = 0;

    /* Skip 0xFF filler until something else shows up.
       This should come back in 100ms at worst... */
    do {
        byte = scif_spi_rw_byte(0xFF);
        ++i;
    } while(byte == 0xFF && i < READ_RETRIES);

    /* Anything other than the start token (including a timeout, where byte is
       still 0xFF) is a failure. */
    if(byte != 0xFE)
        return -1;

    /* Read in the data */
    while(cnt--) {
        *ptr++ = scif_spi_rw_byte(0xFF);
    }

    /* Read in the trailing CRC, high byte first. The two reads are separate
       statements on purpose: as operands of a single `|` the compiler would
       be free to perform them in either order, swapping the bytes. */
    crc_hi = scif_spi_rw_byte(0xFF);
    crc_lo = scif_spi_rw_byte(0xFF);
    crc = (uint16)((crc_hi << 8) | crc_lo);

    /* Return success if the CRC matches */
    return crc != net_crc16ccitt(buf, bytes, 0);
}

/* Read count 512-byte blocks starting at block into buf.

   block - first block number to read
   count - number of blocks; exactly 1 uses a single-block read (CMD17),
           anything else uses a multi-block read (CMD18 ... CMD12)
   buf   - destination, at least count * 512 bytes

   Returns 0 on success or -1 with errno set:
     ENXIO  - the driver has not been initialized
     EINVAL - the card is byte addressed and block cannot be expressed as a
              32-bit byte address
     EIO    - a command was rejected or a data packet failed */
int sd_read_blocks(uint32 block, size_t count, uint8 *buf) {
    int rv = 0;
    int failed = 0;

    if(!initted) {
        errno = ENXIO;
        return -1;
    }

    /* If we're in byte addressing mode, scale the block up. Refuse block
       numbers whose byte address would not fit in 32 bits rather than letting
       the shift wrap around to some unrelated block. */
    if(byte_mode) {
        if(block > 0x007FFFFF) {
            errno = EINVAL;
            return -1;
        }

        block <<= 9;
    }

    scif_spi_set_cs(0);

    if(count == 1) {
        /* Ask the card for the block */
        if(sd_send_cmd(CMD(17), block, 0)) {
            rv = -1;
            errno = EIO;
            goto out;
        }

        /* Read the block back */
        if(read_data(512, buf)) {
            rv = -1;
            errno = EIO;
            goto out;
        }
    }
    else {
        /* Set up the multi-block read */
        if(sd_send_cmd(CMD(18), block, 0)) {
            rv = -1;
            errno = EIO;
            goto out;
        }

        while(count--) {
            if(read_data(512, buf)) {
                failed = 1;
                break;
            }

            buf += 512;
        }

        /* Stop the data transfer. This is done even when a block failed, so
           that the card is not left in the middle of a multi-block read when
           the next command comes along. */
        sd_send_cmd(CMD(12), 0, 0);

        if(failed) {
            rv = -1;
            errno = EIO;
        }
    }

out:
    /* Deselect and clock one more byte so the card releases the data line */
    scif_spi_set_cs(1);
    scif_spi_rw_byte(0xFF);

    return rv;
}

/* Send one data packet to the card: token, payload, 16-bit CRC, and then
   check the card's data response.

   tag   - token byte that introduces the packet
   bytes - payload length
   buf   - payload to send

   Returns 0 if the card accepted the packet, -1 if the card never answered
   0xFF beforehand or its data response was anything other than "accepted"
   (low five bits equal to 0x05). */
static int write_data(uint8 tag, size_t bytes, const uint8 *buf) {
    uint8 resp = 0;
    uint16 cksum;
    size_t n;
    int tries;

    /* Discard one byte, then poll until the card answers 0xFF */
    scif_spi_rw_byte(0xFF);

    for(tries = 0; tries < WRITE_RETRIES; ++tries) {
        resp = scif_spi_rw_byte(0xFF);

        if(resp == 0xFF)
            break;
    }

    if(resp != 0xFF)
        return -1;

    /* Token first, then the payload */
    scif_spi_rw_byte(tag);

    cksum = net_crc16ccitt(buf, bytes, 0);

    for(n = 0; n < bytes; ++n) {
        scif_spi_rw_byte(buf[n]);
    }

    /* CRC goes out high byte first */
    scif_spi_rw_byte((uint8)(cksum >> 8));
    scif_spi_rw_byte((uint8)cksum);

    /* The next byte is the card's verdict on the block */
    resp = scif_spi_rw_byte(0xFF);

    return ((resp & 0x1F) == 0x05) ? 0 : -1;
}

/* Write count 512-byte blocks from buf, starting at block.

   block - first block number to write
   count - number of blocks; exactly 1 uses a single-block write (CMD24),
           anything else uses a multi-block write (CMD25)
   buf   - source data, at least count * 512 bytes

   Returns 0 on success or -1 with errno set:
     ENXIO  - the driver has not been initialized
     EINVAL - the card is byte addressed and block cannot be expressed as a
              32-bit byte address
     EIO    - a command was rejected or a data packet was not accepted */
int sd_write_blocks(uint32 block, size_t count, const uint8 *buf) {
    int rv = 0, i = 0;
    uint8 byte;

    if(!initted) {
        errno = ENXIO;
        return -1;
    }

    /* If we're in byte addressing mode, scale the block up. Refuse block
       numbers whose byte address would not fit in 32 bits: letting the shift
       wrap around would overwrite some unrelated block. */
    if(byte_mode) {
        if(block > 0x007FFFFF) {
            errno = EINVAL;
            return -1;
        }

        block <<= 9;
    }

    scif_spi_set_cs(0);

    if(count == 1) {
        /* Prepare the card for the block */
        if(sd_send_cmd(CMD(24), block, 0)) {
            rv = -1;
            errno = EIO;
            goto out;
        }

        /* Send the block to the card */
        if(write_data(0xFE, 512, buf)) {
            rv = -1;
            errno = EIO;
            goto out;
        }
    }
    else {
        /* If we're on a SD card, inform the card ahead of time how many blocks
           we intend to write. This is only a hint, so the responses are
           deliberately not checked. */
        if(!is_mmc) {
            sd_send_cmd(CMD(55), 0, 0);
            sd_send_cmd(CMD(23), count, 0);
        }

        /* Set up the multi-block write */
        if(sd_send_cmd(CMD(25), block, 0)) {
            rv = -1;
            errno = EIO;
            goto out;
        }

        while(count--) {
            if(write_data(0xFC, 512, buf)) {
                /* Make sure we at least try to stop the transfer... */
                rv = -1;
                errno = EIO;
                break;
            }

            buf += 512;
        }

        /* Write the end data token, once the card answers 0xFF again. */
        scif_spi_rw_byte(0xFF);
        do {
            byte = scif_spi_rw_byte(0xFF);
            ++i;
        } while(byte != 0xFF && i < WRITE_RETRIES);

        if(byte != 0xFF) {
            rv = -1;
            errno = EIO;
            goto out;
        }

        scif_spi_rw_byte(0xFD);
    }

out:
    /* Deselect and clock one more byte so the card releases the data line */
    scif_spi_set_cs(1);
    scif_spi_rw_byte(0xFF);

    return rv;
}

uint64 sd_get_size(void) {
    uint8 csd[16];
    uint64 rv;
    int exponent;

    if(!initted) {
        errno = ENXIO;
        return (uint64)-1;
    }

    /* In order to get the size of the SD card, we must read the CSD register
       via CMD9. There are two different versions of the CSD structure, one of
       which is used on normal SD cards, and one that is used on SDHC and SDXC
       cards. Each one has a slightly different way of calculating the card
       size. The procedure here is described on pages 96-105 of the SD Physical
       Layer Simplified Specification v3.01. */

    /* Prepare the CSD send */
    scif_spi_set_cs(0);
    if(sd_send_cmd(CMD(9), 0, 0)) {
        rv = (uint64)-1;
        errno = EIO;
        goto out;
    }

    /* Read back the register */
    if(read_data(16, csd)) {
        rv = (uint64)-1;
        errno = EIO;
        goto out;
    }

    /* Figure out what version of the CSD register we're looking at */
    switch(csd[0] >> 6) {
        case 0:
            /* CSD version 1.0 (SD)
               C_SIZE is bits 62-73 of the CSD, C_SIZE_MULT is bits 47-49,
               READ_BL_LEN is bits 80-83.
               Card size is calculated as follows:
               (C_SIZE + 1) * 2^(C_SIZE_MULT + 2) * 2^(READ_BL_LEN) */
            exponent = (csd[5] & 0x0F) + ((csd[9] & 0x03) << 1) +
                (csd[10] >> 7) + 2;
            rv = ((csd[8] >> 6) | (csd[7] << 2) | ((csd[6] & 0x03) << 10)) + 1;
            rv <<= exponent;
            break;

        case 1:
            /* CSD version 2.0 (SDHC/SDXC)
               C_SIZE is bits 48-69 of the CSD, card size is calculated as
               (C_SIZE + 1) * 512KiB */
            rv = ((((uint64)csd[9]) | (uint64)(csd[8] << 8) |
                   ((uint64)(csd[7] & 0x3F) << 16)) + 1) << 19;
            break;

        default:
            /* Unknown version, punt. */
            rv = (uint64)-1;
            errno = ENODEV;
            goto out;
    }

out:
    scif_spi_set_cs(1);
    scif_spi_rw_byte(0xFF);

    return rv;
}

/* Block device "init" callback. There is nothing to set up per device; this
   only verifies that the SD driver itself is up.

   Returns 0 if sd_init() has completed, otherwise -1 with errno = ENODEV. */
static int sdb_init(kos_blockdev_t *d) {
    (void)d;

    if(initted)
        return 0;

    errno = ENODEV;
    return -1;
}

/* Block device "shutdown" callback: release the per-device data that
   sd_blockdev_for_partition() allocated. Always returns 0.

   The pointer is cleared afterwards so that calling shutdown a second time on
   the same device is harmless (free(NULL) is a no-op) instead of a double
   free. */
static int sdb_shutdown(kos_blockdev_t *d) {
    free(d->dev_data);
    d->dev_data = NULL;

    return 0;
}

/* Block device "read_blocks" callback.

   d     - device whose dev_data is a sd_devdata_t
   block - block number relative to the start of the partition
   count - number of blocks to read
   buf   - destination buffer

   Translates the partition-relative block number into a card-absolute one
   and forwards to sd_read_blocks(), whose result (and errno) is returned.
   sd_read_blocks() takes a 32-bit block number, so an absolute block number
   that does not fit is rejected with -1 / EINVAL instead of being silently
   truncated to a different block. */
static int sdb_read_blocks(kos_blockdev_t *d, uint64_t block, size_t count,
                           void *buf) {
    sd_devdata_t *data = (sd_devdata_t *)d->dev_data;
    uint64_t abs_block = block + data->start_block;

    /* abs_block < block catches wrap-around of the 64-bit addition itself */
    if(abs_block < block || abs_block > 0xFFFFFFFFULL) {
        errno = EINVAL;
        return -1;
    }

    return sd_read_blocks((uint32)abs_block, count, (uint8 *)buf);
}

/* Block device "write_blocks" callback.

   d     - device whose dev_data is a sd_devdata_t
   block - block number relative to the start of the partition
   count - number of blocks to write
   buf   - source buffer

   Translates the partition-relative block number into a card-absolute one
   and forwards to sd_write_blocks(), whose result (and errno) is returned.
   sd_write_blocks() takes a 32-bit block number, so an absolute block number
   that does not fit is rejected with -1 / EINVAL instead of being silently
   truncated, which would overwrite a different block. */
static int sdb_write_blocks(kos_blockdev_t *d, uint64_t block, size_t count,
                            const void *buf) {
    sd_devdata_t *data = (sd_devdata_t *)d->dev_data;
    uint64_t abs_block = block + data->start_block;

    /* abs_block < block catches wrap-around of the 64-bit addition itself */
    if(abs_block < block || abs_block > 0xFFFFFFFFULL) {
        errno = EINVAL;
        return -1;
    }

    return sd_write_blocks((uint32)abs_block, count, (const uint8 *)buf);
}

/* Block device "count_blocks" callback: report the block count that was
   stored in the device's sd_devdata_t when the device was created. */
static uint64_t sdb_count_blocks(kos_blockdev_t *d) {
    const sd_devdata_t *info = (const sd_devdata_t *)d->dev_data;
    uint64_t blocks = info->block_count;

    return blocks;
}

/* Block device "flush" callback. This driver keeps no write cache of its own,
   so there is never anything to flush; always returns 0. */
static int sdb_flush(kos_blockdev_t *d) {
    /* Cache? We don't need no steenkin' cache! */
    (void)d;
    return 0;
}

/* Template block device. sd_blockdev_for_partition() copies this into the
   caller's structure and then replaces dev_data with a freshly allocated
   sd_devdata_t describing the chosen partition. */
static kos_blockdev_t sd_blockdev = {
    NULL,                   /* dev_data */
    9,                      /* l_block_size (block size of 512 bytes) */
    &sdb_init,              /* init */
    &sdb_shutdown,          /* shutdown */
    &sdb_read_blocks,       /* read_blocks */
    &sdb_write_blocks,      /* write_blocks */
    &sdb_count_blocks,      /* count_blocks */
    &sdb_flush              /* flush */
};

int sd_blockdev_for_partition(int partition, kos_blockdev_t *rv,
                              uint8 *partition_type) {
    uint8 buf[512];
    int pval;
    sd_devdata_t *ddata;

    if(!initted) {
        errno = ENXIO;
        return -1;
    }

    if(!rv || !partition_type) {
        errno = EFAULT;
        return -1;
    }

    /* Make sure the partition asked for is sane */
    if(partition < 0 || partition > 3) {
        dbglog(DBG_DEBUG, "Invalid partition number given: %d\n", partition);
        errno = EINVAL;
        return -1;
    }

    /* Read the MBR from the card */
    if(sd_read_blocks(0, 1, buf)) {
        return -1;
    }

    /* Make sure the SD card uses MBR partitions.
       TODO: Support GPT partitioning at some point. */
    if(buf[0x01FE] != 0x55 || buf[0x1FF] != 0xAA) {
        dbglog(DBG_DEBUG, "SD card doesn't appear to have a MBR\n");
        errno = ENOENT;
        return -1;
    }

    /* Figure out where the partition record we're concerned with is, and make
       sure that the partition actually exists. */
    pval = 16 * partition + 0x01BE;

    if(buf[pval + 4] == 0) {
        dbglog(DBG_DEBUG, "Partition %d appears to be empty\n", partition);
        errno = ENOENT;
        return -1;
    }

    /* Allocate the device data */
    if(!(ddata = (sd_devdata_t *)malloc(sizeof(sd_devdata_t)))) {
        errno = ENOMEM;
        return -1;
    }

    /* Copy in the template block device and fill it in */
    memcpy(rv, &sd_blockdev, sizeof(kos_blockdev_t));
    ddata->block_count = buf[pval + 0x0C] | (buf[pval + 0x0D] << 8) |
        (buf[pval + 0x0E] << 16) | (buf[pval + 0x0F] << 24);
    ddata->start_block = buf[pval + 0x08] | (buf[pval + 0x09] << 8) |
        (buf[pval + 0x0A] << 16) | (buf[pval + 0x0B] << 24);
    rv->dev_data = ddata;
    *partition_type = buf[pval + 4];

    return 0;
}
