/**
  *  \file archive.cc
  *  \brief Archive Manipulation
  */
#include <algorithm>
#include <cstring>
#include "archive.h"
#include "vfile.h"
#include "cpluslib/assert.h"
#include "except.h"
#include "parse.h"

/** Determine the file name stored in this member header.
    /names/ is the long file name table (lfntab); it is consulted only
    when the header refers to it, i.e. when the name field starts with
    a blank or a slash followed by a decimal index into the table.
    Throws LinkerError if that index does not address a table entry. */
string_t
ArchiveMember::getFileName(const VLArray<char>& names) const
{
    const char lead = filename[0];
    if(lead == '/' || lead == ' ') {
        /* reference into the lfntab: decode the decimal start index */
        size_t start = 0;
        for(size_t k = 1;
            k < sizeof(filename) && filename[k] >= '0' && filename[k] <= '9';
            ++k)
            start = 10*start + filename[k] - '0';

        if(start >= names.getSize() || names[start] == '\012')
            throw LinkerError("Invalid archive member header (lfn table index out of range)");

        /* the entry extends up to the next newline (or the table end) */
        size_t stop = start;
        for(; stop < names.getSize() && names[stop] != '\012'; ++stop)
            ;
        /* drop the terminating slash, if there is one */
        if(names[stop-1] == '/')
            --stop;
        return string_t(&names[start], stop - start);
    }

    /* SysV style: `filename.o' is stored as `filename.o/' so that the
       name may contain blanks. Strip the blank padding and look for
       that slash. */
    size_t used = sizeof(filename);
    while(used != 0 && filename[used-1] == ' ')
        --used;
    if(used != 0 && filename[used-1] == '/')
        return string_t(filename, used-1);

    /* BSD style: the name ends at the first blank */
    const char* const field_end = filename + sizeof(filename);
    const char* const blank = std::find(filename, field_end, ' ');
    return string_t(filename, size_t(blank - filename));
}

/** Size of the member belonging to this header. This is the header's
    /size/ field, converted by parseNumber (called with 10 as its second
    argument). */
off_t
ArchiveMember::getSize() const
{
    const off_t member_size = parseNumber(size, 10);
    return member_size;
}

/** Test whether the raw name field equals /want/. The field is compared
    character by character against /want/ padded with blanks; no `smart'
    interpretation of the header takes place, which makes this the
    function to use for recognizing special files. */
bool
ArchiveMember::isNamed(const char* want) const
{
    const char* cursor = want;
    size_t k = 0;
    while(k < sizeof(filename)) {
        /* once /want/ is exhausted, only padding blanks may follow */
        char expected = ' ';
        if(*cursor) {
            expected = *cursor;
            ++cursor;
        }
        if(filename[k] != expected)
            return false;
        ++k;
    }
    return true;
}

/**********************************************************************/

/**
 *  \class Archive -- Archive Access
 *
 *  This class provides convenient access to archive members. The
 *  archive is read on demand, building up a directory of members
 *  by name / by offset. Demand-reading gives a noticeable speed
 *  advantage if not all of the archive is needed.
 *
 *  Armap manipulation: member movements are collected and performed
 *  as a group. Hence, armap can be in state "sane" and in state
 *  "movement pending".
 *  - armap is "sane": getOffsetOfSymbol works; addArmapEntry works;
 *    changeArmapEntry enters state "movement pending".
 *  - armap is in state "movement pending": changeArmapEntry can be
 *    used to add more pending movements; updateArmap brings us back
 *    to state "sane". In this mode, addArmapEntry and getOffsetOfSymbol
 *    should not be used.
 */

/** Open an existing archive stored in /afile/. The magic number at the
    start of the file is verified; LinkerError is thrown if it is
    missing. /aparse/ selects whether the armap (mapping from symbols
    to members) is parsed when it is encountered. Not all operations
    need it, so passing false can save time. */
Archive::Archive(Ptr<VFile> afile, bool aparse)
    : where(W_START), file(afile), lfntab(0), pos(0), parse_armap(aparse),
      armap_mtime(0)
{
    char signature[AR_MAGIC_LEN];
    const bool complete = file->read(0, signature, AR_MAGIC_LEN) == AR_MAGIC_LEN;
    if(!complete || std::memcmp(signature, AR_MAGIC, AR_MAGIC_LEN) != 0)
        throw LinkerError("Not an archive");

    /* the first member header follows the magic number */
    pos   = AR_MAGIC_LEN;
    where = W_ARMAP;
}

/** Create a new, empty archive in /afile/ by writing the magic number
    to it. LinkerError is thrown if that write fails. The unnamed /int/
    parameter carries no information; it only gives this constructor a
    signature different from the one that opens an existing archive. */
Archive::Archive(Ptr<VFile> afile, int)
    : where(W_EOF), file(afile), lfntab(0), pos(0), parse_armap(true),
      armap_mtime(0)
{
    static char signature[] = AR_MAGIC;
    const bool written = file->write(0, signature, AR_MAGIC_LEN) == AR_MAGIC_LEN;
    if(!written)
        throw LinkerError("Can't write header");

    /* members will be appended right after the magic number */
    pos = AR_MAGIC_LEN;
}

/**
 *  Read one member header from file; parse the member if needed.
 *  Used internally.
 */
bool
Archive::getNextMember()
{
    if(where == W_EOF)
        return false;

    MemberData md;
    off_t memsz = file->read(pos, &md.member, sizeof(md.member));
    if(memsz == 0) {
        where = W_EOF;
        return false;
    } else if(memsz != sizeof(ArchiveMember)) {
        throw LinkerError("Archive damaged -- partial member header");
    }

    md.pos      = pos;
    md.size     = md.member.getSize();
    md.data_pos = pos + sizeof(ArchiveMember);
    md.type     = M_FILE;
    md.dirty    = false;
    pos = pos + md.size + sizeof(ArchiveMember) + (md.size & 1);

    MPtr mptr = new MemberData(md);

    /* read header - check it */
    if(where <= W_ARMAP && md.member.isNamed("/")) {
        /* COFF armap */
        if(parse_armap)
            parseCoffArmap(md);
        by_offset[md.pos] = armap_ptr = mptr;
        armap_mtime = parseNumber(md.member.mtime, 10);
        mptr->type = M_ARMAP;
        where = W_ARMAPX;
    } else if(where <= W_ARMAP &&
              (md.member.isNamed("__.SYMDEF") || md.member.isNamed("__.SYMDEF/"))) {
        /* BSD armap */
        if(parse_armap)
            parseBsdArmap(md);
        by_offset[md.pos] = armap_ptr = mptr;
        armap_mtime = parseNumber(md.member.mtime, 10);
        mptr->type = M_ARMAP;
        where = W_ARMAPX;
    } else if(where <= W_ARMAPX && md.member.isNamed("/")) {
        /* MSVC++ archives contain multiple armaps. I want
           iar to be able to at least read them (actually editing
           them is not possible; MSVC++ uses COFF not ELF). */
        by_offset[md.pos] = mptr;
        mptr->type = M_ARMAPX;
        where = W_ARMAPX;
    } else if(where <= W_LFNTAB &&
              (md.member.isNamed("//") || md.member.isNamed("ARFILENAMES/"))) {
        /* LFN table */
        lfntab.changeSize(md.size);
        file->read(md.data_pos, lfntab.getBuffer(), md.size);
        by_offset[md.pos] = lfntab_ptr = mptr;
        mptr->type = M_LFNTAB;
        where = W_FILES;
    } else {
        /* Regular Member */
        if(std::strncmp(md.member.filename, "$PAD.", 5) == 0) {
            if(mptr->size & 1)
                ++mptr->size;
            mptr->type = M_PAD;
        } else
            by_name[md.member.getFileName(lfntab)] = mptr;
        by_offset[md.pos] = mptr;
        where = W_FILES;
    }
    return true;
}

/**
 *  Parse BSD armap.
 *  FIXME: not implemented
 *
 *  This function unconditionally throws LinkerError; /md/ (the armap
 *  member) is not examined. getNextMember() calls it only when armap
 *  parsing was requested.
 */
void
Archive::parseBsdArmap(MemberData& md)
{
    throw LinkerError("FIXME: parseBsdArmap not implemented");
}

/**
 *  Parse COFF armap. This type is used by Linux.
 *
 *  Reads the data of the armap member /md/ and enters each symbol it
 *  lists into /armap/, mapped to the offset stored for it.
 *
 *  \throw LinkerError if the armap data cannot be read completely or
 *         is too small for the number of entries it claims to have.
 */
void
Archive::parseCoffArmap(MemberData& md)
{
    // COFF armap
    //  +0     DWORD   # of entries
    //  +4   n DWORDs  offsets (pointing to members)
    //  +n   n BYTEs   names (zero-terminated strings)

    // The entry count must be present before anything can be parsed.
    if(md.size < 4)
        throw LinkerError("armap damaged: too small for its entry count");

    VLArray<char> armap_image(md.size);
    off_t imgsz = file->read(md.data_pos, armap_image.getBuffer(), md.size);
    if(imgsz != md.size)
        throw LinkerError("Archive damaged -- partial armap");

    // Reject entry counts whose offset table does not fit into the
    // member. This also keeps 4*nentries+4 from wrapping around, which
    // would otherwise let the offset reads below run past the buffer.
    uint32 nentries  = getLongB(armap_image, 0);
    if(nentries > (md.size - 4) / 4)
        throw LinkerError("armap damaged: too small for its entry count");

    uint32 stringptr = 4 * nentries + 4;
    uint32 offsetptr = 4;

    while(nentries--) {
        if(stringptr >= md.size)
            throw LinkerError("armap damaged: too small for its entry count");

        uint32 offset = getLongB(armap_image, offsetptr);
        offsetptr += 4;
        // Find the end of the name. The bounds check comes first so
        // that an unterminated final name does not read past the image.
        uint32 stringend = stringptr;
        while(stringend < md.size && armap_image[stringend] != 0)
            ++stringend;
        string_t symname = string_t(armap_image.getBuffer() + stringptr,
                                          stringend - stringptr);
        stringptr = stringend + 1;
        armap[symname] = offset;
    }
}

/**
 *  Create empty member. The new member is placed at /pos/ with a data
 *  size of /size/ and is marked dirty. Its header is blank-filled, with
 *  mtime, uid, gid and mode set to zero. The member is not entered into
 *  any directory; that is up to the caller.
 */
Archive::MPtr
Archive::createEmptyMember(off_t pos, off_t size)
{
    MPtr fresh = new MemberData;

    /* bookkeeping */
    fresh->type     = M_FILE;
    fresh->dirty    = true;
    fresh->size     = size;
    fresh->pos      = pos;
    fresh->data_pos = fresh->pos + sizeof(ArchiveMember);

    /* header: start out with blanks, then fill in the numeric fields
       and the trailing magic */
    std::memset(&fresh->member, ' ', sizeof(ArchiveMember));
    storeNumber(fresh->member.mtime, 10, 0);
    storeNumber(fresh->member.uid, 10, 0);
    storeNumber(fresh->member.gid, 10, 0);
    storeNumber(fresh->member.mode, 8, 0);
    storeNumber(fresh->member.size, 10, size);
    fresh->member.magic[0] = '`';
    fresh->member.magic[1] = '\012';

    return fresh;
}

/**
 *  Allocate space for a member of /size/ bytes, located at or after
 *  file position /min/. Padding blocks are re-used where possible: a
 *  block of exactly the right size is converted, a sufficiently larger
 *  one is shortened and the new member placed at its end. Otherwise the
 *  member is appended to the archive.
 *
 *  Note that the current implementation reads the whole archive
 *  which is clearly suboptimal.
 */
Archive::MPtr
Archive::allocateSpace(uint32 size, uint32 min)
{
    // FIXME: suboptimal
    while(getNextMember()) {
        /* just read everything */
    }

    const uint32 needed = size + (size & 1);

    // start at the first member at or after /min/
    for(std::map<off_t, MPtr>::iterator i = by_offset.lower_bound(min);
        i != by_offset.end(); ++i) {
        MPtr& hole = i->second;
        if(hole->type != M_PAD)
            continue;

        // padding blocks are accounted with even sizes
        if(hole->size & 1)
            ++hole->size;

        if(hole->size == needed) {
            /* exact fit: turn the padding block into the new member */
            hole->type  = M_FILE;
            hole->size  = size;
            hole->dirty = true;
            return hole;
        }

        if(hole->size >= needed + sizeof(ArchiveMember)) {
            /* larger hole: shrink it and put the new member at its end */
            hole->size -= needed + sizeof(ArchiveMember);
            hole->dirty = true;
            ASSERT(i->second->size % 2 == 0);

            MPtr n = createEmptyMember(hole->data_pos + hole->size, size);

            by_offset[n->pos] = n;
            return n;
        }
    }

    // nothing suitable: append to the archive
    ASSERT(min <= uint32(pos)); // FIXME: add padding instead of failing
    off_t p = pos;
    if(min > uint32(pos))
        p = min;
    MPtr n = createEmptyMember(p, size);

    by_offset[n->pos] = n;
    pos += needed + sizeof(ArchiveMember);
    return n;
}

/**
 *  Build armap. Constructs the complete armap in /array/.
 *  Currently, this always makes a COFF armap, by delegating to
 *  buildCoffArmap().
 *
 *  \param array receives the armap image.
 *  \pre array must be empty.
 */
void
Archive::buildArmap(VLArray<char>& array)
{
    ASSERT(array.getSize() == 0); // must be empty
    buildCoffArmap(array);        // FIXME: also BSD format
}

/**
 *  Build COFF armap. The image consists of the entry count, one
 *  offset per symbol and the zero-terminated symbol names, in that
 *  order, and is stored in /array/. If there are no symbols, /array/
 *  is left untouched.
 */
void
Archive::buildCoffArmap(VLArray<char>& array)
{
    typedef std::map<string_t, off_t>::iterator SymIter;

    /* pass 1: count the entries and add up the space they need. Each
       entry takes 4 bytes for the offset plus its name and a
       terminating zero; 4 more bytes hold the entry count. */
    uint32 count = 0;
    uint32 size = 4;
    SymIter i = armap.begin();
    while(i != armap.end()) {
        ASSERT(i->second != 0);
        size += 5 + i->first.length();
        ++count;
        ++i;
    }
    if(count == 0)              // this leaves the array empty
        return;

    /* pass 2: write the count, then fill offset table and name table
       side by side */
    array.changeSize(size);
    putLongB(array, 0, count);
    uint32 ptrptr = 4;
    uint32 stringptr = 4*count + 4;
    for(SymIter sym = armap.begin(); sym != armap.end(); ++sym) {
        const string_t& symname = sym->first;
        putLongB(array, ptrptr, sym->second);
        std::strcpy(array.getBuffer() + stringptr, symname.c_str());
        ptrptr += 4;
        stringptr += 1 + symname.length();
    }
    ASSERT(stringptr == array.getSize());
    ASSERT(ptrptr == 4*count + 4);
}

/**
 *  Get Member by Name. Looks up /name/ among the members read so far
 *  and, if it is not there, reads further members until one with that
 *  name shows up. Returns MPtr or 0 if no such member.
 */
Archive::MPtr
Archive::getMemberByName(string_t name)
{
    // get the special members at the start of the archive out of the way
    while(where < W_FILES) {
        if(!getNextMember())
            break;
    }

    // a member we have seen before?
    std::map<string_t, MPtr>::iterator known = by_name.find(name);
    if(known != by_name.end())
        return known->second;

    // read on; each member just read is the last entry of by_offset
    while(getNextMember()) {
        std::map<off_t, MPtr>::iterator newest = by_offset.end();
        --newest;
        if(newest->second->member.getFileName(lfntab) == name)
            return newest->second;
    }
    return 0;
}

/**
 *  Get Member by Offset. Returns the member whose header starts at
 *  file position /offset/, or 0 if there is none.
 */
Archive::MPtr
Archive::getMemberByOffset(off_t offset)
{
    // Read members until the read position has passed /offset/. In
    // theory we could jump to the requested offset directly; going
    // through the headers one by one gives us an implicit validity
    // check and keeps the data structures consistent.
    while(pos <= offset) {
        if(!getNextMember())
            break;
    }

    std::map<off_t, MPtr>::iterator hit = by_offset.find(offset);
    if(hit == by_offset.end())
        return 0;
    return hit->second;
}

/**
 *  Look up a symbol in the armap. Returns the offset recorded for
 *  /symname/, or 0 if the symbol is not listed. Pass the offset to
 *  getMemberByOffset to obtain the member.
 */
off_t
Archive::getOffsetOfSymbol(string_t symname)
{
    // the armap is only available after its member has been read
    while(where <= W_ARMAP) {
        if(!getNextMember())
            break;
    }

    std::map<string_t, off_t>::iterator hit = armap.find(symname);
    if(hit == armap.end())
        return 0;
    return hit->second;
}

/**
 *  Change armap entry. Requests that all armap entries pointing to
 *  the member at file position /old/ be redirected to /nw/. Passing 0
 *  for /nw/ requests deletion of those entries. The request is only
 *  recorded in /armap_frob/ here; updateArmap() carries it out. A
 *  later request for the same /old/ replaces an earlier one.
 */
void
Archive::changeArmapEntry(off_t old, off_t nw)
{
    typedef std::map<off_t, off_t> MoveMap;

    std::pair<MoveMap::iterator, bool> slot =
        armap_frob.insert(MoveMap::value_type(old, nw));
    if(!slot.second)
        slot.first->second = nw;
}

/** Discard the whole armap, including all movements that are still
    pending. */
void
Archive::clearArmap()
{
    std::map<off_t, off_t> no_moves;
    std::map<string_t, off_t> no_symbols;
    armap.swap(no_symbols);
    armap_frob.swap(no_moves);
}

/**
 *  Add armap entry. Records that the symbol /name/ is defined by the
 *  member at position /offs/; an existing entry for /name/ is
 *  overwritten. Note that you should not call this after
 *  changeArmapEntry before updateArmap.
 */
void
Archive::addArmapEntry(string_t name, off_t offs)
{
    typedef std::map<string_t, off_t> SymbolMap;

    std::pair<SymbolMap::iterator, bool> slot =
        armap.insert(SymbolMap::value_type(name, offs));
    if(!slot.second)
        slot.first->second = offs;
}

/**
 *  Update armap. This performs all changes requested by
 *  changeArmapEntry.
 */
void
Archive::updateArmap()
{
    std::map<string_t, off_t>::iterator t, n;
    t = armap.begin();
    while(t != armap.end()) {
        n = t++;
        std::map<off_t, off_t>::iterator i = armap_frob.find(n->second);
        if(i != armap_frob.end()) {
            if(i->second)
                n->second = i->second;
            else
                armap.erase(n);
        }
    }
    std::map<off_t, off_t> dummy;
    armap_frob.swap(dummy);
}

/**
 *  Compact directory by collapsing adjacent padding blocks.
 */
void
Archive::compactDirectory()
{
    std::map<off_t, MPtr>::iterator t, n;
    t = by_offset.begin();
    while(t != by_offset.end()) {
        if(t->second->type != M_PAD) {
            ++t;
        } else {
            n = t;
            ++n;
            if(n != by_offset.end() && n->second->type == M_PAD) {
                /* ok, this is padding */
                t->second->size = n->second->data_pos + n->second->size
                    - t->second->data_pos;
                t->second->dirty = true;
                by_offset.erase(n);
            } else
                t = n;
        }
    }
}

/** Return the file this archive was constructed with. */
Ptr<VFile>
Archive::getFile() const
{
    return file;
}

/**
 *  Change the lfntab. The table is resized to /len/ bytes and filled
 *  with a copy of the /len/ bytes at /buf/. Absolutely no consistency
 *  checks are made; the caller must make sure that everything remains
 *  valid.
 */
void
Archive::setLfnTab(const char* buf, std::size_t len)
{
    lfntab.changeSize(len);

    const char* const buf_end = buf + len;
    std::copy(buf, buf_end, lfntab.getBuffer());
}

/**
 *  Perform self-check. Checks a bunch of assertions on the member
 *  directory /by_offset/: every entry has a member object, members
 *  start at even positions and follow each other without gaps (data
 *  padded to even length), each data_pos lies directly behind the
 *  member header, and the last member ends at /pos/. An empty
 *  directory passes trivially.
 */
void
Archive::selfcheck()
{
    if(by_offset.empty())
        return;
    /* debugging self-check */
    std::map<off_t, MPtr>::iterator t = by_offset.begin();
    ASSERT(t->second.ptr());
    /* expected position of the next member; note that this local
       hides the member variable of the same name */
    off_t where = t->second->pos;

    while(t != by_offset.end()) {
        /* NOTE(review): volatile presumably keeps this pointer
           inspectable in a debugger -- confirm */
        MemberData* volatile md = t->second.ptr();
        ASSERT(md);
      /*  std::cerr << where << ":\t"
                  << t->second->pos << " +"
                  << t->second->size << " ("
                  << string_t(t->second->member.filename, 16) << " @ "
                  << t->second->data_pos << ")\n"; */
        ASSERT(where == t->second->pos);
        ASSERT(t->second->data_pos == t->second->pos + sizeof(ArchiveMember));
        ASSERT(where % 2 == 0);
        /* the next member starts behind this one's data, rounded up
           to an even position */
        where = t->second->data_pos + t->second->size + t->second->size % 2;
        ++t;
    }
    /* the directory must end exactly at the current end position */
    ASSERT(where == pos);
}

/**
 *  \class Archive::Iterator
 *  \brief Archive Iterator
 *
 *  This class provides a convenient way to iterate through an archive.
 *  This is \em not a standard STL iterator!
 *
 *  It is used like this:
 *  \verbatim
        Archive::Iterator it(arch);
        if(it.valid())
            do {
                doSomething(*it);
            } while(it.next());
 *  \endverbatim
 */

/** Create an iterator over the members of /aarch/. Members are read
    until the archive has reached the regular files (or its end); the
    iterator then starts at the first directory entry. */
Archive::Iterator::Iterator(Archive& aarch)
    : arch(aarch)
{
    while(arch.where < W_FILES) {
        if(!arch.getNextMember())
            break;
    }
    iter = arch.by_offset.begin();
}

/** Fetch next item. */
bool
Archive::Iterator::next()
{
    std::map<off_t, MPtr>::iterator save = iter;
    ++iter;
    if(iter == arch.by_offset.end()) {
        /* end of our information */
        if(arch.getNextMember())
            iter = ++save;
        return iter != arch.by_offset.end();
    } else
        return true;
}
