#!/usr/bin/python
#
# Script to check the internal integrity and errors in the rpmdb
# Additional parameters may be given which can be either rpms or dirs which
# then get scanned for rpms against which the rpms in the rpmdb are compared
# in the second phase.
#


import sys, os.path, getopt
from bsddb import hashopen, btopen
from binascii import b2a_hex, a2b_hex
from struct import unpack

PYRPMDIR = "/usr/share/pyrpm"
if not PYRPMDIR in sys.path:
    sys.path.append(PYRPMDIR)
from pyrpm import __version__
from pyrpm import *
from pyrpm.yum import *
from pyrpm.database.rpmdb import *


def usage():
    """Print the command line help text to standard output."""
    # The synopsis lists every long option that main() passes to getopt.
    print("""
pyrpmdbverify [-h, --help] [-v] [--dbpath=] [--nointernal] [--noexternal]
              [-c YUMCONFIG] [PACKAGES | DIRS]

--help:       This usage ;)
--dbpath:     Specifies a different path for the rpmdb. Default is /var/lib/rpm
--nointernal: Disable rpmdb internal check.
--noexternal: Disable rpmdb check against repo or command line packages.
-c:           Use the yum repositories specified in the given config file to
              test the rpms against. Can be used together with additional
              packages.
-v            verbose
PACKAGES | DIRS: Optional single rpms or directories of rpms against which
                 the headers from the rpmdb will be checked.
""")

# Header tags requested when rpm files or directories named on the command
# line are read. Extend this tuple if a check needs additional tags.
rtags = (
    "name",
    "epoch",
    "version",
    "release",
    "arch",
    "sha1header",
    "sourcerpm",
)

#
# DB Read functions
#
def readDB4(db, func):
    """Turn an opened index database into a nested dictionary.

    db   -- mapping-like object providing keys() and item lookup; every
            value is a byte string made of consecutive 8 byte records, each
            unpacked with struct format "II"
    func -- callable applied to every key before it is stored

    Returns {first record field: {second record field: [converted keys]}}.
    A converted key equal to "\\x00" is stored as the empty string.
    Raises struct.error if a value ends in a record shorter than 8 bytes.
    """
    rethash = {}
    for key in db.keys():
        data = db[key]
        tag = func(key)
        if tag == "\x00":
            tag = ""
        for offset in range(0, len(data), 8):
            (pkgid, idx) = unpack("II", data[offset:offset+8])
            # setdefault creates the missing levels on first use and avoids
            # the separate has_key() lookups.
            rethash.setdefault(pkgid, {}).setdefault(idx, []).append(tag)
    return rethash

def readDB4hash(filename, func=lambda x:x):
    """Read a hash-format index file and return its contents.

    filename -- path handed to bsddb.hashopen(), opened read-only
    func     -- key conversion passed on to readDB4(); identity by default

    Returns the nested dictionary built by readDB4().
    """
    db = hashopen(filename, "r")
    try:
        return readDB4(db, func)
    finally:
        # readDB4() copies everything into plain dicts, so the handle can be
        # released as soon as it returns (or raises).
        db.close()

def readDB4btree(filename, func=lambda x:x):
    """Read a btree-format index file and return its contents.

    filename -- path handed to bsddb.btopen(), opened read-only
    func     -- key conversion passed on to readDB4(); identity by default

    Returns the nested dictionary built by readDB4().
    """
    db = btopen(filename, "r")
    try:
        return readDB4(db, func)
    finally:
        # readDB4() copies everything into plain dicts, so the handle can be
        # released as soon as it returns (or raises).
        db.close()

def readBasenames():
    """Load <dbpath>/Basenames (hash format) into the global `basenames`."""
    global basenames
    path = dbpath+"/Basenames"
    basenames = readDB4hash(path)

def readConflictname():
    """Load <dbpath>/Conflictname (hash format) into the global `conflictname`."""
    global conflictname
    path = dbpath+"/Conflictname"
    conflictname = readDB4hash(path)

def readDirnames():
    """Load <dbpath>/Dirnames (btree format) into the global `dirnames`."""
    global dirnames
    path = dbpath+"/Dirnames"
    dirnames = readDB4btree(path)

def readFilemd5s():
    """Load <dbpath>/Filemd5s (hash format) into the global `filemd5s`."""
    global filemd5s
    path = dbpath+"/Filemd5s"
    filemd5s = readDB4hash(path)

def readGroup():
    """Load <dbpath>/Group (hash format) into the global `group`."""
    global group
    path = dbpath+"/Group"
    group = readDB4hash(path)

def readInstalltid():
    """Load <dbpath>/Installtid (btree format) into the global `installtid`.

    Keys are converted with struct format "i" before being stored.
    """
    global installtid

    def keyToInt(rawkey):
        # unpack() returns a 1-tuple for format "i"; keep just the value.
        return unpack("i", rawkey)[0]

    path = dbpath+"/Installtid"
    installtid = readDB4btree(path, keyToInt)

def readName():
    """Load <dbpath>/Name (hash format) into the global `name`."""
    global name
    path = dbpath+"/Name"
    name = readDB4hash(path)

def readProvidename():
    """Load <dbpath>/Providename (hash format) into the global `providename`."""
    global providename
    path = dbpath+"/Providename"
    providename = readDB4hash(path)

def readProvideversion():
    """Load <dbpath>/Provideversion (btree format) into the global `provideversion`."""
    global provideversion
    path = dbpath+"/Provideversion"
    provideversion = readDB4btree(path)

def readRequirename():
    """Load <dbpath>/Requirename (hash format) into the global `requirename`."""
    global requirename
    path = dbpath+"/Requirename"
    requirename = readDB4hash(path)

def readRequireversion():
    """Load <dbpath>/Requireversion (btree format) into the global `requireversion`."""
    global requireversion
    path = dbpath+"/Requireversion"
    requireversion = readDB4btree(path)

def readSha1header():
    """Load <dbpath>/Sha1header (hash format) into the global `sha1header`."""
    global sha1header
    path = dbpath+"/Sha1header"
    sha1header = readDB4hash(path)

def readSigmd5():
    """Load <dbpath>/Sigmd5 (hash format) into the global `sigmd5`."""
    global sigmd5
    path = dbpath+"/Sigmd5"
    sigmd5 = readDB4hash(path)

def readTriggername():
    """Load <dbpath>/Triggername (hash format) into the global `triggername`."""
    global triggername
    path = dbpath+"/Triggername"
    triggername = readDB4hash(path)

def readRpmDB():
    """Read all index files and the package headers into module globals.

    Each index is loaded through its read*() function. The values of
    `filemd5s` are replaced by their hex representation. The global
    `packages` maps the integer unpacked from each package's `key` attribute
    to the package object returned by RpmDB.getPkgs(). Signature entries
    "md5" and "sha1header" are copied onto the package as "install_md5" and
    "install_sha1header" when present.
    """
    global packages
    print("Reading rpm database. This might take some time...")

    for loader in (readBasenames, readConflictname, readDirnames,
                   readFilemd5s):
        loader()

    # Replace every stored digest by its hex string.
    for entries in filemd5s.values():
        for idx in entries.keys():
            entries[idx] = [b2a_hex(raw) for raw in entries[idx]]

    for loader in (readGroup, readInstalltid, readName):
        loader()

    rpmdb = RpmDB(rpmconfig, dbpath)
    rpmdb.read()
    pkgs = rpmdb.getPkgs()
    packages = {}
    for pkg in pkgs:
        pkgid = unpack("I", pkg.key)[0]
        packages[pkgid] = pkg

    for pkg in packages.values():
        signature = pkg["signature"]
        if signature.has_key("md5"):
            pkg["install_md5"] = signature["md5"]
        if signature.has_key("sha1header"):
            pkg["install_sha1header"] = signature["sha1header"]

    for loader in (readProvidename, readProvideversion, readRequirename,
                   readRequireversion, readSha1header, readSigmd5,
                   readTriggername):
        loader()


#
# Verify functions
#
def verifyStructure(hash, tag, useidx=True):
    """Cross-check one index against the headers in the global `packages`.

    hash   -- index contents as built by readDB4():
              {package id: {index position: [keys]}}
    tag    -- header tag the index is compared with
    useidx -- True: the header value is a sequence and the index position
              selects the element to compare. False: the header value is
              compared as a whole.

    Two passes are made: every index entry must be backed by a header
    value, and every header value must be referenced by the index. Each
    inconsistency is printed as an "Error ..." line; nothing is returned.
    """
    # Check db4 files against /var/lib/rpm/Packages
    for pkgid in hash.keys():
        # Membership is tested on the dict itself; building packages.keys()
        # for every id made this loop quadratic.
        if pkgid not in packages:
            print("Error %s: Package id %s doesn't exist" % (tag, pkgid))
            continue
        pkg = packages[pkgid]
        if not pkg.has_key(tag):
            print("Error %s: Tag not found in package %s" % (tag, pkg.getNEVRA()))
            continue
        taghash = hash[pkgid]
        pkghash = pkg[tag]
        for idx in taghash.keys():
            if useidx:
                if len(pkghash) <= idx:
                    print("Error %s: Index to high for key %s in package %s" % (tag, str(taghash[idx]), pkg.getNEVRA()))
                    continue
                val = pkghash[idx]
            else:
                val = pkghash
            if not val in taghash[idx]:
                print("Error %s: %s not found in package %s" % (tag, val, pkg.getNEVRA()))

    # Check /var/lib/rpm/Packages against the db4 files
    for pkgid in packages.keys():
        pkg = packages[pkgid]
        refhash = pkg[tag]
        if not refhash:
            continue
        # None when the index has no entry at all for this package.
        taghash = hash.get(pkgid)
        if not useidx:
            if not taghash:
                print("Error in %s db: No database entries found for package %s" % (tag, pkg.getNEVRA()))
            # A damaged index may lack position 0; report that as a missing
            # reference instead of dying with a KeyError.
            elif refhash not in taghash.get(0, ()):
                print("Error in %s db: %s not referenced for package %s" % (tag, refhash, pkg.getNEVRA()))
            continue
        seen_triggers = {}
        for idx in xrange(len(refhash)):
            val = refhash[idx]
            if tag == "requirename":
                # Skip install prereqs, just like rpm does...
                if isInstallPreReq(pkg["requireflags"][idx]):
                    continue
            if tag == "filemd5s":
                # Only non-empty md5 sums of regular files are indexed.
                if val == "" or val == "\x00":
                    continue
                if not S_ISREG(pkg["filemodes"][idx]):
                    continue
            # Equal Triggernames aren't added multiple times for the same
            # package
            if tag == "triggername":
                if seen_triggers.has_key(val):
                    continue
                seen_triggers[val] = 1
            if not taghash:
                print("Error in %s db: No database entries found for package %s" % (tag, pkg.getNEVRA()))
                break
            if not taghash.has_key(idx):
                # XXX: This does happen for .spec files with translations:
                #Group:          System/Libraries
                #Group(de):      System/Bibliotheken
                print("Error in %s db: Index %s not found in db for package %s" % (tag, idx, pkg.getNEVRA()))
                continue
            if not val in taghash[idx]:
                print("Error in %s db: %s not referenced for package %s" % (tag, val, pkg.getNEVRA()))
                continue

def verifyBasenames():
    """Print a progress line, then check `basenames` against tag "basenames"."""
    print("Checking Basename integrity.")
    verifyStructure(basenames, "basenames")

def verifyConflictname():
    """Print a progress line, then check `conflictname` against tag "conflictname"."""
    print("Checking Conflictname integrity.")
    verifyStructure(conflictname, "conflictname")

def verifyDirnames():
    """Print a progress line, then check `dirnames` against tag "dirnames"."""
    print("Checking Dirnames integrity.")
    verifyStructure(dirnames, "dirnames")

def verifyFilemd5s():
    """Print a progress line, then check `filemd5s` against tag "filemd5s"."""
    print("Checking Filemd5s integrity.")
    verifyStructure(filemd5s, "filemd5s")

def verifyGroup():
    """Print a progress line, then check `group` against tag "group"."""
    print("Checking Group integrity.")
    verifyStructure(group, "group")

def verifyInstalltid():
    """Print a progress line, then check `installtid` against tag "installtid"."""
    print("Checking Installtid integrity.")
    verifyStructure(installtid, "installtid")

def verifyName():
    """Print a progress line, then check `name` against tag "name".

    The header value is compared as a whole (useidx is False).
    """
    print("Checking Name integrity.")
    verifyStructure(name, "name", False)

def verifyPackages():
    """Check every header stored in <dbpath>/Packages.

    For each entry of the global `packages` (entries named "gpg-pubkey" are
    skipped) a header is regenerated with RpmFileIO._generateHeader() and
    compared with the bytes stored under the same id in the Packages
    database. If the package has a "sha1header" signature entry, a second
    regenerated header is hashed and compared with that entry; when this
    fails and the package has no "archivesize", the signature's
    "payloadsize" is filled in and the comparison is retried once.
    Mismatches are printed; nothing is returned.
    """
    # `pack` builds the Packages key below, but the file's own struct import
    # only names `unpack`; import it here instead of relying on one of the
    # wildcard imports to provide it.
    from struct import pack
    # NOTE(review): `sha` is not imported by this file directly either;
    # presumably it comes from one of the wildcard imports - confirm.

    print("Checking Packages integrity.")
    # Third argument of the second _generateHeader() call.
    # NOTE(review): presumably rpm tag ids - verify against RpmFileIO.
    tag_ids = (257, 261, 262, 264, 265, 267, 269, 1008, 1029, 1046, 1099, 1127, 1128)
    rpmio = RpmFileIO(rpmconfig, "dummy")
    packages_db = hashopen(dbpath+"/Packages", "r")
    try:
        for id in packages.keys():
            pkg = packages[id]
            # Don't try to verify that structure of gpg-pubkey packages yet.
            if pkg["name"] == "gpg-pubkey":
                continue
            (headerindex, headerdata) = rpmio._generateHeader(pkg, 4)
            data1 = headerindex[16:]+headerdata
            data2 = packages_db[pack("I", id)][8:]
            if data1 != data2:
                print("Error packages: Self generated header different from original stored header for package %s" % pkg.getNEVRA())
            # A fresh list is passed on every call, as the original did.
            (headerindex, headerdata) = rpmio._generateHeader(pkg, 1, list(tag_ids))
            signature = pkg["signature"]
            if not signature.has_key("sha1header"):
                continue
            if signature["sha1header"] == sha.new(headerindex+headerdata).hexdigest():
                continue
            # Retry with archivesize taken from the signature. The original
            # read an undefined name `pkg` here and raised a NameError.
            if not pkg.has_key("archivesize") and \
               signature.has_key("payloadsize"):
                pkg["archivesize"] = signature["payloadsize"]
                (headerindex, headerdata) = rpmio._generateHeader(pkg, 1, list(tag_ids))
            if signature["sha1header"] == sha.new(headerindex+headerdata).hexdigest():
                continue
            print("Error packages: Package %s SHA1 header checksum incorrect" % pkg.getNEVRA())
    finally:
        # Release the database handle even if a lookup or header generation
        # raises.
        packages_db.close()


def verifyProvidename():
    """Print a progress line, then check `providename` against tag "providename"."""
    print("Checking Providename integrity.")
    verifyStructure(providename, "providename")

def verifyProvideversion():
    """Print a progress line, then check `provideversion` against tag "provideversion"."""
    print("Checking Provideversion integrity.")
    verifyStructure(provideversion, "provideversion")

def verifyRequirename():
    """Print a progress line, then check `requirename` against tag "requirename"."""
    print("Checking Requirename integrity.")
    verifyStructure(requirename, "requirename")

def verifyRequireversion():
    """Print a progress line, then check `requireversion` against tag "requireversion"."""
    print("Checking Requireversion integrity.")
    verifyStructure(requireversion, "requireversion")

def verifySha1header():
    """Print a progress line, then check `sha1header` against tag "install_sha1header".

    The header value is compared as a whole (useidx is False).
    """
    print("Checking Sha1Header integrity.")
    verifyStructure(sha1header, "install_sha1header", False)

def verifySigmd5():
    """Print a progress line, then check `sigmd5` against tag "install_md5".

    The header value is compared as a whole (useidx is False).
    """
    print("Checking Signature MD5 integrity.")
    verifyStructure(sigmd5, "install_md5", False)

def verifyTriggername():
    """Print a progress line, then check `triggername` against tag "triggername"."""
    print("Checking Triggername integrity."
          )
    verifyStructure(triggername, "triggername")

def verifyRpmDB():
    """Run every internal verify*() check once, in a fixed order."""
    checks = (
        verifyBasenames,
        verifyConflictname,
        verifyDirnames,
        verifyFilemd5s,
        verifyGroup,
        verifyInstalltid,
        verifyName,
        verifyPackages,
        verifyProvidename,
        verifyProvideversion,
        verifyRequirename,
        verifyRequireversion,
        verifySha1header,
        verifySigmd5,
        verifyTriggername,
    )
    for check in checks:
        check()

def diffPkgs(pkg1, pkg2):
    """Print the differences found when comparing pkg1's tags with pkg2.

    Every key of pkg1 that is not in the ignore list must exist in pkg2,
    have the same length there and compare equal element by element. Only
    keys of pkg1 are visited, so callers wanting a symmetric comparison
    call this twice with the arguments swapped.
    """
    skipped = ["signature", "obsoletes", "requires", "provides", "conflicts", "triggers", "install_dsaheader", "install_md5", "archivesize", "install_size_in_sig", "install_sha1header", "install_gpg", "instprefixes", "installtime", "installcolor", "installtid", "filestates"]
    nevra = pkg1.getNEVRA()
    for key in pkg1.keys():
        if key in skipped:
            continue
        if not pkg2.has_key(key):
            print("Error: Installed package %s doesn't contain tag %s" % (nevra, key))
            continue
        left = pkg1[key]
        right = pkg2[key]
        left_len = len(left)
        right_len = len(right)
        if left_len != right_len:
            print("Error: Length of tag %s differ for package %s: %d != %d" % (key, nevra, left_len, right_len))
            continue
        for idx in xrange(left_len):
            if left[idx] == right[idx]:
                continue
            print("Error: Value for package %s differ for tag %s at index %s" % (nevra, key, idx))

def checkRpmDB(repos):
    """Compare installed package headers with the matching rpm files.

    repos -- object providing getPkgsByName(); when it is false (for
             example None) no rpm file can be located and every package is
             skipped

    For each entry of the global `packages` (entries named "gpg-pubkey" or
    lacking a "sha1header" signature are skipped) the matching rpm file is
    read, completed with install-time data taken from the installed package,
    and its regenerated header is compared with the bytes stored in
    <dbpath>/Packages. Both packages are then diffed tag by tag in each
    direction. Packages without a matching rpm file are skipped; this is
    only reported when rpmconfig.verbose is greater than 2.
    """
    # `pack` builds the Packages key below, but the file's own struct import
    # only names `unpack`; import it here instead of relying on one of the
    # wildcard imports to provide it.
    from struct import pack

    print("Checking RpmDB against list of packages.")

    # (signature key, package key): values copied from the rpm file's own
    # signature onto the package read from that file.
    from_signature = (
        ("size_in_sig", "install_size_in_sig"),
        ("md5", "install_md5"),
        ("gpg", "install_gpg"),
        ("badsha1_1", "install_badsha1_1"),
        ("badsha1_2", "install_badsha1_2"),
        ("dsaheader", "install_dsaheader"),
        ("sha1header", "install_sha1header"),
        ("payloadsize", "archivesize"),
    )
    # Tags taken over unchanged from the installed package.
    from_installed = ("instprefixes", "installtime", "installcolor",
                      "installtid", "filestates")

    rpmio = RpmFileIO(rpmconfig, "dummy")
    packages_db = hashopen(dbpath+"/Packages", "r")
    try:
        for id in packages.keys():
            pkg = packages[id]
            if pkg["name"] == "gpg-pubkey":
                continue
            if not pkg["signature"].has_key("sha1header"):
                print("Skipping package %s, no sha1header." % pkg.getNEVRA())
                continue
            # Silently skip packages not found
            fname = None
            if repos:
                for p in repos.getPkgsByName(pkg["name"]):
                    # TODO: check for equal sha1 sums
                    if p.isEqual(pkg):
                        if p.yumrepo:
                            fname = os.path.join(p.yumrepo.baseurl, p.source)
                        else:
                            fname = p.source

            if not fname:
                if rpmconfig.verbose > 2:
                    print("Skipping package %s, not found in packagelist." % pkg.getNEVRA())
                continue
            opkg = readRpmPackage(rpmconfig, fname)
            # Fill in the package with own information and some additional
            # things we got from the rpmdb package and see if we can
            # recreate the same binary package header.
            signature = opkg["signature"]
            for (sigkey, pkgkey) in from_signature:
                if signature.has_key(sigkey):
                    opkg[pkgkey] = signature[sigkey]
            for key in from_installed:
                if pkg.has_key(key):
                    opkg[key] = pkg[key]
            (headerindex, headerdata) = rpmio._generateHeader(opkg, 4)
            data1 = headerindex[16:]+headerdata
            data2 = packages_db[pack("I", id)][8:]
            if data1 != data2:
                print("Error: Couldn't recreate rpmdb package header correctly for package %s" % opkg.getNEVRA())
                print("%s %s" % (len(data1), len(data2)))
            diffPkgs(opkg, pkg)
            diffPkgs(pkg, opkg)
    finally:
        # Release the database handle even if reading an rpm file or
        # generating a header raises.
        packages_db.close()

#
# Main
#
def main():
    """Parse the command line, read the rpm database and run the checks.

    Sets the global `dbpath` (default "/var/lib/rpm", overridden by
    --dbpath). The internal checks run unless --nointernal is given. The
    external check runs only if a yum config (-c) or at least one
    positional argument was given and --noexternal was not.

    Returns 0 if the command line could not be parsed, 1 otherwise.
    """
    global dbpath
    # Default dbpath is /var/lib/rpm
    dbpath = "/var/lib/rpm"

    # Do /var/lib/rpm internal cross reference checks
    internal_check = 1

    # Do /var/lib/rpm/Packages cross ref checks with given rpms
    external_check = 1

    # No yum config file by default
    yumconf = []

    # And disable caching by default
    rpmconfig.nocache = 1

    # Argument parsing
    try:
        opts, args = getopt.getopt(sys.argv[1:], "?hc:v",
            ["help", "dbpath=", "nointernal", "noexternal"])
    except getopt.error:
        # sys.exc_info() is used to fetch the exception because it works on
        # every Python 2 release.
        e = sys.exc_info()[1]
        print("Error parsing command list arguments: %s" % e)
        usage()
        return 0

    # Argument handling
    for (opt, val) in opts:
        # "-?" is accepted by the getopt call above; treat it like -h
        # instead of silently ignoring it.
        if opt in ("-?", "-h", "--help"):
            usage()
            return 1
        elif opt == "--dbpath":
            dbpath = val
        elif opt == "--nointernal":
            internal_check = 0
        elif opt == "--noexternal":
            external_check = 0
        elif opt == "-c":
            yumconf.append(val)
        elif opt == "-v":
            rpmconfig.verbose += 1

    readRpmDB()
    for pkg in packages.values():
        if pkg["name"] in ("fedora-release", "redhat-release"):
            rpmconfig.relver = pkg["version"]

    if internal_check:
        verifyRpmDB()

    if (not yumconf and len(args) == 0) or not external_check:
        return 1

    # NOTE(review): pkglist is filled here but never handed to checkRpmDB(),
    # which only consults the yum repositories - confirm whether command line
    # packages are meant to take part in the comparison.
    pkglist = []
    for arg in args:
        if os.path.isdir(arg):
            readDir(arg, pkglist, rtags)
        elif arg.endswith(".rpm"):
            try:
                pkglist.append(readRpmPackage(rpmconfig, arg, tags=rtags))
            except (IOError, ValueError):
                e = sys.exc_info()[1]
                sys.stderr.write("%s: %s\n" % (arg, e))

    # Stays None when no yum config was given. Previously `yum` was unbound
    # in that case and the call below raised a NameError.
    repos = None
    if len(yumconf) > 0:
        print("Reading package headers from yum repository. Please wait...")
        rpmconfig.nofileconflicts = 1
        yum = RpmYum(rpmconfig)
        for yc in yumconf:
            if os.path.isfile(yc):
                yum.addRepo(yc)
        repos = yum.repos

    checkRpmDB(repos)

    return 1

# Script entry point: exit with status 1 when run_main() reports failure.
if __name__ == '__main__':
    succeeded = run_main(main)
    if not succeeded:
        sys.exit(1)

# vim:ts=4:sw=4:showmatch:expandtab
