#! /usr/bin/env python

usage = """%prog Version of 25th November 2009

(c) Mark Johnson

Counts the number of times features in a specific feature class get used

usage: %prog [options]"""

import gzip, optparse, sys

def features(inf):
    """Read a feature table and return its second column indexed by feature id.

    Each line of `inf` is split on whitespace.  The first field is the
    integer feature id and the second field is the value that gets stored
    (the --features help text describes it as the feature class).  Any
    further fields are ignored.

    Ids must be consecutive, starting at 0, in file order, so that the
    position of a value in the returned list is its feature id.

    Args:
        inf: an iterable of text lines, e.g. an open file.

    Returns:
        A list whose element i is the (interned) second field of the line
        with id i.

    Raises:
        ValueError: if a line has fewer than two fields, its id is not an
            integer, or its id is not the next expected id.
    """
    # intern() is a builtin on Python 2 but is only available as sys.intern
    # on Python 3; pick whichever this interpreter provides.
    try:
        intern_str = sys.intern
    except AttributeError:
        intern_str = intern

    feats = []
    # Every accepted line appends exactly one entry, so the 0-based line
    # index is also the id that line is required to carry.
    for expected_id, line in enumerate(inf):
        fields = line.split()
        if len(fields) < 2:
            raise ValueError("line %d: expected 'id feature_class ...', got %r"
                             % (expected_id + 1, line))
        ident = int(fields[0])
        if ident != expected_id:
            raise ValueError("line %d: expected feature id %d, got %d"
                             % (expected_id + 1, expected_id, ident))
        # The same few values repeat many times; interning makes the
        # duplicates share a single string object.
        feats.append(intern_str(fields[1]))
    return feats

def weights(inf):
    """Read 'id=weight' lines and return a dict mapping id to weight.

    Each line is stripped of surrounding whitespace and split on '='.  The
    first field is parsed as an integer id and the second as a float
    weight; any further '='-separated fields are ignored.

    Args:
        inf: an iterable of text lines, e.g. an open file.

    Returns:
        A dict {int id: float weight} with one entry per input line.

    Raises:
        ValueError: if a line contains no '=', its id or weight cannot be
            parsed, or an id occurs on more than one line.
    """
    id_weight = {}
    for line in inf:
        fields = line.strip().split('=')
        if len(fields) < 2:
            raise ValueError("expected 'id=weight', got %r" % (line,))
        ident = int(fields[0])
        weight = float(fields[1])
        # Raised explicitly rather than asserted so that the check still
        # runs under `python -O`; otherwise a repeated id would silently
        # replace the earlier weight.
        if ident in id_weight:
            raise ValueError("duplicate weight for id %d" % ident)
        id_weight[ident] = weight
    return id_weight

if __name__ == "__main__":
    # Script entry point: read the feature table and the weights file, then
    # print one CSV-style row per feature class giving how many of its
    # features have an entry in the weights file and how many it has in total.
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-f", "--features", dest="features", type="str",
                      help="gzipped file of id feature_class feature")
    parser.add_option("-w", "--weights", dest="weights", type="str",
                      help="gzipped file of id weight")

    (options, args) = parser.parse_args()

    # Both files are mandatory.  parser.error() prints the usage text and
    # exits with status 2, and unlike an assert it is not disabled by -O.
    if not options.features:
        parser.error("no features file given (use -f/--features)")
    if not options.weights:
        parser.error("no weights file given (use -w/--weights)")

    # The parsers work on text lines.  gzip.open(..., 'r') yields str lines
    # on Python 2 but bytes on Python 3, where 'rt' requests text mode.
    mode = 'rt' if sys.version_info[0] >= 3 else 'r'

    # Close each gzip handle as soon as it has been read, even on error.
    inf = gzip.open(options.features, mode)
    try:
        fs = features(inf)
    finally:
        inf.close()

    inf = gzip.open(options.weights, mode)
    try:
        ws = weights(inf)
    finally:
        inf.close()

    feat_count = {}          # feature class -> number of features in it
    nonzerofeat_count = {}   # feature class -> number of those with a weight
    feats = []               # feature classes in order of first appearance
    for i, f in enumerate(fs):
        if f not in feat_count:
            feats.append(f)
        feat_count[f] = feat_count.get(f, 0) + 1
        # "Nonzero" here means "the id is present in the weights file"; the
        # weight value itself is not inspected.
        # NOTE(review): presumably the weights file lists only nonzero
        # weights -- confirm against whatever produces it.
        if i in ws:
            nonzerofeat_count[f] = nonzerofeat_count.get(f, 0) + 1

    # print(<one string>) gives the same output whether print is a
    # statement or a function.
    print(', '.join(("Feature", "NonzeroWeights", "Count")))
    for feat in feats:
        print(', '.join((feat,
                         str(nonzerofeat_count.get(feat, 0)),
                         str(feat_count[feat]))))



    
        
