#include <sys/time.h>
#include <stdlib.h>
#include <iostream>
#include <hash_set>

// Run-time parameters of the experiment.  Both start at 0 and are filled in
// by main() from the command line.  They are globals because the thrasher
// hash functor reads BUCKETS when it computes an index.
int BUCKETS = 0;   // <numbuckets>: modulus used by thrasher, size of the tally table
int ELEMENTS = 0;  // <numelements>: how many rand() values main() draws

// Hash functor used by myHash: runs the library's hash<int> on the key and
// reduces the result modulo the global BUCKETS.
struct thrasher {
  // The error we had in class was caused by a signature mismatch:
  // hash_set can invoke its hasher through a const object (for example from
  // const member functions), so operator() has to be a const member
  // function.  The const on the by-value parameter is harmless, but it is
  // not the const that matters.
  //
  // x: the key to hash.
  // Returns hash<int>()(x) % BUCKETS.  While BUCKETS is not positive (it is
  // 0 until main() sets it) the result is 0, so that the modulo can never
  // divide by zero.
  int operator() (const int x) const {
    if (BUCKETS <= 0) return 0;

    hash<int> H;
    return H(x) % BUCKETS;
  }
};


// The container under test: a hash_set of ints whose second template
// argument (the hash functor) is thrasher.
typedef hash_set<int,thrasher> myHash;

// Hash distribution experiment.
//
// Usage: <prog> <numbuckets> <numelements>
//
// Draws <numelements> values from rand(), stores each distinct value in a
// myHash constructed with <numbuckets>, and tallies in a side table how many
// distinct values thrasher maps to each index in [0, numbuckets).  The
// statistics printed at the end describe that side table; the set's own
// internal layout is never inspected.
//
// Exits with status 1 if the argument count is wrong, if <numbuckets> is not
// a positive integer, or if <numelements> is negative.  Returns 0 otherwise.
int main(int argc, char* argv[])
{
  if (argc != 3) {
    cerr << "Usage: " << argv[0] << " <numbuckets> <numelements>" << endl;
    exit(1);
  }

  BUCKETS = atoi(argv[1]);
  ELEMENTS = atoi(argv[2]);

  // atoi() yields 0 for text it cannot convert.  A bucket count of 0 would
  // mean a modulo by zero in thrasher and a division by zero below, and a
  // negative one would be turned into an enormous vector size, so reject
  // both up front.
  if (BUCKETS <= 0 || ELEMENTS < 0) {
    cerr << argv[0] << ": <numbuckets> must be a positive integer and "
         << "<numelements> must not be negative" << endl;
    exit(1);
  }

  vector<int> table(BUCKETS,0);  // table[i] = distinct values hashed to index i
  myHash s(BUCKETS);
  timeval tp;

  int empty=0, singleton=0, collisions=0, biggest=0, collision_sum=0;
  int num, index;

  // Seed the generator from the wall clock so each run draws different values.
  gettimeofday(&tp, NULL );
  srand(tp.tv_sec);

  for (int i=0; i< ELEMENTS; i++) {
    num = rand();
    index = thrasher()(num);
    // We were using count wrong in class.  It returns the number of
    // elements with a certain key - but the key is the thing in the set,
    // not the hash table index.  That other "key" is hidden.
    if (s.count(num) == 0) {
      // insert and record it only if the same key hasn't already been added
      s.insert(num);
      table[index]++;
    }
  }

  // Classify every index: empty, exactly one value, or more than one.
  // collisions counts the indices holding more than one value, and
  // collision_sum counts the values beyond the first at those indices.
  for (int i=0; i< BUCKETS; i++) {
    if (table[i] == 0) empty++;
    else if (table[i] == 1) singleton++;
    else { collisions++;  collision_sum += table[i]-1; }
    if (table[i]>biggest) biggest=table[i];
  }

  // With no colliding index the average would be 0/0 (NaN); report 0 instead.
  float avg_collision = 0.0f;
  if (collisions > 0) {
    avg_collision = static_cast<float>(collision_sum) / static_cast<float>(collisions);
  }

  // NOTE: the load factor uses the requested element count, not the number
  // of distinct values actually stored, so it overstates the load whenever
  // rand() produced duplicates.
  cout << endl << "Load Factor: " << static_cast<float>(ELEMENTS) / static_cast<float>(BUCKETS) << endl;
  cout << "Collisions: " << collision_sum << endl;
  cout << "Empty Buckets: " << empty << endl;
  cout << "Singleton Buckets: " << singleton << endl;
  cout << "Largest Bucket: " << biggest << endl;
  cout << "Avg. Collision Bucket size: " << avg_collision << endl << endl;

  return 0;
}

