#include #include #include #include "HashTable.h" #include "HashTable.C" #include "PairHeap.h" #define AVG_NUM_ASSOCIATIONS 30 /* string_equal() and string_hash() are used as parameters to the HashTable class. */ int string_equal(char *s1, char *s2) { return (strcmp(s1, s2) == 0); } int string_hash(char *str) { int result = 0; while (*str != '\0') // Since the strings are basically series result += *(str++); // of random numbers, we don't need a // fancy hash function return result; } /* pairkey_equal() and pairkey_hash() are used as parameters to the HashTable class. */ int pairkey_equal(PairKey *s1, PairKey *s2) { return (string_equal(s1->book, s2->book) && string_equal(s1->cd, s2->cd)); } int pairkey_hash(PairKey *p) { return string_hash(p->book) + string_hash(p->cd); } /* Reads the contents of the CD file and places them in a hash table. */ HashTable* build_cd_table(char *cd_filename) { HashTable *result; ifstream cd_file(cd_filename); int num_purchases; char buffer[100]; char *id, *cd; cd_file >> num_purchases; /* Since the hash function is a sum of random numbers, there shouldn't be any correlation between its value and the hashtable size, so we can save the effort of finding prime numbers ... */ result = new HashTable(num_purchases / 2, string_hash, string_equal); for (int i = 0; i < num_purchases; i++) { cd_file >> buffer; // discard CUST_ID cd_file >> buffer; id = new char[strlen(buffer) + 1]; // Move customer ID from buffer strcpy(id, buffer); // into its own memory cd_file >> buffer; // discard CD cd_file >> buffer; cd = new char[strlen(buffer) + 1]; // Move CD title from buffer into strcpy(cd, buffer); // its own memory result->insert(id, cd); } return result; } void process(PairKey *key, HashTable& pairs, PairHeap& heap) { Pair *pair; if (pairs.find(key, &pair)) { pair->numOccurrences++; heap.updatePair(pair); } else { pair = new Pair; pair->key = *key; pair->numOccurrences = 1; heap.insert(pair); pairs.insert(&(pair->key), pair); } } /* This function implements the hash join operation. It assumes that the CD hashtable has already been created, and that the pair_table and heap have been initialized but are empty. At the end of process_book_file(), the heap has been filled in with all of the appropriate pairs with the correct number of occurrences. */ void process_book_file(ifstream& book_file, HashTable& cd_table, HashTable& pair_table, PairHeap& heap) { int num_books, i; char *book_name; char cust_buff[50]; char book_buff[50]; HashEntry *matches; PairKey key; book_file >> num_books; // Read each book file entry for (i = 0; i < num_books; i++) { book_name = NULL; book_file >> cust_buff; // skip CUST_ID book_file >> cust_buff; // No need to save this one in its // own memory, as with the CDs ... book_file >> book_buff; // skip BOOK book_file >> book_buff; // We delay putting the book title // into its own memory until there // is definitely a matching pair // Find the list of CDs with (potentially) matching customers matches = cd_table.getBucket(cust_buff); // Traverse list, processing each matching customer ID while (matches != NULL) { if (string_equal(cust_buff, matches->getKey())) { // If we haven't allocated new memory for this title, do so. // We only do this once per title. if (book_name == NULL) { book_name = new char[strlen(book_buff) + 1]; strcpy(book_name, book_buff); } key.book = book_name; key.cd = matches->getData(); process(&key, pair_table, heap); } matches = matches->getNext(); } } } int main(void) { char cd_filename[200]; char book_filename[200]; ifstream book_file; HashTable *cd_table; HashTable *pair_table; PairHeap *heap; Pair *heap_entry; cout << "Enter the CD file: "; cin >> cd_filename; cout << "Enter the book file: "; cin >> book_filename; // Step 1: Generate CD hash table and allocate other data structures cd_table = build_cd_table(cd_filename); pair_table = new HashTable( cd_table->numElements() * AVG_NUM_ASSOCIATIONS, pairkey_hash, pairkey_equal); heap = new PairHeap(cd_table->numElements() * AVG_NUM_ASSOCIATIONS); book_file.open(book_filename); // Step 2: Process the book file process_book_file(book_file, *cd_table, *pair_table, *heap); // Step 3: Print out the top 10 pairs for (int i = 0; i < 10; i++) { heap_entry = heap->removeMax(); cout << (i+1) << ". " << "CD: " << heap_entry->key.cd << " BOOK: " << heap_entry->key.book << " ( " << heap_entry->numOccurrences << " )\n"; } return 0; }