首页 > 解决方案 > 遇到分段错误与我使用线性探测的哈希表实现不一致

问题描述

试图为一个项目实现一个带有线性探测的哈希表,但我遇到了一些问题,我认为其中一个是罪魁祸首。

对于初学者,在编译代码后,如果我连续运行程序 10 次,我会遇到分段错误:大约 2/3 的时间是 11 次。

当代码确实运行时,它似乎“大部分”工作。指数 9500-10000 是完美的,所有插槽都已填满。但是当继续向下(9000-9500)时,会看到超过 10 个 NULL 空间,并且有一些槽充满了虚假值,即。值 > 100,000。

我正在使用来自 csv 文件的 10,000 个整数的数据集,所有值均小于 100,000。我打算尝试使用 GDB 和核心对其进行调试,但是我的计算机目前对安装它不太满意。

#ifndef HASHLINEAR_HPP
#define HASHLINEAR_HPP

struct node{
    int key;
};

class HashLinear{
    struct node** table;
    int tableSize;
    int numCollisions = 0;

    public:
        HashLinear(int bsize);
        void insert(int key);
        unsigned int hashFunction(int key);
        int search(int key);

        int getCollisions();
        void printTable();
};


#endif
#include "hashlinear.hpp"
#include <iostream>
using namespace std;

HashLinear::HashLinear(int bsize){
    this->tableSize = bsize;
    table = new node*[tableSize];
    for(int i = 0; i < tableSize; i++){
        table[i] = NULL;
    }
}

int HashLinear::getCollisions(){
    return numCollisions;
}

unsigned int HashLinear::hashFunction(int key){
    return key % tableSize;
}

void HashLinear::insert(int key){
    node* newNode = new node;
    newNode->key = key;
    int index = hashFunction(key);
    
    while(table[index] != NULL && table[index]->key != key){

        numCollisions++;
        index = (index + 1) % tableSize;
    }

    table[index] = newNode;
}



int HashLinear::search(int key){
    int value = hashFunction(key);

    int num = 0;

    while(table[value] != NULL){
        num = 0;
        if(num++ > tableSize){
            break;
        }

        if(table[value]->key == key){
            return value;
        }
        value++;
        value %= tableSize;
    }
    return -1;
}

void HashLinear::printTable(){
    for(int i = 0; i < tableSize; i++){
        cout << i << " || ";
        if(table[i] == NULL){
            cout << "NULL" << endl;
        }
        else{
            cout << table[i]->key << endl;
        }
        
    }
}
#include "hashlinear.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <time.h>
#include <stdlib.h>
#include <chrono>
using namespace std;



int main(){
    //******Read in data******//
    int testData[10000];
    float insertTime[100];
    float searchTime[100];
    int index = 0;
    string line, temp, word;
    ifstream inputFile;
    inputFile.open("dataSetA-updatedhashlinear.csv");
    if(inputFile.fail()){
        cout << "Could not open data." << endl;
        return -1;
    }
    else{
        while(inputFile >> temp){
            getline(inputFile, temp);
            stringstream inStream(temp);
            while(getline(inStream, word, ',')){
                testData[index] = stoi(word);
                index++;
            }
        }
        inputFile.close();
    }

    //******Read in data******//

    //cout << "Printing random data in range of 0 ~ 10: " << testData[rand() % 10 + 0] << endl;

    //******Insert/Search data in Linked List******//
    HashLinear table(10009);
    int hashIndex = 0;
    int insertTimeIndex = 0;
    int searchTimeIndex = 0;
    int num = 0;
    int upperIndex = 99;

    while(hashIndex < 10009){
        //Block for 100 insertions
        auto insertionStart = chrono::steady_clock::now();//Insert time start
        for(int i = hashIndex; i < upperIndex; i++){ //Keep track of current index as well as an upper index to control amount of inserts
            table.insert(testData[i]);
            hashIndex++;
            
        }
        auto insertionEnd = chrono::steady_clock::now();
        insertTime[insertTimeIndex] = chrono::duration_cast<chrono::microseconds>(insertionEnd - insertionStart).count() / 100.0;//Insert time end
        insertTimeIndex++;
        //Block for 100 insertions
        //Block for 100 searches
        num = 0;
        auto searchStart = chrono::steady_clock::now();//Search time start
        while(num < 100){ //Do 100 random searches from 0 index to upperindex
            srand((unsigned)time(0));
            int searchNode = table.search(testData[rand() % upperIndex + 0]);
            num++;
        }
        auto searchEnd = chrono::steady_clock::now();
        searchTime[searchTimeIndex] = chrono::duration_cast<chrono::microseconds>(searchEnd - searchStart).count() / 100.0;//Search time end
        searchTimeIndex++;
        //Block for 100 searches

        upperIndex += 100;
    }
    //******Insert/Search data in Linked List******//

    //******TESTING******//
    table.printTable();
    cout << "Search time: " << searchTime[20] << endl;
    cout << "Insert time: " << insertTime[20] << endl;
    cout << "Collisons: " << table.getCollisions() << endl;
    int testIndex = table.search(34262);
    cout << "Index of 34262: " << testIndex << endl;
    //******TESTING******//
}

标签: c++data-structures

解决方案


因此,经过更多调试后,我发现我的速度很慢。我在 testData 数组末尾迭代 100 次,创建虚假值并使哈希表无法正确填充。


推荐阅读