Docstoc

An Introduction to Hashing

Document Sample
An Introduction to Hashing Powered By Docstoc
					An Introduction to Hashing.

        By: Sara Kennedy
   Presented: November 1, 2002
             What is hashing?
Hashing is a method of inserting data into a
  table.
Tables can be implemented in many ways.
Examples include a fixed array (limiting
  number of elements), array of linked lists
  (potentially unlimited number of elements)
              Why use hashing?

There is the potential to retrieve data faster.

Using a proper hash function will distribute
 the elements evenly throughout the table. To
 retrieve an element, apply the hash function
 to its key and probe the resulting table
 locations until the element is found or it is
 clear that it is not in the table.
          Hash Functions (1)
Let U be the universe of possible keys for a
  set of elements. It is generally assumed that
  all elements have unique integer keys.
Let m be the size of our array that will hold
  the elements.
A hash function h(key) is a function that maps
  U to $\mathbb{Z}_m = \{0, 1, \dots, m-1\}$, the set of array indices.
          Hash Functions (2)
If we can define a one-to-one mapping from U
   to $\mathbb{Z}_m$ (no two keys share an index), h(k) is
   called a perfect hashing function.
If we cannot define a perfect hashing function,
   we must deal with collisions.
           Hash Functions (3)
A collision occurs when two or more keys map onto
  the same table index.
There are many ways to handle collisions. These
  include “chaining”, “double hashing”, “linear
  probing”, “quadratic probing”, “random probing”,
  etc.
The method that I will describe is double hashing.
        MIS for Hash Table
INTERFACE:

 TYPE Element;
 PROCEDURE Put(key: INTEGER);
 PROCEDURE Find(key: INTEGER): BOOLEAN;
 PROCEDURE Delete(key: INTEGER);
(*
     Title: Hash.m
     Author: Sara Kennedy
     Last Revised: November 1, 2002
     Description: An implementation of a hash table using double
     hashing.
     Interface:
          TYPE Element;
          PROCEDURE Put(key: INTEGER);
          PROCEDURE Find(key: INTEGER): BOOLEAN;
          PROCEDURE Delete(key: INTEGER);
*)

MODULE Hash;
(*
     Open-addressing hash table for INTEGER keys, using double hashing.

     Each slot carries a status: Empty (never used), Deleted (held an
     element that was removed) or Occupied.  Deleted slots are kept as
     markers so that a search does not stop early at a slot that lay on
     the probe path of a key stored further along; Put reuses them.

     Keys are assumed to be unique: Put does not check whether the key
     is already stored.
*)

CONST
    (* Number of slots.  It is prime so that every step size produced by
       HashValue (1 .. Size-1) is relatively prime to it; the probe
       sequence of any key then visits every slot exactly once. *)
    Size = 101;

    (* Values of Element.status *)
    Empty = -1;
    Deleted = 0;
    Occupied = 1;

TYPE
    Element* = RECORD
        key: INTEGER;
        data: REAL;          (* not read or written by this module *)
        status: INTEGER;     (* Empty, Deleted or Occupied *)
    END;

VAR
    Table: ARRAY Size OF Element;
    init: INTEGER;           (* loop counter for the module body *)

(* Return the table index of the i-th probe (i = 0, 1, ...) for key:
       h(key, i) = (h1 + i*h2) MOD Size
   where h1 = key MOD Size is the home slot and
   h2 = 1 + key MOD (Size-1) is the step, which is never 0. *)
PROCEDURE HashValue(key, i: INTEGER): INTEGER;
    VAR home, step: INTEGER;
BEGIN
    home := key MOD Size;
    (* The parentheses around Size - 1 are required: MOD binds tighter
       than "-", so "key MOD Size-1" would make the step equal to
       home and allow a step of 0. *)
    step := 1 + (key MOD (Size - 1));
    RETURN (home + i * step) MOD Size
END HashValue;

(* Insert key into the table.  The first slot on the probe sequence
   that is not Occupied is used.  If all Size probes hit occupied slots
   the table is full and the key is silently not stored; a caller can
   detect this with Find. *)
PROCEDURE Put*(key: INTEGER);
    VAR loc, i: INTEGER;
BEGIN
    i := 0;
    loc := HashValue(key, i);
    (* Bounded by Size so that a full table cannot loop forever. *)
    WHILE (Table[loc].status = Occupied) & (i < Size) DO
        INC(i);
        loc := HashValue(key, i)
    END;
    IF Table[loc].status # Occupied THEN
        Table[loc].key := key;
        Table[loc].status := Occupied
    END
END Put;

(* Search for key.  Returns TRUE and sets loc to its slot if it is
   stored; otherwise returns FALSE and loc holds the last slot probed.
   The search stops at the first Empty slot or after Size probes;
   Deleted slots are skipped. *)
PROCEDURE FindLoc(key: INTEGER; VAR loc: INTEGER): BOOLEAN;
    VAR i: INTEGER;
BEGIN
    i := 0;
    loc := HashValue(key, i);
    WHILE (Table[loc].status # Empty) & (i < Size) DO
        IF (Table[loc].status = Occupied) & (Table[loc].key = key) THEN
            RETURN TRUE
        END;
        INC(i);
        loc := HashValue(key, i)
    END;
    RETURN FALSE
END FindLoc;

(* Return TRUE if key is stored in the table. *)
PROCEDURE Find*(key: INTEGER): BOOLEAN;
    VAR loc: INTEGER;
BEGIN
    RETURN FindLoc(key, loc)
END Find;

(* Remove key from the table if it is present.  The slot is marked
   Deleted rather than Empty so that searches continue past it. *)
PROCEDURE Delete*(key: INTEGER);
    VAR loc: INTEGER;
BEGIN
    IF FindLoc(key, loc) THEN
        Table[loc].status := Deleted
    END
END Delete;

BEGIN
    (* Initialize the table as an empty table *)
    FOR init := 0 TO Size - 1 DO
        Table[init].key := 0;
        Table[init].status := Empty
    END
END Hash.
MODULE TestHash;
(* Test driver for the integer-key Hash module.  Each helper performs
   one table operation and prints the key followed by the outcome. *)
IMPORT Out, Hash;

(* Insert key, then confirm through Hash.Find that it was stored. *)
PROCEDURE Insert(key: INTEGER);
BEGIN
    Hash.Put(key);
    Out.Int(key, 0);
    IF Hash.Find(key) THEN
        Out.String(" inserted.")
    ELSE
        Out.String(" not inserted.")
    END;
    Out.Ln
END Insert;

(* Delete key.  Reports " deleted." only if the key was present
   beforehand and can no longer be found afterwards. *)
PROCEDURE Delete(key: INTEGER);
BEGIN
    Out.Int(key, 0);
    IF Hash.Find(key) THEN
        Hash.Delete(key);
        IF Hash.Find(key) THEN
            Out.String(" not deleted.")
        ELSE
            Out.String(" deleted.")
        END
    ELSE
        Out.String(" not deleted.")
    END;
    Out.Ln
END Delete;

(* Report whether key is currently stored in the table. *)
PROCEDURE Find(key: INTEGER);
BEGIN
    (* key is an INTEGER, so it is printed with Out.Int like the other
       helpers do. *)
    Out.Int(key, 0);
    IF Hash.Find(key) THEN
        Out.String(" found.")
    ELSE
        Out.String(" not found.")
    END;
    Out.Ln
END Find;

BEGIN
    Insert(3);
    Insert(200);
    Insert(2056);
    Insert(-97);
    Find(3);
    Delete(200);
    Delete(5);
    Insert(5);
    Insert(103);
    Find(200);
    Find(103);
    Find(100)
END TestHash.
Output:
3 inserted.
200 inserted.
2056 inserted.
-97 inserted.
3 found.
200 deleted.
5 not deleted.
5 inserted.
103 inserted.
200 not found.
103 found.
100 not found.
(*
     Title: Hash.m
     Author: Sara Kennedy
     Last Revised: November 1, 2002
     Description: An implementation of a hash table using double
     hashing on 2D vectors with integer coordinates.
     Interface:
          TYPE Element;
          PROCEDURE Put(x, y: INTEGER);
          PROCEDURE Find(x, y: INTEGER): BOOLEAN;
          PROCEDURE Delete(x, y: INTEGER);
*)

MODULE Hash;
(*
     Open-addressing hash table for 2D vectors with INTEGER
     coordinates, using double hashing.

     Each slot carries a status: Empty (never used), Deleted (held an
     element that was removed) or Occupied.  Deleted slots are kept as
     markers so that a search does not stop early at a slot that lay on
     the probe path of a vector stored further along; Put reuses them.

     Vectors are assumed to be unique: Put does not check whether the
     vector is already stored.
*)

CONST
    (* Number of slots.  It is prime so that every step size produced by
       HashValue (1 .. Size-1) is relatively prime to it; the probe
       sequence of any key then visits every slot exactly once. *)
    Size = 1009;

    (* Values of Element.status *)
    Empty = -1;
    Deleted = 0;
    Occupied = 1;

TYPE
    Element* = RECORD
        x, y: INTEGER;
        status: INTEGER;     (* Empty, Deleted or Occupied *)
    END;

VAR
    Table: ARRAY Size OF Element;
    init: INTEGER;           (* loop counter for the module body *)

(* Return the table index of the i-th probe (i = 0, 1, ...) for key:
       h(key, i) = (h1 + i*h2) MOD Size
   where h1 = key MOD Size is the home slot and
   h2 = 1 + key MOD (Size-1) is the step, which is never 0. *)
PROCEDURE HashValue(key, i: INTEGER): INTEGER;
    VAR home, step: INTEGER;
BEGIN
    home := key MOD Size;
    (* The parentheses around Size - 1 are required: MOD binds tighter
       than "-", so "key MOD Size-1" would make the step equal to
       home and allow a step of 0. *)
    step := 1 + (key MOD (Size - 1));
    (* i*step can reach about Size*Size, which does not fit where
       INTEGER is 16 bits wide, so the sum is formed in LONGINT. *)
    RETURN SHORT((LONG(home) + LONG(i) * LONG(step)) MOD Size)
END HashValue;

(* Reduce a vector to the integer key that is hashed.  Different
   vectors may share a key (e.g. (3, 4) and (4, 3)); FindLoc therefore
   compares both coordinates, not the key. *)
PROCEDURE GetKey(x, y: INTEGER): INTEGER;
BEGIN
    RETURN x + y
END GetKey;

(* Insert the vector (x, y) into the table.  The first slot on the
   probe sequence that is not Occupied is used.  If all Size probes hit
   occupied slots the table is full and the vector is silently not
   stored; a caller can detect this with Find. *)
PROCEDURE Put*(x, y: INTEGER);
    VAR loc, i, key: INTEGER;
BEGIN
    i := 0;
    key := GetKey(x, y);
    loc := HashValue(key, i);
    (* Bounded by Size so that a full table cannot loop forever. *)
    WHILE (Table[loc].status = Occupied) & (i < Size) DO
        INC(i);
        loc := HashValue(key, i)
    END;
    IF Table[loc].status # Occupied THEN
        Table[loc].x := x;
        Table[loc].y := y;
        Table[loc].status := Occupied
    END
END Put;

(* Search for the vector (x, y).  Returns TRUE and sets loc to its slot
   if it is stored; otherwise returns FALSE and loc holds the last slot
   probed.  The search stops at the first Empty slot or after Size
   probes; Deleted slots are skipped. *)
PROCEDURE FindLoc(x, y: INTEGER; VAR loc: INTEGER): BOOLEAN;
    VAR i, key: INTEGER;
BEGIN
    i := 0;
    key := GetKey(x, y);
    loc := HashValue(key, i);
    WHILE (Table[loc].status # Empty) & (i < Size) DO
        IF (Table[loc].status = Occupied)
           & (Table[loc].x = x) & (Table[loc].y = y) THEN
            RETURN TRUE
        END;
        INC(i);
        loc := HashValue(key, i)
    END;
    RETURN FALSE
END FindLoc;

(* Return TRUE if the vector (x, y) is stored in the table. *)
PROCEDURE Find*(x, y: INTEGER): BOOLEAN;
    VAR loc: INTEGER;
BEGIN
    RETURN FindLoc(x, y, loc)
END Find;

(* Remove the vector (x, y) from the table if it is present.  The slot
   is marked Deleted rather than Empty so that searches continue past
   it. *)
PROCEDURE Delete*(x, y: INTEGER);
    VAR loc: INTEGER;
BEGIN
    IF FindLoc(x, y, loc) THEN
        Table[loc].status := Deleted
    END
END Delete;

BEGIN
    (* Initialize the table as an empty table *)
    FOR init := 0 TO Size - 1 DO
        Table[init].status := Empty
    END
END Hash.
MODULE TestHash;
(* Test driver for the 2D-vector Hash module.  Each helper performs one
   table operation and prints the vector followed by the outcome. *)
IMPORT Out, Hash;

(* Print the vector in the form "(x, y)" without a line break. *)
PROCEDURE Print(x, y: INTEGER);
BEGIN
    Out.String("(");
    Out.Int(x, 0);
    Out.String(", ");
    Out.Int(y, 0);
    Out.String(")")
END Print;

(* Insert (x, y), then confirm through Hash.Find that it was stored. *)
PROCEDURE Insert(x, y: INTEGER);
BEGIN
    Hash.Put(x, y);
    Print(x, y);
    IF Hash.Find(x, y) THEN
        Out.String(" inserted.")
    ELSE
        Out.String(" not inserted.")
    END;
    Out.Ln
END Insert;

(* Delete (x, y).  Reports " deleted." only if the vector was present
   beforehand and can no longer be found afterwards. *)
PROCEDURE Delete(x, y: INTEGER);
BEGIN
    Print(x, y);
    IF Hash.Find(x, y) THEN
        Hash.Delete(x, y);
        IF Hash.Find(x, y) THEN
            Out.String(" not deleted.")
        ELSE
            Out.String(" deleted.")
        END
    ELSE
        Out.String(" not deleted.")
    END;
    Out.Ln
END Delete;

(* Report whether (x, y) is currently stored in the table. *)
PROCEDURE Find(x, y: INTEGER);
BEGIN
    Print(x, y);
    IF Hash.Find(x, y) THEN
        Out.String(" found.")
    ELSE
        Out.String(" not found.")
    END;
    Out.Ln
END Find;

BEGIN
    Insert(3, 4);
    Insert(200, 1);
    Insert(2056, 1028);
    Insert(-97, 0);
    Find(3, 4);
    Delete(200, 1);
    Delete(5, 4);
    Insert(5, 5);
    Insert(103, 301);
    Find(200, 1);
    Find(103, 103);
    Find(100, 200);
    Find(3, 6)
END TestHash.
Output:
(3, 4) inserted.
(200, 1) inserted.
(2056, 1028) inserted.
(-97, 0) inserted.
(3, 4) found.
(200, 1) deleted.
(5, 4) not deleted.
(5, 5) inserted.
(103, 301) inserted.
(200, 1) not found.
(103, 103) not found.
(100, 200) not found.
(3, 6) not found.

				
DOCUMENT INFO
Shared By:
Categories:
Stats:
views:6
posted:4/5/2010
language:English
pages:30