LCOV - code coverage report
Current view: top level - src/core - hsd_hash_table.f90 (source / functions) Coverage Total Hit
Test: coverage.info Lines: 98.6 % 211 208
Test Date: 2026-02-04 13:26:36 Functions: 71.4 % 14 10

            Line data    Source code
       1              : !> Hash table implementation for O(1) child name lookup
       2              : !>
       3              : !> This module provides a simple hash table for mapping string names to
       4              : !> integer indices. Used by hsd_table to accelerate child lookup when
       5              : !> tables have many children.
       6              : !>
       7              : !> ## Thread Safety
       8              : !>
       9              : !> This module is NOT thread-safe. Concurrent modifications to the same
      10              : !> hash table from multiple threads may cause data corruption. Use external
      11              : !> synchronization if concurrent access is required.
      12              : module hsd_hash_table
      13              :   use hsd_constants, only: sp
      14              :   use hsd_utils, only: to_lower
      15              :   implicit none (type, external)
      16              :   private
      17              : 
      18              :   public :: hsd_name_index_t
      19              : 
      20              :   !> Hash table entry
      21              :   type :: hash_entry_t
      22              :     character(len=:), allocatable :: key
      23              :     character(len=:), allocatable :: key_lower  !< Lowercased key for case-insensitive lookup
      24              :     integer :: value = 0
      25              :     logical :: occupied = .false.
      26              :     integer :: next = 0  !< Index of next entry in chain (0 = no more)
      27              :   end type hash_entry_t
      28              : 
      29              :   !> Hash table for name-to-index mapping
      30              :   !>
      31              :   !> Uses a hybrid collision resolution strategy:
      32              :   !> 1. Primary storage is a fixed-size bucket array accessed via hash (open addressing).
      33              :   !> 2. Collisions are handled via explicit chaining, but unlike traditional chaining where
      34              :   !>    nodes are individually allocated on the heap, here they are stored in a pre-allocated
      35              :   !>    contiguous `overflow` array.
      36              :   !>
      37              :   !> This "flat chaining" approach provides better cache locality and reduces memory fragmentation.
      38              :   !> In the `next` field, negative values (-idx) indicate an index into the overflow array.
      39              :   type :: hsd_name_index_t
      40              :     type(hash_entry_t), allocatable :: buckets(:)
      41              :     type(hash_entry_t), allocatable :: overflow(:)
      42              :     integer :: num_buckets = 0
      43              :     integer :: num_overflow = 0
      44              :     integer :: overflow_capacity = 0
      45              :     integer :: num_entries = 0
      46              :   contains
      47              :     procedure :: init => name_index_init
      48              :     procedure :: insert => name_index_insert
      49              :     procedure :: lookup => name_index_lookup
      50              :     procedure :: lookup_case_insensitive => name_index_lookup_ci
      51              :     procedure :: remove => name_index_remove
      52              :     procedure :: clear => name_index_clear
      53              :     procedure :: destroy => name_index_destroy
      54              :     procedure :: rehash => name_index_rehash
      55              :   end type hsd_name_index_t
      56              : 
      57              : contains
      58              : 
      59              :   !> Hash function (djb2 algorithm)
      60      2257279 :   pure function hash_string(str) result(hash)
      61              :     character(len=*), intent(in) :: str
      62              :     integer :: hash
      63      2257279 :     integer :: i
      64              : 
      65      2257279 :     hash = 5381
      66     15975510 :     do i = 1, len(str)
      67              :       ! hash = hash * 33 + char, but avoid overflow with iand
      68     15975510 :       hash = iand(ishft(hash, 5) + hash + ichar(str(i:i)), huge(hash))
      69              :     end do
      70      2257279 :     hash = abs(hash)  ! Ensure positive
      71              : 
      72      2257279 :   end function hash_string
      73              : 
      74              :   !> Initialize the hash table
      75        78599 :   subroutine name_index_init(self, capacity)
      76              :     class(hsd_name_index_t), intent(inout) :: self
      77              :     integer, intent(in), optional :: capacity
      78              : 
      79        78599 :     integer :: cap
      80              : 
      81        78599 :     cap = 32  ! Default capacity
      82        78578 :     if (present(capacity)) cap = max(8, capacity)
      83              : 
      84              :     ! Round up to power of 2 for efficient modulo
      85        78599 :     cap = 2 ** ceiling(log(real(cap, sp)) / log(2.0_sp))
      86              : 
      87        78599 :     if (allocated(self%buckets)) then
      88            6 :       call self%destroy()
      89              :     end if
      90              : 
      91       707959 :     allocate(self%buckets(cap))
      92        78599 :     self%num_buckets = cap
      93        78599 :     self%num_entries = 0
      94              : 
      95              :     ! Pre-allocate overflow area
      96        78599 :     self%overflow_capacity = max(8, cap / 4)
      97       707399 :     allocate(self%overflow(self%overflow_capacity))
      98        78599 :     self%num_overflow = 0
      99              : 
     100      2257279 :   end subroutine name_index_init
     101              : 
     102              :   !> Add an entry to overflow area, returns its index (negative = -idx in overflow)
     103        11135 :   function add_overflow_entry(self, key, value) result(idx)
     104              :     class(hsd_name_index_t), intent(inout) :: self
     105              :     character(len=*), intent(in) :: key
     106              :     integer, intent(in) :: value
     107              :     integer :: idx
     108              : 
     109        11135 :     type(hash_entry_t), allocatable :: tmp(:)
     110        11135 :     integer :: new_capacity
     111              : 
     112              :     ! Grow overflow if needed
     113        11135 :     if (self%num_overflow >= self%overflow_capacity) then
     114           11 :       new_capacity = self%overflow_capacity * 2
     115         6411 :       allocate(tmp(new_capacity))
     116         3211 :       tmp(1:self%num_overflow) = self%overflow(1:self%num_overflow)
     117         3222 :       call move_alloc(tmp, self%overflow)
     118           11 :       self%overflow_capacity = new_capacity
     119              :     end if
     120              : 
     121        11135 :     self%num_overflow = self%num_overflow + 1
     122        11135 :     self%overflow(self%num_overflow)%key = key
     123        11135 :     self%overflow(self%num_overflow)%key_lower = to_lower(key)
     124        11135 :     self%overflow(self%num_overflow)%value = value
     125        11135 :     self%overflow(self%num_overflow)%occupied = .true.
     126        11135 :     self%overflow(self%num_overflow)%next = 0
     127              : 
     128              :     ! Return negative index to indicate overflow area
     129        11135 :     idx = -self%num_overflow
     130              : 
     131        89734 :   end function add_overflow_entry
     132              : 
     133              :   !> Insert a key-value pair
     134      1057041 :   recursive subroutine name_index_insert(self, key, value)
     135              :     class(hsd_name_index_t), intent(inout) :: self
     136              :     character(len=*), intent(in) :: key
     137              :     integer, intent(in) :: value
     138              : 
     139      1057041 :     integer :: idx, chain_idx, overflow_idx
     140              : 
     141              :     ! Initialize if needed
     142            1 :     if (self%num_buckets == 0) call self%init()
     143              : 
     144              :     ! Check load factor and rehash if needed (> 0.75)
     145      1057041 :     if (self%num_entries * 4 > self%num_buckets * 3) then
     146        67586 :       call self%rehash()
     147              :     end if
     148              : 
     149      1057041 :     idx = mod(hash_string(key), self%num_buckets) + 1
     150              : 
     151      1057041 :     if (.not. self%buckets(idx)%occupied) then
     152              :       ! Empty bucket - use directly
     153      1045804 :       self%buckets(idx)%key = key
     154      1045804 :       self%buckets(idx)%key_lower = to_lower(key)
     155      1045804 :       self%buckets(idx)%value = value
     156      1045804 :       self%buckets(idx)%occupied = .true.
     157      1045804 :       self%buckets(idx)%next = 0
     158      1045804 :       self%num_entries = self%num_entries + 1
     159      1045804 :       return
     160              :     end if
     161              : 
     162              :     ! Check if key already exists in bucket
     163        11237 :     if (allocated(self%buckets(idx)%key)) then
     164        11237 :       if (self%buckets(idx)%key == key) then
     165          100 :         self%buckets(idx)%value = value
     166          100 :         return
     167              :       end if
     168              :     end if
     169              : 
     170              :     ! Follow chain to check for existing key and find end
     171        11137 :     chain_idx = self%buckets(idx)%next
     172        11796 :     do while (chain_idx /= 0)
     173              :       ! In overflow area (chain_idx is always negative for overflow entries)
     174         3880 :       overflow_idx = -chain_idx
     175         3880 :       if (allocated(self%overflow(overflow_idx)%key)) then
     176         3880 :         if (self%overflow(overflow_idx)%key == key) then
     177            2 :           self%overflow(overflow_idx)%value = value
     178            2 :           return
     179              :         end if
     180              :       end if
     181         3878 :       if (self%overflow(overflow_idx)%next == 0) exit
     182          659 :       chain_idx = self%overflow(overflow_idx)%next
     183              :     end do
     184              : 
     185              :     ! Add new entry to overflow and link it
     186        11135 :     overflow_idx = add_overflow_entry(self, key, value)
     187              : 
     188              :     ! Link to chain
     189        11135 :     if (self%buckets(idx)%next == 0) then
     190         7916 :       self%buckets(idx)%next = overflow_idx
     191              :     else
     192              :       ! Find last in chain (chain entries are always negative for overflow)
     193         3219 :       chain_idx = self%buckets(idx)%next
     194         3878 :       do while (chain_idx /= 0)
     195         3878 :         if (self%overflow(-chain_idx)%next == 0) then
     196         3219 :           self%overflow(-chain_idx)%next = overflow_idx
     197         3219 :           exit
     198              :         end if
     199          659 :         chain_idx = self%overflow(-chain_idx)%next
     200              :       end do
     201              :     end if
     202              : 
     203        11135 :     self%num_entries = self%num_entries + 1
     204              : 
     205      1068176 :   end subroutine name_index_insert
     206              : 
     207              :   !> Lookup a key (case-sensitive)
     208      1200217 :   function name_index_lookup(self, key, found) result(value)
     209              :     class(hsd_name_index_t), intent(in) :: self
     210              :     character(len=*), intent(in) :: key
     211              :     logical, intent(out), optional :: found
     212              :     integer :: value
     213              : 
     214      1200217 :     integer :: idx, chain_idx, overflow_idx
     215              : 
     216      1200217 :     value = 0
     217      1200053 :     if (present(found)) found = .false.
     218              : 
     219      1200217 :     if (self%num_buckets == 0) return
     220              : 
     221      1200217 :     idx = mod(hash_string(key), self%num_buckets) + 1
     222              : 
     223      1200217 :     if (.not. self%buckets(idx)%occupied) return
     224              : 
     225              :     ! Check bucket
     226      1200192 :     if (allocated(self%buckets(idx)%key)) then
     227      1200192 :       if (self%buckets(idx)%key == key) then
     228      1000178 :         value = self%buckets(idx)%value
     229      1000178 :         if (present(found)) found = .true.
     230      1000178 :         return
     231              :       end if
     232              :     end if
     233              : 
     234              :     ! Check chain (overflow entries use negative indices)
     235       200014 :     chain_idx = self%buckets(idx)%next
     236       200015 :     do while (chain_idx /= 0)
     237       200011 :       overflow_idx = -chain_idx
     238       200011 :       if (allocated(self%overflow(overflow_idx)%key)) then
     239       200010 :         if (self%overflow(overflow_idx)%key == key) then
     240       200010 :           value = self%overflow(overflow_idx)%value
     241       200010 :           if (present(found)) found = .true.
     242       200010 :           return
     243              :         end if
     244              :       end if
     245            1 :       chain_idx = self%overflow(overflow_idx)%next
     246              :     end do
     247              : 
     248      1200217 :   end function name_index_lookup
     249              : 
     250              :   !> Lookup a key (case-insensitive)
     251       611681 :   function name_index_lookup_ci(self, key, found) result(value)
     252              :     class(hsd_name_index_t), intent(in) :: self
     253              :     character(len=*), intent(in) :: key
     254              :     logical, intent(out), optional :: found
     255              :     integer :: value
     256              : 
     257       611681 :     integer :: i, chain_idx, overflow_idx
     258       611681 :     character(len=:), allocatable :: key_lower
     259              : 
     260       611681 :     value = 0
     261       611674 :     if (present(found)) found = .false.
     262              : 
     263       611681 :     if (self%num_buckets == 0) return
     264              : 
     265       611681 :     key_lower = to_lower(key)
     266              : 
     267              :     ! For case-insensitive, we need to scan all buckets since different
     268              :     ! casings hash differently
     269      3880039 :     do i = 1, self%num_buckets
     270      3880039 :       if (self%buckets(i)%occupied) then
     271              :         ! Check bucket
     272       918614 :         if (allocated(self%buckets(i)%key_lower)) then
     273       918614 :           if (self%buckets(i)%key_lower == key_lower) then
     274       610574 :             value = self%buckets(i)%value
     275       610574 :             if (present(found)) found = .true.
     276       610574 :             return
     277              :           end if
     278              :         end if
     279              : 
     280              :         ! Check chain (overflow entries use negative indices)
     281       308040 :         chain_idx = self%buckets(i)%next
     282       499805 :         do while (chain_idx /= 0)
     283       191778 :           overflow_idx = -chain_idx
     284       191778 :           if (allocated(self%overflow(overflow_idx)%key_lower)) then
     285       191778 :             if (self%overflow(overflow_idx)%key_lower == key_lower) then
     286           13 :               value = self%overflow(overflow_idx)%value
     287           13 :               if (present(found)) found = .true.
     288           13 :               return
     289              :             end if
     290              :           end if
     291       191765 :           chain_idx = self%overflow(overflow_idx)%next
     292              :         end do
     293              :       end if
     294              :     end do
     295              : 
     296      1811898 :   end function name_index_lookup_ci
     297              : 
     298              :   !> Remove a key (just marks as deleted, actual cleanup on rehash)
     299           21 :   subroutine name_index_remove(self, key)
     300              :     class(hsd_name_index_t), intent(inout) :: self
     301              :     character(len=*), intent(in) :: key
     302              : 
     303           21 :     integer :: idx, chain_idx, overflow_idx
     304              : 
     305            0 :     if (self%num_buckets == 0) return
     306              : 
     307           21 :     idx = mod(hash_string(key), self%num_buckets) + 1
     308              : 
     309           21 :     if (.not. self%buckets(idx)%occupied) return
     310              : 
     311              :     ! Check bucket
     312           20 :     if (allocated(self%buckets(idx)%key)) then
     313           20 :       if (self%buckets(idx)%key == key) then
     314              :         ! Clear the bucket but keep chain
     315           19 :         if (allocated(self%buckets(idx)%key)) deallocate(self%buckets(idx)%key)
     316           19 :         if (allocated(self%buckets(idx)%key_lower)) deallocate(self%buckets(idx)%key_lower)
     317           19 :         self%buckets(idx)%value = 0
     318              : 
     319              :         ! If there's a chain, promote first chain entry (overflow uses negative indices)
     320           19 :         if (self%buckets(idx)%next /= 0) then
     321            1 :           overflow_idx = -self%buckets(idx)%next
     322            1 :           self%buckets(idx)%key = self%overflow(overflow_idx)%key
     323            1 :           self%buckets(idx)%key_lower = self%overflow(overflow_idx)%key_lower
     324            1 :           self%buckets(idx)%value = self%overflow(overflow_idx)%value
     325            1 :           self%buckets(idx)%next = self%overflow(overflow_idx)%next
     326            1 :           self%overflow(overflow_idx)%occupied = .false.
     327              :         else
     328           18 :           self%buckets(idx)%occupied = .false.
     329              :         end if
     330              : 
     331           19 :         self%num_entries = self%num_entries - 1
     332           19 :         return
     333              :       end if
     334              :     end if
     335              : 
     336              :     ! Check chain (overflow entries use negative indices)
     337            1 :     chain_idx = self%buckets(idx)%next
     338            1 :     do while (chain_idx /= 0)
     339            1 :       overflow_idx = -chain_idx
     340            1 :       if (allocated(self%overflow(overflow_idx)%key)) then
     341            1 :         if (self%overflow(overflow_idx)%key == key) then
     342            1 :           if (allocated(self%overflow(overflow_idx)%key)) &
     343            1 :             deallocate(self%overflow(overflow_idx)%key)
     344            1 :           if (allocated(self%overflow(overflow_idx)%key_lower)) &
     345            1 :             deallocate(self%overflow(overflow_idx)%key_lower)
     346            1 :           self%overflow(overflow_idx)%occupied = .false.
     347            1 :           self%num_entries = self%num_entries - 1
     348            1 :           return
     349              :         end if
     350              :       end if
     351            0 :       chain_idx = self%overflow(overflow_idx)%next
     352              :     end do
     353              : 
     354       611702 :   end subroutine name_index_remove
     355              : 
     356              :   !> Clear all entries
     357        78599 :   subroutine name_index_clear(self)
     358              :     class(hsd_name_index_t), intent(inout) :: self
     359        78599 :     integer :: i
     360              : 
     361      1265759 :     do i = 1, self%num_buckets
     362      1187160 :       if (allocated(self%buckets(i)%key)) deallocate(self%buckets(i)%key)
     363      1187160 :       if (allocated(self%buckets(i)%key_lower)) deallocate(self%buckets(i)%key_lower)
     364      1187160 :       self%buckets(i)%occupied = .false.
     365      1265759 :       self%buckets(i)%next = 0
     366              :     end do
     367              : 
     368        84798 :     do i = 1, self%num_overflow
     369         6199 :       if (allocated(self%overflow(i)%key)) deallocate(self%overflow(i)%key)
     370         6199 :       if (allocated(self%overflow(i)%key_lower)) deallocate(self%overflow(i)%key_lower)
     371         6199 :       self%overflow(i)%occupied = .false.
     372        84798 :       self%overflow(i)%next = 0
     373              :     end do
     374              : 
     375        78599 :     self%num_entries = 0
     376        78599 :     self%num_overflow = 0
     377              : 
     378           21 :   end subroutine name_index_clear
     379              : 
     380              :   !> Destroy the hash table
     381        78649 :   subroutine name_index_destroy(self)
     382              :     class(hsd_name_index_t), intent(inout) :: self
     383              : 
     384        78649 :     if (allocated(self%buckets)) then
     385        78592 :       call self%clear()
     386      1344232 :       deallocate(self%buckets)
     387              :     end if
     388        78649 :     if (allocated(self%overflow)) then
     389       790256 :       deallocate(self%overflow)
     390              :     end if
     391        78649 :     self%num_buckets = 0
     392        78649 :     self%num_entries = 0
     393        78649 :     self%num_overflow = 0
     394        78649 :     self%overflow_capacity = 0
     395              : 
     396        78599 :   end subroutine name_index_destroy
     397              : 
     398              :   !> Rehash to larger table
     399        67587 :   subroutine name_index_rehash(self)
     400              :     class(hsd_name_index_t), intent(inout) :: self
     401              : 
     402        67587 :     type(hash_entry_t), allocatable :: old_buckets(:), old_overflow(:)
     403        67587 :     integer :: i, old_num_buckets, old_num_overflow, chain_idx, overflow_idx
     404              : 
     405        67587 :     old_num_buckets = self%num_buckets
     406        67587 :     old_num_overflow = self%num_overflow
     407        67587 :     call move_alloc(self%buckets, old_buckets)
     408        67587 :     call move_alloc(self%overflow, old_overflow)
     409              : 
     410              :     ! Initialize with double capacity
     411        67587 :     self%num_buckets = old_num_buckets * 2
     412      1183091 :     allocate(self%buckets(self%num_buckets))
     413        67587 :     self%overflow_capacity = max(8, self%num_buckets / 4)
     414       616467 :     allocate(self%overflow(self%overflow_capacity))
     415        67587 :     self%num_entries = 0
     416        67587 :     self%num_overflow = 0
     417              : 
     418              :     ! Reinsert all entries
     419       625339 :     do i = 1, old_num_buckets
     420       625339 :       if (old_buckets(i)%occupied) then
     421       480962 :         if (allocated(old_buckets(i)%key)) then
     422       480962 :           call self%insert(old_buckets(i)%key, old_buckets(i)%value)
     423              :         end if
     424              : 
     425              :         ! Process chain (overflow entries use negative indices)
     426       480962 :         chain_idx = old_buckets(i)%next
     427       485898 :         do while (chain_idx /= 0)
     428         4936 :           overflow_idx = -chain_idx
     429         4936 :           if (old_overflow(overflow_idx)%occupied) then
     430         4936 :             if (allocated(old_overflow(overflow_idx)%key)) then
     431            0 :               call self%insert(old_overflow(overflow_idx)%key, &
     432         4936 :                                old_overflow(overflow_idx)%value)
     433              :             end if
     434              :           end if
     435         4936 :           chain_idx = old_overflow(overflow_idx)%next
     436              :         end do
     437              :       end if
     438              :     end do
     439              : 
     440       692926 :     deallocate(old_buckets)
     441       682926 :     deallocate(old_overflow)
     442              : 
     443       146236 :   end subroutine name_index_rehash
     444              : 
     445        67587 : end module hsd_hash_table
        

Generated by: LCOV version 2.0-1