Line data Source code
1 : !> Hash table implementation for O(1) child name lookup
2 : !>
3 : !> This module provides a simple hash table for mapping string names to
4 : !> integer indices. Used by hsd_table to accelerate child lookup when
5 : !> tables have many children.
6 : !>
7 : !> ## Thread Safety
8 : !>
9 : !> This module is NOT thread-safe. Concurrent modifications to the same
10 : !> hash table from multiple threads may cause data corruption. Use external
11 : !> synchronization if concurrent access is required.
12 : module hsd_hash_table
13 : use hsd_constants, only: sp
14 : use hsd_utils, only: to_lower
15 : implicit none (type, external)
16 : private
17 :
18 : public :: hsd_name_index_t
19 :
20 : !> Hash table entry
21 : type :: hash_entry_t
22 : character(len=:), allocatable :: key
23 : character(len=:), allocatable :: key_lower !< Lowercased key for case-insensitive lookup
24 : integer :: value = 0
25 : logical :: occupied = .false.
26 : integer :: next = 0 !< Index of next entry in chain (0 = no more)
27 : end type hash_entry_t
28 :
29 : !> Hash table for name-to-index mapping
30 : !>
31 : !> Uses a hybrid collision resolution strategy:
32 : !> 1. Primary storage is a fixed-size bucket array accessed via hash (open addressing).
33 : !> 2. Collisions are handled via explicit chaining, but unlike traditional chaining where
34 : !> nodes are individually allocated on the heap, here they are stored in a pre-allocated
35 : !> contiguous `overflow` array.
36 : !>
37 : !> This "flat chaining" approach provides better cache locality and reduces memory fragmentation.
38 : !> In the `next` field, negative values (-idx) indicate an index into the overflow array.
39 : type :: hsd_name_index_t
40 : type(hash_entry_t), allocatable :: buckets(:)
41 : type(hash_entry_t), allocatable :: overflow(:)
42 : integer :: num_buckets = 0
43 : integer :: num_overflow = 0
44 : integer :: overflow_capacity = 0
45 : integer :: num_entries = 0
46 : contains
47 : procedure :: init => name_index_init
48 : procedure :: insert => name_index_insert
49 : procedure :: lookup => name_index_lookup
50 : procedure :: lookup_case_insensitive => name_index_lookup_ci
51 : procedure :: remove => name_index_remove
52 : procedure :: clear => name_index_clear
53 : procedure :: destroy => name_index_destroy
54 : procedure :: rehash => name_index_rehash
55 : end type hsd_name_index_t
56 :
57 : contains
58 :
59 : !> Hash function (djb2 algorithm)
60 2257279 : pure function hash_string(str) result(hash)
61 : character(len=*), intent(in) :: str
62 : integer :: hash
63 2257279 : integer :: i
64 :
65 2257279 : hash = 5381
66 15975510 : do i = 1, len(str)
67 : ! hash = hash * 33 + char, but avoid overflow with iand
68 15975510 : hash = iand(ishft(hash, 5) + hash + ichar(str(i:i)), huge(hash))
69 : end do
70 2257279 : hash = abs(hash) ! Ensure positive
71 :
72 2257279 : end function hash_string
73 :
74 : !> Initialize the hash table
75 78599 : subroutine name_index_init(self, capacity)
76 : class(hsd_name_index_t), intent(inout) :: self
77 : integer, intent(in), optional :: capacity
78 :
79 78599 : integer :: cap
80 :
81 78599 : cap = 32 ! Default capacity
82 78578 : if (present(capacity)) cap = max(8, capacity)
83 :
84 : ! Round up to power of 2 for efficient modulo
85 78599 : cap = 2 ** ceiling(log(real(cap, sp)) / log(2.0_sp))
86 :
87 78599 : if (allocated(self%buckets)) then
88 6 : call self%destroy()
89 : end if
90 :
91 707959 : allocate(self%buckets(cap))
92 78599 : self%num_buckets = cap
93 78599 : self%num_entries = 0
94 :
95 : ! Pre-allocate overflow area
96 78599 : self%overflow_capacity = max(8, cap / 4)
97 707399 : allocate(self%overflow(self%overflow_capacity))
98 78599 : self%num_overflow = 0
99 :
100 2257279 : end subroutine name_index_init
101 :
102 : !> Add an entry to overflow area, returns its index (negative = -idx in overflow)
103 11135 : function add_overflow_entry(self, key, value) result(idx)
104 : class(hsd_name_index_t), intent(inout) :: self
105 : character(len=*), intent(in) :: key
106 : integer, intent(in) :: value
107 : integer :: idx
108 :
109 11135 : type(hash_entry_t), allocatable :: tmp(:)
110 11135 : integer :: new_capacity
111 :
112 : ! Grow overflow if needed
113 11135 : if (self%num_overflow >= self%overflow_capacity) then
114 11 : new_capacity = self%overflow_capacity * 2
115 6411 : allocate(tmp(new_capacity))
116 3211 : tmp(1:self%num_overflow) = self%overflow(1:self%num_overflow)
117 3222 : call move_alloc(tmp, self%overflow)
118 11 : self%overflow_capacity = new_capacity
119 : end if
120 :
121 11135 : self%num_overflow = self%num_overflow + 1
122 11135 : self%overflow(self%num_overflow)%key = key
123 11135 : self%overflow(self%num_overflow)%key_lower = to_lower(key)
124 11135 : self%overflow(self%num_overflow)%value = value
125 11135 : self%overflow(self%num_overflow)%occupied = .true.
126 11135 : self%overflow(self%num_overflow)%next = 0
127 :
128 : ! Return negative index to indicate overflow area
129 11135 : idx = -self%num_overflow
130 :
131 89734 : end function add_overflow_entry
132 :
133 : !> Insert a key-value pair
134 1057041 : recursive subroutine name_index_insert(self, key, value)
135 : class(hsd_name_index_t), intent(inout) :: self
136 : character(len=*), intent(in) :: key
137 : integer, intent(in) :: value
138 :
139 1057041 : integer :: idx, chain_idx, overflow_idx
140 :
141 : ! Initialize if needed
142 1 : if (self%num_buckets == 0) call self%init()
143 :
144 : ! Check load factor and rehash if needed (> 0.75)
145 1057041 : if (self%num_entries * 4 > self%num_buckets * 3) then
146 67586 : call self%rehash()
147 : end if
148 :
149 1057041 : idx = mod(hash_string(key), self%num_buckets) + 1
150 :
151 1057041 : if (.not. self%buckets(idx)%occupied) then
152 : ! Empty bucket - use directly
153 1045804 : self%buckets(idx)%key = key
154 1045804 : self%buckets(idx)%key_lower = to_lower(key)
155 1045804 : self%buckets(idx)%value = value
156 1045804 : self%buckets(idx)%occupied = .true.
157 1045804 : self%buckets(idx)%next = 0
158 1045804 : self%num_entries = self%num_entries + 1
159 1045804 : return
160 : end if
161 :
162 : ! Check if key already exists in bucket
163 11237 : if (allocated(self%buckets(idx)%key)) then
164 11237 : if (self%buckets(idx)%key == key) then
165 100 : self%buckets(idx)%value = value
166 100 : return
167 : end if
168 : end if
169 :
170 : ! Follow chain to check for existing key and find end
171 11137 : chain_idx = self%buckets(idx)%next
172 11796 : do while (chain_idx /= 0)
173 : ! In overflow area (chain_idx is always negative for overflow entries)
174 3880 : overflow_idx = -chain_idx
175 3880 : if (allocated(self%overflow(overflow_idx)%key)) then
176 3880 : if (self%overflow(overflow_idx)%key == key) then
177 2 : self%overflow(overflow_idx)%value = value
178 2 : return
179 : end if
180 : end if
181 3878 : if (self%overflow(overflow_idx)%next == 0) exit
182 659 : chain_idx = self%overflow(overflow_idx)%next
183 : end do
184 :
185 : ! Add new entry to overflow and link it
186 11135 : overflow_idx = add_overflow_entry(self, key, value)
187 :
188 : ! Link to chain
189 11135 : if (self%buckets(idx)%next == 0) then
190 7916 : self%buckets(idx)%next = overflow_idx
191 : else
192 : ! Find last in chain (chain entries are always negative for overflow)
193 3219 : chain_idx = self%buckets(idx)%next
194 3878 : do while (chain_idx /= 0)
195 3878 : if (self%overflow(-chain_idx)%next == 0) then
196 3219 : self%overflow(-chain_idx)%next = overflow_idx
197 3219 : exit
198 : end if
199 659 : chain_idx = self%overflow(-chain_idx)%next
200 : end do
201 : end if
202 :
203 11135 : self%num_entries = self%num_entries + 1
204 :
205 1068176 : end subroutine name_index_insert
206 :
207 : !> Lookup a key (case-sensitive)
208 1200217 : function name_index_lookup(self, key, found) result(value)
209 : class(hsd_name_index_t), intent(in) :: self
210 : character(len=*), intent(in) :: key
211 : logical, intent(out), optional :: found
212 : integer :: value
213 :
214 1200217 : integer :: idx, chain_idx, overflow_idx
215 :
216 1200217 : value = 0
217 1200053 : if (present(found)) found = .false.
218 :
219 1200217 : if (self%num_buckets == 0) return
220 :
221 1200217 : idx = mod(hash_string(key), self%num_buckets) + 1
222 :
223 1200217 : if (.not. self%buckets(idx)%occupied) return
224 :
225 : ! Check bucket
226 1200192 : if (allocated(self%buckets(idx)%key)) then
227 1200192 : if (self%buckets(idx)%key == key) then
228 1000178 : value = self%buckets(idx)%value
229 1000178 : if (present(found)) found = .true.
230 1000178 : return
231 : end if
232 : end if
233 :
234 : ! Check chain (overflow entries use negative indices)
235 200014 : chain_idx = self%buckets(idx)%next
236 200015 : do while (chain_idx /= 0)
237 200011 : overflow_idx = -chain_idx
238 200011 : if (allocated(self%overflow(overflow_idx)%key)) then
239 200010 : if (self%overflow(overflow_idx)%key == key) then
240 200010 : value = self%overflow(overflow_idx)%value
241 200010 : if (present(found)) found = .true.
242 200010 : return
243 : end if
244 : end if
245 1 : chain_idx = self%overflow(overflow_idx)%next
246 : end do
247 :
248 1200217 : end function name_index_lookup
249 :
250 : !> Lookup a key (case-insensitive)
251 611681 : function name_index_lookup_ci(self, key, found) result(value)
252 : class(hsd_name_index_t), intent(in) :: self
253 : character(len=*), intent(in) :: key
254 : logical, intent(out), optional :: found
255 : integer :: value
256 :
257 611681 : integer :: i, chain_idx, overflow_idx
258 611681 : character(len=:), allocatable :: key_lower
259 :
260 611681 : value = 0
261 611674 : if (present(found)) found = .false.
262 :
263 611681 : if (self%num_buckets == 0) return
264 :
265 611681 : key_lower = to_lower(key)
266 :
267 : ! For case-insensitive, we need to scan all buckets since different
268 : ! casings hash differently
269 3880039 : do i = 1, self%num_buckets
270 3880039 : if (self%buckets(i)%occupied) then
271 : ! Check bucket
272 918614 : if (allocated(self%buckets(i)%key_lower)) then
273 918614 : if (self%buckets(i)%key_lower == key_lower) then
274 610574 : value = self%buckets(i)%value
275 610574 : if (present(found)) found = .true.
276 610574 : return
277 : end if
278 : end if
279 :
280 : ! Check chain (overflow entries use negative indices)
281 308040 : chain_idx = self%buckets(i)%next
282 499805 : do while (chain_idx /= 0)
283 191778 : overflow_idx = -chain_idx
284 191778 : if (allocated(self%overflow(overflow_idx)%key_lower)) then
285 191778 : if (self%overflow(overflow_idx)%key_lower == key_lower) then
286 13 : value = self%overflow(overflow_idx)%value
287 13 : if (present(found)) found = .true.
288 13 : return
289 : end if
290 : end if
291 191765 : chain_idx = self%overflow(overflow_idx)%next
292 : end do
293 : end if
294 : end do
295 :
296 1811898 : end function name_index_lookup_ci
297 :
298 : !> Remove a key (just marks as deleted, actual cleanup on rehash)
299 21 : subroutine name_index_remove(self, key)
300 : class(hsd_name_index_t), intent(inout) :: self
301 : character(len=*), intent(in) :: key
302 :
303 21 : integer :: idx, chain_idx, overflow_idx
304 :
305 0 : if (self%num_buckets == 0) return
306 :
307 21 : idx = mod(hash_string(key), self%num_buckets) + 1
308 :
309 21 : if (.not. self%buckets(idx)%occupied) return
310 :
311 : ! Check bucket
312 20 : if (allocated(self%buckets(idx)%key)) then
313 20 : if (self%buckets(idx)%key == key) then
314 : ! Clear the bucket but keep chain
315 19 : if (allocated(self%buckets(idx)%key)) deallocate(self%buckets(idx)%key)
316 19 : if (allocated(self%buckets(idx)%key_lower)) deallocate(self%buckets(idx)%key_lower)
317 19 : self%buckets(idx)%value = 0
318 :
319 : ! If there's a chain, promote first chain entry (overflow uses negative indices)
320 19 : if (self%buckets(idx)%next /= 0) then
321 1 : overflow_idx = -self%buckets(idx)%next
322 1 : self%buckets(idx)%key = self%overflow(overflow_idx)%key
323 1 : self%buckets(idx)%key_lower = self%overflow(overflow_idx)%key_lower
324 1 : self%buckets(idx)%value = self%overflow(overflow_idx)%value
325 1 : self%buckets(idx)%next = self%overflow(overflow_idx)%next
326 1 : self%overflow(overflow_idx)%occupied = .false.
327 : else
328 18 : self%buckets(idx)%occupied = .false.
329 : end if
330 :
331 19 : self%num_entries = self%num_entries - 1
332 19 : return
333 : end if
334 : end if
335 :
336 : ! Check chain (overflow entries use negative indices)
337 1 : chain_idx = self%buckets(idx)%next
338 1 : do while (chain_idx /= 0)
339 1 : overflow_idx = -chain_idx
340 1 : if (allocated(self%overflow(overflow_idx)%key)) then
341 1 : if (self%overflow(overflow_idx)%key == key) then
342 1 : if (allocated(self%overflow(overflow_idx)%key)) &
343 1 : deallocate(self%overflow(overflow_idx)%key)
344 1 : if (allocated(self%overflow(overflow_idx)%key_lower)) &
345 1 : deallocate(self%overflow(overflow_idx)%key_lower)
346 1 : self%overflow(overflow_idx)%occupied = .false.
347 1 : self%num_entries = self%num_entries - 1
348 1 : return
349 : end if
350 : end if
351 0 : chain_idx = self%overflow(overflow_idx)%next
352 : end do
353 :
354 611702 : end subroutine name_index_remove
355 :
356 : !> Clear all entries
357 78599 : subroutine name_index_clear(self)
358 : class(hsd_name_index_t), intent(inout) :: self
359 78599 : integer :: i
360 :
361 1265759 : do i = 1, self%num_buckets
362 1187160 : if (allocated(self%buckets(i)%key)) deallocate(self%buckets(i)%key)
363 1187160 : if (allocated(self%buckets(i)%key_lower)) deallocate(self%buckets(i)%key_lower)
364 1187160 : self%buckets(i)%occupied = .false.
365 1265759 : self%buckets(i)%next = 0
366 : end do
367 :
368 84798 : do i = 1, self%num_overflow
369 6199 : if (allocated(self%overflow(i)%key)) deallocate(self%overflow(i)%key)
370 6199 : if (allocated(self%overflow(i)%key_lower)) deallocate(self%overflow(i)%key_lower)
371 6199 : self%overflow(i)%occupied = .false.
372 84798 : self%overflow(i)%next = 0
373 : end do
374 :
375 78599 : self%num_entries = 0
376 78599 : self%num_overflow = 0
377 :
378 21 : end subroutine name_index_clear
379 :
380 : !> Destroy the hash table
381 78649 : subroutine name_index_destroy(self)
382 : class(hsd_name_index_t), intent(inout) :: self
383 :
384 78649 : if (allocated(self%buckets)) then
385 78592 : call self%clear()
386 1344232 : deallocate(self%buckets)
387 : end if
388 78649 : if (allocated(self%overflow)) then
389 790256 : deallocate(self%overflow)
390 : end if
391 78649 : self%num_buckets = 0
392 78649 : self%num_entries = 0
393 78649 : self%num_overflow = 0
394 78649 : self%overflow_capacity = 0
395 :
396 78599 : end subroutine name_index_destroy
397 :
398 : !> Rehash to larger table
399 67587 : subroutine name_index_rehash(self)
400 : class(hsd_name_index_t), intent(inout) :: self
401 :
402 67587 : type(hash_entry_t), allocatable :: old_buckets(:), old_overflow(:)
403 67587 : integer :: i, old_num_buckets, old_num_overflow, chain_idx, overflow_idx
404 :
405 67587 : old_num_buckets = self%num_buckets
406 67587 : old_num_overflow = self%num_overflow
407 67587 : call move_alloc(self%buckets, old_buckets)
408 67587 : call move_alloc(self%overflow, old_overflow)
409 :
410 : ! Initialize with double capacity
411 67587 : self%num_buckets = old_num_buckets * 2
412 1183091 : allocate(self%buckets(self%num_buckets))
413 67587 : self%overflow_capacity = max(8, self%num_buckets / 4)
414 616467 : allocate(self%overflow(self%overflow_capacity))
415 67587 : self%num_entries = 0
416 67587 : self%num_overflow = 0
417 :
418 : ! Reinsert all entries
419 625339 : do i = 1, old_num_buckets
420 625339 : if (old_buckets(i)%occupied) then
421 480962 : if (allocated(old_buckets(i)%key)) then
422 480962 : call self%insert(old_buckets(i)%key, old_buckets(i)%value)
423 : end if
424 :
425 : ! Process chain (overflow entries use negative indices)
426 480962 : chain_idx = old_buckets(i)%next
427 485898 : do while (chain_idx /= 0)
428 4936 : overflow_idx = -chain_idx
429 4936 : if (old_overflow(overflow_idx)%occupied) then
430 4936 : if (allocated(old_overflow(overflow_idx)%key)) then
431 0 : call self%insert(old_overflow(overflow_idx)%key, &
432 4936 : old_overflow(overflow_idx)%value)
433 : end if
434 : end if
435 4936 : chain_idx = old_overflow(overflow_idx)%next
436 : end do
437 : end if
438 : end do
439 :
440 692926 : deallocate(old_buckets)
441 682926 : deallocate(old_overflow)
442 :
443 146236 : end subroutine name_index_rehash
444 :
445 67587 : end module hsd_hash_table
|