Line data Source code
1 : !> Lightweight XML parser: parse well-formed XML 1.0 into an hsd_table tree.
2 : !>
3 : !> This is a purpose-built pull parser for structured data interchange,
4 : !> NOT a full-featured XML parser. It handles:
5 : !> - Elements, text content, attributes
6 : !> - Self-closing tags (<tag/>)
7 : !> - Character entity references (& < > " ')
8 : !> - CDATA sections (content preserved, markers stripped)
9 : !> - XML declarations (<?xml ...?>) — skipped
10 : !> - Comments (<!-- ... -->) — skipped
11 : !> - Processing instructions (<?...?>) — skipped
12 : !>
13 : !> NOT supported: DTD, namespaces, XSD, XPath, encoding conversion.
14 : module hsd_data_xml_parser
15 : use hsd, only: hsd_table, hsd_value, hsd_error_t, new_table, new_value, &
16 : & HSD_STAT_SYNTAX_ERROR, HSD_STAT_IO_ERROR
17 : use hsd_data_xml_escape, only: xml_unescape
18 : implicit none(type, external)
19 : private
20 :
21 : public :: xml_parse_file, xml_parse_string
22 :
23 : !> Maximum nesting depth
24 : integer, parameter :: MAX_DEPTH = 256
25 :
26 : contains
27 :
28 : !> Parse an XML file into an hsd_table tree.
29 48 : subroutine xml_parse_file(filename, root, error)
30 : character(len=*), intent(in) :: filename
31 : type(hsd_table), intent(out) :: root
32 : type(hsd_error_t), allocatable, intent(out), optional :: error
33 :
34 24 : character(len=:), allocatable :: source
35 24 : integer :: unit_num, ios, file_size
36 24 : logical :: exists
37 :
38 24 : inquire(file=filename, exist=exists)
39 24 : if (.not. exists) then
40 0 : if (present(error)) then
41 0 : allocate(error)
42 0 : error%code = HSD_STAT_IO_ERROR
43 0 : error%message = "File not found: " // trim(filename)
44 : end if
45 0 : return
46 : end if
47 :
48 24 : inquire(file=filename, size=file_size)
49 24 : if (file_size <= 0) file_size = 65536
50 :
51 24 : allocate(character(len=file_size) :: source)
52 :
53 : open(newunit=unit_num, file=filename, status="old", action="read", &
54 24 : & access="stream", form="unformatted", iostat=ios)
55 24 : if (ios /= 0) then
56 0 : if (present(error)) then
57 0 : allocate(error)
58 0 : error%code = HSD_STAT_IO_ERROR
59 0 : error%message = "Cannot open file: " // trim(filename)
60 : end if
61 0 : return
62 : end if
63 24 : read(unit_num, iostat=ios) source
64 24 : close(unit_num)
65 :
66 : ! Trim to actual content (file_size from inquire may include padding)
67 24 : call xml_parse_string(source, root, error, filename)
68 :
69 24 : end subroutine xml_parse_file
70 :
71 : !> Parse an XML string into an hsd_table tree.
72 178 : subroutine xml_parse_string(source, root, error, filename)
73 : character(len=*), intent(in) :: source
74 : type(hsd_table), intent(out) :: root
75 : type(hsd_error_t), allocatable, intent(out), optional :: error
76 : character(len=*), intent(in), optional :: filename
77 :
78 71 : integer :: pos, src_len, line, col
79 71 : character(len=:), allocatable :: fname
80 71 : character(len=:), allocatable :: doc_tag, close_name
81 71 : character(len=:), allocatable :: attr_name, attr_value
82 :
83 71 : if (present(filename)) then
84 24 : fname = filename
85 : else
86 47 : fname = "<string>"
87 : end if
88 :
89 71 : call new_table(root)
90 :
91 71 : src_len = len_trim(source)
92 71 : pos = 1
93 71 : line = 1
94 71 : col = 1
95 :
96 : ! Skip BOM if present
97 71 : if (src_len >= 3) then
98 : if (iachar(source(1:1)) == 239 .and. iachar(source(2:2)) == 187 &
99 70 : & .and. iachar(source(3:3)) == 191) then
100 0 : pos = 4
101 : end if
102 : end if
103 :
104 : ! Skip prolog: whitespace, PIs (<?xml ...?>), and comments before
105 : ! the document element.
106 127 : do while (pos <= src_len)
107 126 : call skip_whitespace(source, src_len, pos, line, col)
108 126 : if (pos > src_len) exit
109 126 : if (source(pos:pos) == "<" .and. pos + 1 <= src_len) then
110 181 : if (source(pos + 1:pos + 1) == "?") then
111 55 : call skip_pi(source, src_len, pos, line, col, error, fname)
112 55 : if (allocated(error)) return
113 55 : cycle
114 72 : else if (pos + 3 <= src_len .and. source(pos:pos + 3) == "<!--") then
115 1 : call skip_comment(source, src_len, pos, line, col, error, fname)
116 1 : if (allocated(error)) return
117 1 : cycle
118 : end if
119 : end if
120 70 : exit
121 : end do
122 :
123 71 : if (pos > src_len .or. source(pos:pos) /= "<") then
124 : ! Empty or whitespace-only input — return empty root
125 1 : return
126 : end if
127 :
128 : ! Read the document element open tag.
129 : ! We unwrap it so its children go directly into root.
130 70 : call advance(source, pos, line, col) ! skip '<'
131 70 : call read_name(source, src_len, pos, line, col, doc_tag)
132 70 : if (len(doc_tag) == 0) then
133 : call make_parse_error(error, "Expected document element name", &
134 0 : & fname, line, col)
135 0 : return
136 : end if
137 :
138 : ! Skip document element attributes
139 70 : call skip_whitespace(source, src_len, pos, line, col)
140 70 : do while (pos <= src_len)
141 70 : if (source(pos:pos) == ">") then
142 69 : call advance(source, pos, line, col)
143 69 : exit
144 1 : else if (source(pos:pos) == "/") then
145 : ! Self-closing document element → empty root
146 1 : if (pos + 1 <= src_len .and. source(pos + 1:pos + 1) == ">") then
147 1 : call advance(source, pos, line, col)
148 1 : call advance(source, pos, line, col)
149 1 : return
150 : end if
151 : else
152 0 : call read_name(source, src_len, pos, line, col, attr_name)
153 0 : call skip_whitespace(source, src_len, pos, line, col)
154 0 : if (pos <= src_len .and. source(pos:pos) == "=") then
155 0 : call advance(source, pos, line, col)
156 0 : call skip_whitespace(source, src_len, pos, line, col)
157 : call read_attrib_value(source, src_len, pos, line, col, &
158 0 : & attr_value, error, fname)
159 0 : if (allocated(error)) return
160 : end if
161 0 : call skip_whitespace(source, src_len, pos, line, col)
162 : end if
163 : end do
164 :
165 : ! Parse document element content directly into root
166 69 : call parse_content(source, src_len, pos, line, col, root, error, fname)
167 69 : if (allocated(error)) return
168 :
169 : ! Read document element close tag
170 : call read_close_tag(source, src_len, pos, line, col, close_name, &
171 68 : & error, fname)
172 68 : if (allocated(error)) return
173 :
174 68 : if (close_name /= doc_tag) then
175 : call make_parse_error(error, "Mismatched document element: expected </" &
176 : & // doc_tag // "> but got </" // close_name // ">", &
177 0 : & fname, line, col)
178 0 : return
179 : end if
180 :
181 95 : end subroutine xml_parse_string
182 :
183 : !> Parse content: elements and text at the current nesting level.
184 : !> Adds children to parent_table. Stops at EOF or a closing tag.
185 1782 : recursive subroutine parse_content(src, src_len, pos, line, col, &
186 : & parent, error, fname)
187 : character(len=*), intent(in) :: src
188 : integer, intent(in) :: src_len
189 : integer, intent(inout) :: pos, line, col
190 : type(hsd_table), intent(inout) :: parent
191 : type(hsd_error_t), allocatable, intent(out), optional :: error
192 : character(len=*), intent(in) :: fname
193 :
194 1782 : character(len=:), allocatable :: text_buf
195 1782 : integer :: text_len
196 1782 : type(hsd_error_t), allocatable :: sub_error
197 :
198 1782 : text_len = 0
199 1782 : allocate(character(len=4096) :: text_buf)
200 :
201 122875 : do while (pos <= src_len)
202 122875 : if (src(pos:pos) == "<") then
203 : ! Flush accumulated text
204 3496 : if (text_len > 0) then
205 3451 : call flush_text(text_buf, text_len, parent)
206 : end if
207 :
208 : ! Check what kind of markup
209 3496 : if (pos + 1 > src_len) then
210 : call make_parse_error(error, "Unexpected end of input after '<'", &
211 0 : & fname, line, col)
212 0 : return
213 : end if
214 :
215 3496 : if (src(pos + 1:pos + 1) == "/") then
216 : ! Closing tag — return to caller
217 1781 : return
218 1716 : else if (src(pos + 1:pos + 1) == "!") then
219 : call skip_comment_or_cdata(src, src_len, pos, line, col, &
220 1 : & text_buf, text_len, error, fname)
221 1 : if (present(error)) then
222 1 : if (allocated(error)) return
223 : end if
224 1714 : else if (src(pos + 1:pos + 1) == "?") then
225 0 : call skip_pi(src, src_len, pos, line, col, error, fname)
226 0 : if (present(error)) then
227 0 : if (allocated(error)) return
228 : end if
229 : else
230 : ! Opening tag
231 : call parse_element(src, src_len, pos, line, col, parent, &
232 1714 : & sub_error, fname)
233 1714 : if (allocated(sub_error)) then
234 1 : if (present(error)) then
235 1 : error = sub_error
236 : end if
237 1 : return
238 : end if
239 : end if
240 : else
241 : ! Accumulate text content
242 119379 : call accum_text(text_buf, text_len, src(pos:pos))
243 119379 : call advance(src, pos, line, col)
244 : end if
245 : end do
246 :
247 : ! Flush remaining text
248 0 : if (text_len > 0) then
249 0 : call flush_text(text_buf, text_len, parent)
250 : end if
251 :
252 3635 : end subroutine parse_content
253 :
254 : !> Parse a single element: <tag attrs>content</tag> or <tag attrs/>
255 1714 : recursive subroutine parse_element(src, src_len, pos, line, col, &
256 : & parent, error, fname)
257 : character(len=*), intent(in) :: src
258 : integer, intent(in) :: src_len
259 : integer, intent(inout) :: pos, line, col
260 : type(hsd_table), intent(inout) :: parent
261 : type(hsd_error_t), allocatable, intent(out) :: error
262 : character(len=*), intent(in) :: fname
263 :
264 1714 : character(len=:), allocatable :: tag_name, attr_name, attr_value
265 1714 : character(len=:), allocatable :: all_attribs
266 1714 : type(hsd_table), allocatable :: child_table
267 1714 : type(hsd_value), allocatable :: child_value
268 1714 : character(len=:), allocatable :: close_name
269 1714 : logical :: self_closing
270 1714 : integer :: n_extra_attrs, jj
271 : integer, parameter :: MAX_EXTRA_ATTRS = 64
272 : character(len=256) :: extra_attr_names(MAX_EXTRA_ATTRS)
273 : character(len=256) :: extra_attr_values(MAX_EXTRA_ATTRS)
274 :
275 : ! Skip '<'
276 1714 : call advance(src, pos, line, col)
277 :
278 : ! Read tag name
279 1714 : call read_name(src, src_len, pos, line, col, tag_name)
280 1714 : if (len(tag_name) == 0) then
281 : call make_parse_error(error, "Expected element name after '<'", &
282 0 : & fname, line, col)
283 0 : return
284 : end if
285 :
286 : ! Read attributes
287 1714 : all_attribs = ""
288 1714 : self_closing = .false.
289 1714 : n_extra_attrs = 0
290 :
291 1714 : call skip_whitespace(src, src_len, pos, line, col)
292 :
293 1768 : do while (pos <= src_len)
294 1768 : if (src(pos:pos) == ">") then
295 1713 : call advance(src, pos, line, col)
296 1713 : exit
297 55 : else if (src(pos:pos) == "/") then
298 1 : if (pos + 1 <= src_len .and. src(pos + 1:pos + 1) == ">") then
299 1 : self_closing = .true.
300 1 : call advance(src, pos, line, col) ! skip /
301 1 : call advance(src, pos, line, col) ! skip >
302 1 : exit
303 : end if
304 : else
305 : ! Read attribute
306 54 : call read_name(src, src_len, pos, line, col, attr_name)
307 54 : if (len(attr_name) == 0) then
308 : call make_parse_error(error, "Expected attribute name or '>' in element", &
309 0 : & fname, line, col)
310 0 : return
311 : end if
312 54 : call skip_whitespace(src, src_len, pos, line, col)
313 54 : if (pos <= src_len .and. src(pos:pos) == "=") then
314 54 : call advance(src, pos, line, col)
315 54 : call skip_whitespace(src, src_len, pos, line, col)
316 : call read_attrib_value(src, src_len, pos, line, col, attr_value, &
317 54 : & error, fname)
318 54 : if (allocated(error)) return
319 :
320 : ! Map 'unit' attribute to HSD attrib field
321 54 : if (attr_name == "unit") then
322 52 : if (len(all_attribs) > 0) then
323 0 : all_attribs = all_attribs // ", " // attr_value
324 : else
325 52 : all_attribs = attr_value
326 : end if
327 : else
328 : ! Store non-unit attributes for __attr_<name> children
329 2 : if (n_extra_attrs < MAX_EXTRA_ATTRS) then
330 2 : n_extra_attrs = n_extra_attrs + 1
331 2 : extra_attr_names(n_extra_attrs) = attr_name
332 2 : extra_attr_values(n_extra_attrs) = attr_value
333 : end if
334 : end if
335 : end if
336 54 : call skip_whitespace(src, src_len, pos, line, col)
337 : end if
338 : end do
339 :
340 1715 : if (self_closing) then
341 : ! Self-closing element → empty table
342 1 : allocate(child_table)
343 1 : call new_table(child_table, name=tag_name)
344 1 : if (len(all_attribs) > 0) child_table%attrib = all_attribs
345 1 : do jj = 1, n_extra_attrs
346 0 : allocate(child_value)
347 0 : call new_value(child_value, &
348 0 : & name="__attr_" // trim(extra_attr_names(jj)))
349 0 : child_value%string_value = trim(extra_attr_values(jj))
350 0 : call child_table%add_child(child_value)
351 1 : deallocate(child_value)
352 : end do
353 1 : call parent%add_child(child_table)
354 1 : return
355 : end if
356 :
357 : ! Parse content between open and close tags.
358 : ! First, check if it's pure text content (no child elements).
359 : ! We use a temp table and inspect what we get.
360 1713 : allocate(child_table)
361 1713 : call new_table(child_table, name=tag_name)
362 1713 : if (len(all_attribs) > 0) child_table%attrib = all_attribs
363 1715 : do jj = 1, n_extra_attrs
364 2 : allocate(child_value)
365 2 : call new_value(child_value, &
366 2 : & name="__attr_" // trim(extra_attr_names(jj)))
367 2 : child_value%string_value = trim(extra_attr_values(jj))
368 2 : call child_table%add_child(child_value)
369 1715 : deallocate(child_value)
370 : end do
371 :
372 1713 : call parse_content(src, src_len, pos, line, col, child_table, error, fname)
373 1713 : if (allocated(error)) return
374 :
375 : ! Now we should be at </tag>
376 1713 : call read_close_tag(src, src_len, pos, line, col, close_name, error, fname)
377 1713 : if (allocated(error)) return
378 :
379 1713 : if (close_name /= tag_name) then
380 : call make_parse_error(error, "Mismatched closing tag: expected </" &
381 1 : & // tag_name // "> but got </" // close_name // ">", fname, line, col)
382 1 : return
383 : end if
384 :
385 : ! Optimization: if the table has exactly one unnamed value child,
386 : ! convert to a named value node instead (matching HSD semantics).
387 : ! Exception: if the text contains newlines, keep as table with #text
388 : ! child to preserve multi-line block structure for matrix data.
389 1712 : if (child_table%num_children == 1) then
390 0 : select type (only_child => child_table%children(1)%node)
391 : type is (hsd_value)
392 1338 : block
393 1338 : logical :: is_unnamed
394 1338 : is_unnamed = .not. allocated(only_child%name)
395 1338 : if (.not. is_unnamed) is_unnamed = (len_trim(only_child%name) == 0)
396 1338 : if (is_unnamed) then
397 : ! Check if the text content contains newlines
398 1287 : if (has_newline_content(only_child)) then
399 : ! Multi-line content: keep as table with #text child
400 36 : only_child%name = "#text"
401 : ! Invalidate hash index since we renamed the child
402 36 : call child_table%invalidate_index()
403 36 : call parent%add_child(child_table)
404 1287 : return
405 : end if
406 1251 : allocate(child_value)
407 1251 : child_value%name = tag_name
408 1251 : child_value%value_type = only_child%value_type
409 1251 : if (allocated(only_child%string_value)) &
410 1251 : & child_value%string_value = only_child%string_value
411 1251 : child_value%int_value = only_child%int_value
412 1251 : child_value%real_value = only_child%real_value
413 1251 : child_value%logical_value = only_child%logical_value
414 1251 : child_value%complex_value = only_child%complex_value
415 1251 : if (allocated(only_child%raw_text)) child_value%raw_text = only_child%raw_text
416 1251 : if (allocated(child_table%attrib)) child_value%attrib = child_table%attrib
417 1251 : call parent%add_child(child_value)
418 1251 : return
419 : end if
420 : end block
421 : end select
422 : end if
423 :
424 : ! Add as table
425 425 : call parent%add_child(child_table)
426 :
427 35142 : end subroutine parse_element
428 :
429 : !> Read a closing tag </name> and return the name.
430 1781 : subroutine read_close_tag(src, src_len, pos, line, col, tag_name, error, fname)
431 : character(len=*), intent(in) :: src
432 : integer, intent(in) :: src_len
433 : integer, intent(inout) :: pos, line, col
434 : character(len=:), allocatable, intent(out) :: tag_name
435 : type(hsd_error_t), allocatable, intent(out) :: error
436 : character(len=*), intent(in) :: fname
437 :
438 : ! Expect </
439 1781 : if (pos + 1 > src_len .or. src(pos:pos + 1) /= "</") then
440 0 : call make_parse_error(error, "Expected closing tag '</'", fname, line, col)
441 0 : return
442 : end if
443 1781 : call advance(src, pos, line, col) ! <
444 1781 : call advance(src, pos, line, col) ! /
445 :
446 1781 : call read_name(src, src_len, pos, line, col, tag_name)
447 1781 : call skip_whitespace(src, src_len, pos, line, col)
448 :
449 1781 : if (pos > src_len .or. src(pos:pos) /= ">") then
450 0 : call make_parse_error(error, "Expected '>' in closing tag", fname, line, col)
451 0 : return
452 : end if
453 1781 : call advance(src, pos, line, col)
454 :
455 1781 : end subroutine read_close_tag
456 :
457 : !> Skip <!-- comment --> or handle <![CDATA[...]]>
458 1 : subroutine skip_comment_or_cdata(src, src_len, pos, line, col, &
459 : & text_buf, text_len, error, fname)
460 : character(len=*), intent(in) :: src
461 : integer, intent(in) :: src_len
462 : integer, intent(inout) :: pos, line, col
463 : character(len=:), allocatable, intent(inout) :: text_buf
464 : integer, intent(inout) :: text_len
465 : type(hsd_error_t), allocatable, intent(out), optional :: error
466 : character(len=*), intent(in) :: fname
467 :
468 : ! pos is at '<', next is '!'
469 1 : if (pos + 3 <= src_len .and. src(pos:pos + 3) == "<!--") then
470 : ! Skip comment
471 0 : pos = pos + 4
472 0 : col = col + 4
473 0 : do while (pos + 2 <= src_len)
474 0 : if (src(pos:pos + 2) == "-->") then
475 0 : pos = pos + 3
476 0 : col = col + 3
477 0 : return
478 : end if
479 0 : call advance(src, pos, line, col)
480 : end do
481 0 : call make_parse_error(error, "Unterminated comment", fname, line, col)
482 1 : else if (pos + 8 <= src_len .and. src(pos:pos + 8) == "<![CDATA[") then
483 : ! CDATA section: preserve content
484 1 : pos = pos + 9
485 1 : col = col + 9
486 6 : do while (pos + 2 <= src_len)
487 6 : if (src(pos:pos + 2) == "]]>") then
488 1 : pos = pos + 3
489 1 : col = col + 3
490 1 : return
491 : end if
492 5 : call accum_text(text_buf, text_len, src(pos:pos))
493 5 : call advance(src, pos, line, col)
494 : end do
495 0 : call make_parse_error(error, "Unterminated CDATA section", fname, line, col)
496 : else
497 : ! Unknown <! construct — skip to >
498 0 : do while (pos <= src_len .and. src(pos:pos) /= ">")
499 0 : call advance(src, pos, line, col)
500 : end do
501 0 : if (pos <= src_len) call advance(src, pos, line, col)
502 : end if
503 :
504 1782 : end subroutine skip_comment_or_cdata
505 :
506 : !> Skip a comment <!-- ... --> without text accumulation (for prolog).
507 1 : subroutine skip_comment(src, src_len, pos, line, col, error, fname)
508 : character(len=*), intent(in) :: src
509 : integer, intent(in) :: src_len
510 : integer, intent(inout) :: pos, line, col
511 : type(hsd_error_t), allocatable, intent(out), optional :: error
512 : character(len=*), intent(in) :: fname
513 :
514 : ! pos is at '<', expect <!--
515 1 : pos = pos + 4
516 1 : col = col + 4
517 10 : do while (pos + 2 <= src_len)
518 10 : if (src(pos:pos + 2) == "-->") then
519 1 : pos = pos + 3
520 1 : col = col + 3
521 1 : return
522 : end if
523 9 : call advance(src, pos, line, col)
524 : end do
525 0 : call make_parse_error(error, "Unterminated comment", fname, line, col)
526 :
527 2 : end subroutine skip_comment
528 :
529 : !> Skip a processing instruction <?...?>
530 55 : subroutine skip_pi(src, src_len, pos, line, col, error, fname)
531 : character(len=*), intent(in) :: src
532 : integer, intent(in) :: src_len
533 : integer, intent(inout) :: pos, line, col
534 : type(hsd_error_t), allocatable, intent(out), optional :: error
535 : character(len=*), intent(in) :: fname
536 :
537 : ! pos is at '<', next is '?'
538 55 : pos = pos + 2
539 55 : col = col + 2
540 1891 : do while (pos + 1 <= src_len)
541 1891 : if (src(pos:pos + 1) == "?>") then
542 55 : pos = pos + 2
543 55 : col = col + 2
544 55 : return
545 : end if
546 1836 : call advance(src, pos, line, col)
547 : end do
548 : call make_parse_error(error, "Unterminated processing instruction", &
549 0 : & fname, line, col)
550 :
551 56 : end subroutine skip_pi
552 :
553 : !> Read an XML name (tag name or attribute name).
554 3619 : subroutine read_name(src, src_len, pos, line, col, name)
555 : character(len=*), intent(in) :: src
556 : integer, intent(in) :: src_len
557 : integer, intent(inout) :: pos, line, col
558 : character(len=:), allocatable, intent(out) :: name
559 :
560 3619 : integer :: start
561 :
562 3619 : start = pos
563 :
564 : ! line is accepted for interface consistency but cannot change
565 : ! (XML names never contain newlines)
566 : if (.false.) line = line
567 33997 : do while (pos <= src_len)
568 33997 : select case (src(pos:pos))
569 : case (" ", achar(9), achar(10), achar(13), "=", ">", "/")
570 3619 : exit
571 : case default
572 30378 : pos = pos + 1
573 33997 : col = col + 1
574 : end select
575 : end do
576 :
577 3619 : if (pos > start) then
578 3619 : name = src(start:pos - 1)
579 : else
580 0 : name = ""
581 : end if
582 :
583 55 : end subroutine read_name
584 :
585 : !> Read a quoted attribute value.
586 54 : subroutine read_attrib_value(src, src_len, pos, line, col, value, error, fname)
587 : character(len=*), intent(in) :: src
588 : integer, intent(in) :: src_len
589 : integer, intent(inout) :: pos, line, col
590 : character(len=:), allocatable, intent(out) :: value
591 : type(hsd_error_t), allocatable, intent(out) :: error
592 : character(len=*), intent(in) :: fname
593 :
594 : character(len=1) :: quote
595 54 : integer :: start
596 :
597 54 : if (pos > src_len) then
598 0 : call make_parse_error(error, "Expected attribute value", fname, line, col)
599 0 : return
600 : end if
601 :
602 54 : quote = src(pos:pos)
603 54 : if (quote /= '"' .and. quote /= "'") then
604 : call make_parse_error(error, "Expected quoted attribute value", &
605 0 : & fname, line, col)
606 0 : return
607 : end if
608 :
609 54 : call advance(src, pos, line, col) ! skip opening quote
610 54 : start = pos
611 :
612 403 : do while (pos <= src_len)
613 403 : if (src(pos:pos) == quote) then
614 54 : value = xml_unescape(src(start:pos - 1))
615 54 : call advance(src, pos, line, col) ! skip closing quote
616 54 : return
617 : end if
618 349 : call advance(src, pos, line, col)
619 : end do
620 :
621 0 : call make_parse_error(error, "Unterminated attribute value", fname, line, col)
622 :
623 3673 : end subroutine read_attrib_value
624 :
625 : !> Skip whitespace.
626 3853 : subroutine skip_whitespace(src, src_len, pos, line, col)
627 : character(len=*), intent(in) :: src
628 : integer, intent(in) :: src_len
629 : integer, intent(inout) :: pos, line, col
630 :
631 3960 : do while (pos <= src_len)
632 3960 : select case (src(pos:pos))
633 : case (" ", achar(9), achar(10), achar(13))
634 107 : call advance(src, pos, line, col)
635 : case default
636 3960 : return
637 : end select
638 : end do
639 :
640 3907 : end subroutine skip_whitespace
641 :
642 : !> Advance position by one character, tracking line/col.
643 130760 : subroutine advance(src, pos, line, col)
644 : character(len=*), intent(in) :: src
645 : integer, intent(inout) :: pos, line, col
646 :
647 130760 : if (pos <= len(src) .and. src(pos:pos) == achar(10)) then
648 2293 : line = line + 1
649 2293 : col = 1
650 : else
651 128467 : col = col + 1
652 : end if
653 130760 : pos = pos + 1
654 :
655 3853 : end subroutine advance
656 :
657 : !> Accumulate a character into the text buffer.
658 119384 : subroutine accum_text(buf, buf_len, ch)
659 : character(len=:), allocatable, intent(inout) :: buf
660 : integer, intent(inout) :: buf_len
661 : character(len=*), intent(in) :: ch
662 :
663 119384 : character(len=:), allocatable :: tmp
664 119384 : integer :: new_cap
665 :
666 119384 : if (buf_len + 1 > len(buf)) then
667 5 : new_cap = len(buf) * 2
668 5 : allocate(character(len=new_cap) :: tmp)
669 5 : tmp(1:buf_len) = buf(1:buf_len)
670 5 : call move_alloc(tmp, buf)
671 : end if
672 119384 : buf_len = buf_len + 1
673 119384 : buf(buf_len:buf_len) = ch
674 :
675 250144 : end subroutine accum_text
676 :
677 : !> Flush accumulated text to parent as an anonymous hsd_value.
678 : !> Whitespace-only text (spaces, newlines, tabs) is discarded as
679 : !> insignificant whitespace between XML elements.
680 3451 : subroutine flush_text(buf, buf_len, parent)
681 : character(len=:), allocatable, intent(inout) :: buf
682 : integer, intent(inout) :: buf_len
683 : type(hsd_table), intent(inout) :: parent
684 :
685 3451 : type(hsd_value), allocatable :: val
686 3451 : character(len=:), allocatable :: unescaped
687 : character(len=*), parameter :: WHITESPACE = " " // char(9) // char(10) // char(13)
688 3451 : integer :: first, last
689 :
690 3451 : unescaped = xml_unescape(buf(1:buf_len))
691 3451 : buf_len = 0
692 :
693 : ! Discard if entirely whitespace (spaces, tabs, newlines, CR)
694 3451 : if (verify(unescaped, WHITESPACE) == 0) return
695 :
696 : ! Strip leading and trailing whitespace (including newlines)
697 1289 : first = verify(unescaped, WHITESPACE)
698 1289 : last = verify(unescaped, WHITESPACE, back=.true.)
699 :
700 1289 : allocate(val)
701 1289 : call new_value(val)
702 1289 : call val%set_string(unescaped(first:last))
703 1289 : call parent%add_child(val)
704 :
705 122835 : end subroutine flush_text
706 :
707 : !> Create a parse error.
708 1 : subroutine make_parse_error(error, message, filename, line, col)
709 : type(hsd_error_t), allocatable, intent(out), optional :: error
710 : character(len=*), intent(in) :: message, filename
711 : integer, intent(in) :: line, col
712 :
713 : character(len=20) :: line_str, col_str
714 :
715 0 : if (.not. present(error)) return
716 :
717 1 : write(line_str, "(i0)") line
718 1 : write(col_str, "(i0)") col
719 :
720 1 : allocate(error)
721 1 : error%code = HSD_STAT_SYNTAX_ERROR
722 : error%message = trim(filename) // ":" // trim(line_str) // ":" // &
723 1 : & trim(col_str) // ": " // message
724 1 : error%filename = filename
725 1 : error%line_start = line
726 1 : error%column = col
727 :
728 3452 : end subroutine make_parse_error
729 :
730 : !> Check if a value node contains newline characters in its content.
731 1287 : pure function has_newline_content(val) result(has_nl)
732 : type(hsd_value), intent(in) :: val
733 : logical :: has_nl
734 :
735 1287 : has_nl = .false.
736 1287 : if (allocated(val%string_value)) then
737 1287 : has_nl = index(val%string_value, new_line("a")) > 0
738 : end if
739 1287 : if (.not. has_nl .and. allocated(val%raw_text)) then
740 0 : has_nl = index(val%raw_text, new_line("a")) > 0
741 : end if
742 :
743 1288 : end function has_newline_content
744 :
745 1509 : end module hsd_data_xml_parser
|