LCOV - code coverage report
Current view: top level - src/backends - hsd_data_xml_parser.f90 (source / functions) Coverage Total Hit
Test: coverage.info Lines: 81.7 % 372 304
Test Date: 2026-02-15 21:36:29 Functions: 100.0 % 16 16

            Line data    Source code
       1              : !> Lightweight XML parser: parse well-formed XML 1.0 into an hsd_table tree.
       2              : !>
       3              : !> This is a purpose-built pull parser for structured data interchange,
       4              : !> NOT a full-featured XML parser. It handles:
       5              : !>   - Elements, text content, attributes
       6              : !>   - Self-closing tags (<tag/>)
       7              : !>   - Character entity references (&amp; &lt; &gt; &quot; &apos;)
       8              : !>   - CDATA sections (content preserved, markers stripped)
       9              : !>   - XML declarations (<?xml ...?>) — skipped
      10              : !>   - Comments (<!-- ... -->) — skipped
      11              : !>   - Processing instructions (<?...?>) — skipped
      12              : !>
      13              : !> NOT supported: DTD, namespaces, XSD, XPath, encoding conversion.
      14              : module hsd_data_xml_parser
      15              :   use hsd, only: hsd_table, hsd_value, hsd_error_t, new_table, new_value, &
      16              :       & HSD_STAT_SYNTAX_ERROR, HSD_STAT_IO_ERROR
      17              :   use hsd_data_xml_escape, only: xml_unescape
      18              :   implicit none(type, external)
      19              :   private
      20              : 
      21              :   public :: xml_parse_file, xml_parse_string
      22              : 
      23              :   !> Maximum nesting depth
      24              :   integer, parameter :: MAX_DEPTH = 256
      25              : 
      26              : contains
      27              : 
      28              :   !> Parse an XML file into an hsd_table tree.
      29           48 :   subroutine xml_parse_file(filename, root, error)
      30              :     character(len=*), intent(in) :: filename
      31              :     type(hsd_table), intent(out) :: root
      32              :     type(hsd_error_t), allocatable, intent(out), optional :: error
      33              : 
      34           24 :     character(len=:), allocatable :: source
      35           24 :     integer :: unit_num, ios, file_size
      36           24 :     logical :: exists
      37              : 
      38           24 :     inquire(file=filename, exist=exists)
      39           24 :     if (.not. exists) then
      40            0 :       if (present(error)) then
      41            0 :         allocate(error)
      42            0 :         error%code = HSD_STAT_IO_ERROR
      43            0 :         error%message = "File not found: " // trim(filename)
      44              :       end if
      45            0 :       return
      46              :     end if
      47              : 
      48           24 :     inquire(file=filename, size=file_size)
      49           24 :     if (file_size <= 0) file_size = 65536
      50              : 
      51           24 :     allocate(character(len=file_size) :: source)
      52              : 
      53              :     open(newunit=unit_num, file=filename, status="old", action="read", &
      54           24 :         & access="stream", form="unformatted", iostat=ios)
      55           24 :     if (ios /= 0) then
      56            0 :       if (present(error)) then
      57            0 :         allocate(error)
      58            0 :         error%code = HSD_STAT_IO_ERROR
      59            0 :         error%message = "Cannot open file: " // trim(filename)
      60              :       end if
      61            0 :       return
      62              :     end if
      63           24 :     read(unit_num, iostat=ios) source
      64           24 :     close(unit_num)
      65              : 
      66              :     ! Trim to actual content (file_size from inquire may include padding)
      67           24 :     call xml_parse_string(source, root, error, filename)
      68              : 
      69           24 :   end subroutine xml_parse_file
      70              : 
      71              :   !> Parse an XML string into an hsd_table tree.
      72          178 :   subroutine xml_parse_string(source, root, error, filename)
      73              :     character(len=*), intent(in) :: source
      74              :     type(hsd_table), intent(out) :: root
      75              :     type(hsd_error_t), allocatable, intent(out), optional :: error
      76              :     character(len=*), intent(in), optional :: filename
      77              : 
      78           71 :     integer :: pos, src_len, line, col
      79           71 :     character(len=:), allocatable :: fname
      80           71 :     character(len=:), allocatable :: doc_tag, close_name
      81           71 :     character(len=:), allocatable :: attr_name, attr_value
      82              : 
      83           71 :     if (present(filename)) then
      84           24 :       fname = filename
      85              :     else
      86           47 :       fname = "<string>"
      87              :     end if
      88              : 
      89           71 :     call new_table(root)
      90              : 
      91           71 :     src_len = len_trim(source)
      92           71 :     pos = 1
      93           71 :     line = 1
      94           71 :     col = 1
      95              : 
      96              :     ! Skip BOM if present
      97           71 :     if (src_len >= 3) then
      98              :       if (iachar(source(1:1)) == 239 .and. iachar(source(2:2)) == 187 &
      99           70 :           & .and. iachar(source(3:3)) == 191) then
     100            0 :         pos = 4
     101              :       end if
     102              :     end if
     103              : 
     104              :     ! Skip prolog: whitespace, PIs (<?xml ...?>), and comments before
     105              :     ! the document element.
     106          127 :     do while (pos <= src_len)
     107          126 :       call skip_whitespace(source, src_len, pos, line, col)
     108          126 :       if (pos > src_len) exit
     109          126 :       if (source(pos:pos) == "<" .and. pos + 1 <= src_len) then
     110          181 :         if (source(pos + 1:pos + 1) == "?") then
     111           55 :           call skip_pi(source, src_len, pos, line, col, error, fname)
     112           55 :           if (allocated(error)) return
     113           55 :           cycle
     114           72 :         else if (pos + 3 <= src_len .and. source(pos:pos + 3) == "<!--") then
     115            1 :           call skip_comment(source, src_len, pos, line, col, error, fname)
     116            1 :           if (allocated(error)) return
     117            1 :           cycle
     118              :         end if
     119              :       end if
     120           70 :       exit
     121              :     end do
     122              : 
     123           71 :     if (pos > src_len .or. source(pos:pos) /= "<") then
     124              :       ! Empty or whitespace-only input — return empty root
     125            1 :       return
     126              :     end if
     127              : 
     128              :     ! Read the document element open tag.
     129              :     ! We unwrap it so its children go directly into root.
     130           70 :     call advance(source, pos, line, col)  ! skip '<'
     131           70 :     call read_name(source, src_len, pos, line, col, doc_tag)
     132           70 :     if (len(doc_tag) == 0) then
     133              :       call make_parse_error(error, "Expected document element name", &
     134            0 :           & fname, line, col)
     135            0 :       return
     136              :     end if
     137              : 
     138              :     ! Skip document element attributes
     139           70 :     call skip_whitespace(source, src_len, pos, line, col)
     140           70 :     do while (pos <= src_len)
     141           70 :       if (source(pos:pos) == ">") then
     142           69 :         call advance(source, pos, line, col)
     143           69 :         exit
     144            1 :       else if (source(pos:pos) == "/") then
     145              :         ! Self-closing document element → empty root
     146            1 :         if (pos + 1 <= src_len .and. source(pos + 1:pos + 1) == ">") then
     147            1 :           call advance(source, pos, line, col)
     148            1 :           call advance(source, pos, line, col)
     149            1 :           return
     150              :         end if
     151              :       else
     152            0 :         call read_name(source, src_len, pos, line, col, attr_name)
     153            0 :         call skip_whitespace(source, src_len, pos, line, col)
     154            0 :         if (pos <= src_len .and. source(pos:pos) == "=") then
     155            0 :           call advance(source, pos, line, col)
     156            0 :           call skip_whitespace(source, src_len, pos, line, col)
     157              :           call read_attrib_value(source, src_len, pos, line, col, &
     158            0 :               & attr_value, error, fname)
     159            0 :           if (allocated(error)) return
     160              :         end if
     161            0 :         call skip_whitespace(source, src_len, pos, line, col)
     162              :       end if
     163              :     end do
     164              : 
     165              :     ! Parse document element content directly into root
     166           69 :     call parse_content(source, src_len, pos, line, col, root, error, fname)
     167           69 :     if (allocated(error)) return
     168              : 
     169              :     ! Read document element close tag
     170              :     call read_close_tag(source, src_len, pos, line, col, close_name, &
     171           68 :         & error, fname)
     172           68 :     if (allocated(error)) return
     173              : 
     174           68 :     if (close_name /= doc_tag) then
     175              :       call make_parse_error(error, "Mismatched document element: expected </" &
     176              :           & // doc_tag // "> but got </" // close_name // ">", &
     177            0 :           & fname, line, col)
     178            0 :       return
     179              :     end if
     180              : 
     181           95 :   end subroutine xml_parse_string
     182              : 
     183              :   !> Parse content: elements and text at the current nesting level.
     184              :   !> Adds children to parent_table. Stops at EOF or a closing tag.
     185         1782 :   recursive subroutine parse_content(src, src_len, pos, line, col, &
     186              :       & parent, error, fname)
     187              :     character(len=*), intent(in) :: src
     188              :     integer, intent(in) :: src_len
     189              :     integer, intent(inout) :: pos, line, col
     190              :     type(hsd_table), intent(inout) :: parent
     191              :     type(hsd_error_t), allocatable, intent(out), optional :: error
     192              :     character(len=*), intent(in) :: fname
     193              : 
     194         1782 :     character(len=:), allocatable :: text_buf
     195         1782 :     integer :: text_len
     196         1782 :     type(hsd_error_t), allocatable :: sub_error
     197              : 
     198         1782 :     text_len = 0
     199         1782 :     allocate(character(len=4096) :: text_buf)
     200              : 
     201       122875 :     do while (pos <= src_len)
     202       122875 :       if (src(pos:pos) == "<") then
     203              :         ! Flush accumulated text
     204         3496 :         if (text_len > 0) then
     205         3451 :           call flush_text(text_buf, text_len, parent)
     206              :         end if
     207              : 
     208              :         ! Check what kind of markup
     209         3496 :         if (pos + 1 > src_len) then
     210              :           call make_parse_error(error, "Unexpected end of input after '<'", &
     211            0 :               & fname, line, col)
     212            0 :           return
     213              :         end if
     214              : 
     215         3496 :         if (src(pos + 1:pos + 1) == "/") then
     216              :           ! Closing tag — return to caller
     217         1781 :           return
     218         1716 :         else if (src(pos + 1:pos + 1) == "!") then
     219              :           call skip_comment_or_cdata(src, src_len, pos, line, col, &
     220            1 :               & text_buf, text_len, error, fname)
     221            1 :           if (present(error)) then
     222            1 :             if (allocated(error)) return
     223              :           end if
     224         1714 :         else if (src(pos + 1:pos + 1) == "?") then
     225            0 :           call skip_pi(src, src_len, pos, line, col, error, fname)
     226            0 :           if (present(error)) then
     227            0 :             if (allocated(error)) return
     228              :           end if
     229              :         else
     230              :           ! Opening tag
     231              :           call parse_element(src, src_len, pos, line, col, parent, &
     232         1714 :               & sub_error, fname)
     233         1714 :           if (allocated(sub_error)) then
     234            1 :             if (present(error)) then
     235            1 :               error = sub_error
     236              :             end if
     237            1 :             return
     238              :           end if
     239              :         end if
     240              :       else
     241              :         ! Accumulate text content
     242       119379 :         call accum_text(text_buf, text_len, src(pos:pos))
     243       119379 :         call advance(src, pos, line, col)
     244              :       end if
     245              :     end do
     246              : 
     247              :     ! Flush remaining text
     248            0 :     if (text_len > 0) then
     249            0 :       call flush_text(text_buf, text_len, parent)
     250              :     end if
     251              : 
     252         3635 :   end subroutine parse_content
     253              : 
     254              :   !> Parse a single element: <tag attrs>content</tag> or <tag attrs/>
     255         1714 :   recursive subroutine parse_element(src, src_len, pos, line, col, &
     256              :       & parent, error, fname)
     257              :     character(len=*), intent(in) :: src
     258              :     integer, intent(in) :: src_len
     259              :     integer, intent(inout) :: pos, line, col
     260              :     type(hsd_table), intent(inout) :: parent
     261              :     type(hsd_error_t), allocatable, intent(out) :: error
     262              :     character(len=*), intent(in) :: fname
     263              : 
     264         1714 :     character(len=:), allocatable :: tag_name, attr_name, attr_value
     265         1714 :     character(len=:), allocatable :: all_attribs
     266         1714 :     type(hsd_table), allocatable :: child_table
     267         1714 :     type(hsd_value), allocatable :: child_value
     268         1714 :     character(len=:), allocatable :: close_name
     269         1714 :     logical :: self_closing
     270         1714 :     integer :: n_extra_attrs, jj
     271              :     integer, parameter :: MAX_EXTRA_ATTRS = 64
     272              :     character(len=256) :: extra_attr_names(MAX_EXTRA_ATTRS)
     273              :     character(len=256) :: extra_attr_values(MAX_EXTRA_ATTRS)
     274              : 
     275              :     ! Skip '<'
     276         1714 :     call advance(src, pos, line, col)
     277              : 
     278              :     ! Read tag name
     279         1714 :     call read_name(src, src_len, pos, line, col, tag_name)
     280         1714 :     if (len(tag_name) == 0) then
     281              :       call make_parse_error(error, "Expected element name after '<'", &
     282            0 :           & fname, line, col)
     283            0 :       return
     284              :     end if
     285              : 
     286              :     ! Read attributes
     287         1714 :     all_attribs = ""
     288         1714 :     self_closing = .false.
     289         1714 :     n_extra_attrs = 0
     290              : 
     291         1714 :     call skip_whitespace(src, src_len, pos, line, col)
     292              : 
     293         1768 :     do while (pos <= src_len)
     294         1768 :       if (src(pos:pos) == ">") then
     295         1713 :         call advance(src, pos, line, col)
     296         1713 :         exit
     297           55 :       else if (src(pos:pos) == "/") then
     298            1 :         if (pos + 1 <= src_len .and. src(pos + 1:pos + 1) == ">") then
     299            1 :           self_closing = .true.
     300            1 :           call advance(src, pos, line, col)  ! skip /
     301            1 :           call advance(src, pos, line, col)  ! skip >
     302            1 :           exit
     303              :         end if
     304              :       else
     305              :         ! Read attribute
     306           54 :         call read_name(src, src_len, pos, line, col, attr_name)
     307           54 :         if (len(attr_name) == 0) then
     308              :           call make_parse_error(error, "Expected attribute name or '>' in element", &
     309            0 :               & fname, line, col)
     310            0 :           return
     311              :         end if
     312           54 :         call skip_whitespace(src, src_len, pos, line, col)
     313           54 :         if (pos <= src_len .and. src(pos:pos) == "=") then
     314           54 :           call advance(src, pos, line, col)
     315           54 :           call skip_whitespace(src, src_len, pos, line, col)
     316              :           call read_attrib_value(src, src_len, pos, line, col, attr_value, &
     317           54 :               & error, fname)
     318           54 :           if (allocated(error)) return
     319              : 
     320              :           ! Map 'unit' attribute to HSD attrib field
     321           54 :           if (attr_name == "unit") then
     322           52 :             if (len(all_attribs) > 0) then
     323            0 :               all_attribs = all_attribs // ", " // attr_value
     324              :             else
     325           52 :               all_attribs = attr_value
     326              :             end if
     327              :           else
     328              :             ! Store non-unit attributes for __attr_<name> children
     329            2 :             if (n_extra_attrs < MAX_EXTRA_ATTRS) then
     330            2 :               n_extra_attrs = n_extra_attrs + 1
     331            2 :               extra_attr_names(n_extra_attrs) = attr_name
     332            2 :               extra_attr_values(n_extra_attrs) = attr_value
     333              :             end if
     334              :           end if
     335              :         end if
     336           54 :         call skip_whitespace(src, src_len, pos, line, col)
     337              :       end if
     338              :     end do
     339              : 
     340         1715 :     if (self_closing) then
     341              :       ! Self-closing element → empty table
     342            1 :       allocate(child_table)
     343            1 :       call new_table(child_table, name=tag_name)
     344            1 :       if (len(all_attribs) > 0) child_table%attrib = all_attribs
     345            1 :       do jj = 1, n_extra_attrs
     346            0 :         allocate(child_value)
     347            0 :         call new_value(child_value, &
     348            0 :             & name="__attr_" // trim(extra_attr_names(jj)))
     349            0 :         child_value%string_value = trim(extra_attr_values(jj))
     350            0 :         call child_table%add_child(child_value)
     351            1 :         deallocate(child_value)
     352              :       end do
     353            1 :       call parent%add_child(child_table)
     354            1 :       return
     355              :     end if
     356              : 
     357              :     ! Parse content between open and close tags.
     358              :     ! First, check if it's pure text content (no child elements).
     359              :     ! We use a temp table and inspect what we get.
     360         1713 :     allocate(child_table)
     361         1713 :     call new_table(child_table, name=tag_name)
     362         1713 :     if (len(all_attribs) > 0) child_table%attrib = all_attribs
     363         1715 :     do jj = 1, n_extra_attrs
     364            2 :       allocate(child_value)
     365            2 :       call new_value(child_value, &
     366            2 :           & name="__attr_" // trim(extra_attr_names(jj)))
     367            2 :       child_value%string_value = trim(extra_attr_values(jj))
     368            2 :       call child_table%add_child(child_value)
     369         1715 :       deallocate(child_value)
     370              :     end do
     371              : 
     372         1713 :     call parse_content(src, src_len, pos, line, col, child_table, error, fname)
     373         1713 :     if (allocated(error)) return
     374              : 
     375              :     ! Now we should be at </tag>
     376         1713 :     call read_close_tag(src, src_len, pos, line, col, close_name, error, fname)
     377         1713 :     if (allocated(error)) return
     378              : 
     379         1713 :     if (close_name /= tag_name) then
     380              :       call make_parse_error(error, "Mismatched closing tag: expected </" &
     381            1 :           & // tag_name // "> but got </" // close_name // ">", fname, line, col)
     382            1 :       return
     383              :     end if
     384              : 
     385              :     ! Optimization: if the table has exactly one unnamed value child,
     386              :     ! convert to a named value node instead (matching HSD semantics).
     387              :     ! Exception: if the text contains newlines, keep as table with #text
     388              :     ! child to preserve multi-line block structure for matrix data.
     389         1712 :     if (child_table%num_children == 1) then
     390            0 :       select type (only_child => child_table%children(1)%node)
     391              :       type is (hsd_value)
     392         1338 :         block
     393         1338 :           logical :: is_unnamed
     394         1338 :           is_unnamed = .not. allocated(only_child%name)
     395         1338 :           if (.not. is_unnamed) is_unnamed = (len_trim(only_child%name) == 0)
     396         1338 :           if (is_unnamed) then
     397              :             ! Check if the text content contains newlines
     398         1287 :             if (has_newline_content(only_child)) then
     399              :               ! Multi-line content: keep as table with #text child
     400           36 :               only_child%name = "#text"
     401              :               ! Invalidate hash index since we renamed the child
     402           36 :               call child_table%invalidate_index()
     403           36 :               call parent%add_child(child_table)
     404         1287 :               return
     405              :             end if
     406         1251 :             allocate(child_value)
     407         1251 :             child_value%name = tag_name
     408         1251 :             child_value%value_type = only_child%value_type
     409         1251 :             if (allocated(only_child%string_value)) &
     410         1251 :                 & child_value%string_value = only_child%string_value
     411         1251 :             child_value%int_value = only_child%int_value
     412         1251 :             child_value%real_value = only_child%real_value
     413         1251 :             child_value%logical_value = only_child%logical_value
     414         1251 :             child_value%complex_value = only_child%complex_value
     415         1251 :             if (allocated(only_child%raw_text)) child_value%raw_text = only_child%raw_text
     416         1251 :             if (allocated(child_table%attrib)) child_value%attrib = child_table%attrib
     417         1251 :             call parent%add_child(child_value)
     418         1251 :             return
     419              :           end if
     420              :         end block
     421              :       end select
     422              :     end if
     423              : 
     424              :     ! Add as table
     425          425 :     call parent%add_child(child_table)
     426              : 
     427        35142 :   end subroutine parse_element
     428              : 
     429              :   !> Read a closing tag </name> and return the name.
     430         1781 :   subroutine read_close_tag(src, src_len, pos, line, col, tag_name, error, fname)
     431              :     character(len=*), intent(in) :: src
     432              :     integer, intent(in) :: src_len
     433              :     integer, intent(inout) :: pos, line, col
     434              :     character(len=:), allocatable, intent(out) :: tag_name
     435              :     type(hsd_error_t), allocatable, intent(out) :: error
     436              :     character(len=*), intent(in) :: fname
     437              : 
     438              :     ! Expect </
     439         1781 :     if (pos + 1 > src_len .or. src(pos:pos + 1) /= "</") then
     440            0 :       call make_parse_error(error, "Expected closing tag '</'", fname, line, col)
     441            0 :       return
     442              :     end if
     443         1781 :     call advance(src, pos, line, col)  ! <
     444         1781 :     call advance(src, pos, line, col)  ! /
     445              : 
     446         1781 :     call read_name(src, src_len, pos, line, col, tag_name)
     447         1781 :     call skip_whitespace(src, src_len, pos, line, col)
     448              : 
     449         1781 :     if (pos > src_len .or. src(pos:pos) /= ">") then
     450            0 :       call make_parse_error(error, "Expected '>' in closing tag", fname, line, col)
     451            0 :       return
     452              :     end if
     453         1781 :     call advance(src, pos, line, col)
     454              : 
     455         1781 :   end subroutine read_close_tag
     456              : 
     457              :   !> Skip <!-- comment --> or handle <![CDATA[...]]>
     458            1 :   subroutine skip_comment_or_cdata(src, src_len, pos, line, col, &
     459              :       & text_buf, text_len, error, fname)
     460              :     character(len=*), intent(in) :: src
     461              :     integer, intent(in) :: src_len
     462              :     integer, intent(inout) :: pos, line, col
     463              :     character(len=:), allocatable, intent(inout) :: text_buf
     464              :     integer, intent(inout) :: text_len
     465              :     type(hsd_error_t), allocatable, intent(out), optional :: error
     466              :     character(len=*), intent(in) :: fname
     467              : 
     468              :     ! pos is at '<', next is '!'
     469            1 :     if (pos + 3 <= src_len .and. src(pos:pos + 3) == "<!--") then
     470              :       ! Skip comment
     471            0 :       pos = pos + 4
     472            0 :       col = col + 4
     473            0 :       do while (pos + 2 <= src_len)
     474            0 :         if (src(pos:pos + 2) == "-->") then
     475            0 :           pos = pos + 3
     476            0 :           col = col + 3
     477            0 :           return
     478              :         end if
     479            0 :         call advance(src, pos, line, col)
     480              :       end do
     481            0 :       call make_parse_error(error, "Unterminated comment", fname, line, col)
     482            1 :     else if (pos + 8 <= src_len .and. src(pos:pos + 8) == "<![CDATA[") then
     483              :       ! CDATA section: preserve content
     484            1 :       pos = pos + 9
     485            1 :       col = col + 9
     486            6 :       do while (pos + 2 <= src_len)
     487            6 :         if (src(pos:pos + 2) == "]]>") then
     488            1 :           pos = pos + 3
     489            1 :           col = col + 3
     490            1 :           return
     491              :         end if
     492            5 :         call accum_text(text_buf, text_len, src(pos:pos))
     493            5 :         call advance(src, pos, line, col)
     494              :       end do
     495            0 :       call make_parse_error(error, "Unterminated CDATA section", fname, line, col)
     496              :     else
     497              :       ! Unknown <! construct — skip to >
     498            0 :       do while (pos <= src_len .and. src(pos:pos) /= ">")
     499            0 :         call advance(src, pos, line, col)
     500              :       end do
     501            0 :       if (pos <= src_len) call advance(src, pos, line, col)
     502              :     end if
     503              : 
     504         1782 :   end subroutine skip_comment_or_cdata
     505              : 
     506              :   !> Skip a comment <!-- ... --> without text accumulation (for prolog).
     507            1 :   subroutine skip_comment(src, src_len, pos, line, col, error, fname)
     508              :     character(len=*), intent(in) :: src
     509              :     integer, intent(in) :: src_len
     510              :     integer, intent(inout) :: pos, line, col
     511              :     type(hsd_error_t), allocatable, intent(out), optional :: error
     512              :     character(len=*), intent(in) :: fname
     513              : 
     514              :     ! pos is at '<', expect <!--
     515            1 :     pos = pos + 4
     516            1 :     col = col + 4
     517           10 :     do while (pos + 2 <= src_len)
     518           10 :       if (src(pos:pos + 2) == "-->") then
     519            1 :         pos = pos + 3
     520            1 :         col = col + 3
     521            1 :         return
     522              :       end if
     523            9 :       call advance(src, pos, line, col)
     524              :     end do
     525            0 :     call make_parse_error(error, "Unterminated comment", fname, line, col)
     526              : 
     527            2 :   end subroutine skip_comment
     528              : 
     529              :   !> Skip a processing instruction <?...?>
     530           55 :   subroutine skip_pi(src, src_len, pos, line, col, error, fname)
     531              :     character(len=*), intent(in) :: src
     532              :     integer, intent(in) :: src_len
     533              :     integer, intent(inout) :: pos, line, col
     534              :     type(hsd_error_t), allocatable, intent(out), optional :: error
     535              :     character(len=*), intent(in) :: fname
     536              : 
     537              :     ! pos is at '<', next is '?'
     538           55 :     pos = pos + 2
     539           55 :     col = col + 2
     540         1891 :     do while (pos + 1 <= src_len)
     541         1891 :       if (src(pos:pos + 1) == "?>") then
     542           55 :         pos = pos + 2
     543           55 :         col = col + 2
     544           55 :         return
     545              :       end if
     546         1836 :       call advance(src, pos, line, col)
     547              :     end do
     548              :     call make_parse_error(error, "Unterminated processing instruction", &
     549            0 :         & fname, line, col)
     550              : 
     551           56 :   end subroutine skip_pi
     552              : 
     553              :   !> Read an XML name (tag name or attribute name).
     554         3619 :   subroutine read_name(src, src_len, pos, line, col, name)
     555              :     character(len=*), intent(in) :: src
     556              :     integer, intent(in) :: src_len
     557              :     integer, intent(inout) :: pos, line, col
     558              :     character(len=:), allocatable, intent(out) :: name
     559              : 
     560         3619 :     integer :: start
     561              : 
     562         3619 :     start = pos
     563              : 
     564              :     ! line is accepted for interface consistency but cannot change
     565              :     ! (XML names never contain newlines)
     566              :     if (.false.) line = line
     567        33997 :     do while (pos <= src_len)
     568        33997 :       select case (src(pos:pos))
     569              :       case (" ", achar(9), achar(10), achar(13), "=", ">", "/")
     570         3619 :         exit
     571              :       case default
     572        30378 :         pos = pos + 1
     573        33997 :         col = col + 1
     574              :       end select
     575              :     end do
     576              : 
     577         3619 :     if (pos > start) then
     578         3619 :       name = src(start:pos - 1)
     579              :     else
     580            0 :       name = ""
     581              :     end if
     582              : 
     583           55 :   end subroutine read_name
     584              : 
     585              :   !> Read a quoted attribute value.
     586           54 :   subroutine read_attrib_value(src, src_len, pos, line, col, value, error, fname)
     587              :     character(len=*), intent(in) :: src
     588              :     integer, intent(in) :: src_len
     589              :     integer, intent(inout) :: pos, line, col
     590              :     character(len=:), allocatable, intent(out) :: value
     591              :     type(hsd_error_t), allocatable, intent(out) :: error
     592              :     character(len=*), intent(in) :: fname
     593              : 
     594              :     character(len=1) :: quote
     595           54 :     integer :: start
     596              : 
     597           54 :     if (pos > src_len) then
     598            0 :       call make_parse_error(error, "Expected attribute value", fname, line, col)
     599            0 :       return
     600              :     end if
     601              : 
     602           54 :     quote = src(pos:pos)
     603           54 :     if (quote /= '"' .and. quote /= "'") then
     604              :       call make_parse_error(error, "Expected quoted attribute value", &
     605            0 :           & fname, line, col)
     606            0 :       return
     607              :     end if
     608              : 
     609           54 :     call advance(src, pos, line, col)  ! skip opening quote
     610           54 :     start = pos
     611              : 
     612          403 :     do while (pos <= src_len)
     613          403 :       if (src(pos:pos) == quote) then
     614           54 :         value = xml_unescape(src(start:pos - 1))
     615           54 :         call advance(src, pos, line, col)  ! skip closing quote
     616           54 :         return
     617              :       end if
     618          349 :       call advance(src, pos, line, col)
     619              :     end do
     620              : 
     621            0 :     call make_parse_error(error, "Unterminated attribute value", fname, line, col)
     622              : 
     623         3673 :   end subroutine read_attrib_value
     624              : 
     625              :   !> Skip whitespace.
     626         3853 :   subroutine skip_whitespace(src, src_len, pos, line, col)
     627              :     character(len=*), intent(in) :: src
     628              :     integer, intent(in) :: src_len
     629              :     integer, intent(inout) :: pos, line, col
     630              : 
     631         3960 :     do while (pos <= src_len)
     632         3960 :       select case (src(pos:pos))
     633              :       case (" ", achar(9), achar(10), achar(13))
     634          107 :         call advance(src, pos, line, col)
     635              :       case default
     636         3960 :         return
     637              :       end select
     638              :     end do
     639              : 
     640         3907 :   end subroutine skip_whitespace
     641              : 
     642              :   !> Advance position by one character, tracking line/col.
     643       130760 :   subroutine advance(src, pos, line, col)
     644              :     character(len=*), intent(in) :: src
     645              :     integer, intent(inout) :: pos, line, col
     646              : 
     647       130760 :     if (pos <= len(src) .and. src(pos:pos) == achar(10)) then
     648         2293 :       line = line + 1
     649         2293 :       col = 1
     650              :     else
     651       128467 :       col = col + 1
     652              :     end if
     653       130760 :     pos = pos + 1
     654              : 
     655         3853 :   end subroutine advance
     656              : 
     657              :   !> Accumulate a character into the text buffer.
     658       119384 :   subroutine accum_text(buf, buf_len, ch)
     659              :     character(len=:), allocatable, intent(inout) :: buf
     660              :     integer, intent(inout) :: buf_len
     661              :     character(len=*), intent(in) :: ch
     662              : 
     663       119384 :     character(len=:), allocatable :: tmp
     664       119384 :     integer :: new_cap
     665              : 
     666       119384 :     if (buf_len + 1 > len(buf)) then
     667            5 :       new_cap = len(buf) * 2
     668            5 :       allocate(character(len=new_cap) :: tmp)
     669            5 :       tmp(1:buf_len) = buf(1:buf_len)
     670            5 :       call move_alloc(tmp, buf)
     671              :     end if
     672       119384 :     buf_len = buf_len + 1
     673       119384 :     buf(buf_len:buf_len) = ch
     674              : 
     675       250144 :   end subroutine accum_text
     676              : 
     677              :   !> Flush accumulated text to parent as an anonymous hsd_value.
     678              :   !> Whitespace-only text (spaces, newlines, tabs) is discarded as
     679              :   !> insignificant whitespace between XML elements.
     680         3451 :   subroutine flush_text(buf, buf_len, parent)
     681              :     character(len=:), allocatable, intent(inout) :: buf
     682              :     integer, intent(inout) :: buf_len
     683              :     type(hsd_table), intent(inout) :: parent
     684              : 
     685         3451 :     type(hsd_value), allocatable :: val
     686         3451 :     character(len=:), allocatable :: unescaped
     687              :     character(len=*), parameter :: WHITESPACE = " " // char(9) // char(10) // char(13)
     688         3451 :     integer :: first, last
     689              : 
     690         3451 :     unescaped = xml_unescape(buf(1:buf_len))
     691         3451 :     buf_len = 0
     692              : 
     693              :     ! Discard if entirely whitespace (spaces, tabs, newlines, CR)
     694         3451 :     if (verify(unescaped, WHITESPACE) == 0) return
     695              : 
     696              :     ! Strip leading and trailing whitespace (including newlines)
     697         1289 :     first = verify(unescaped, WHITESPACE)
     698         1289 :     last = verify(unescaped, WHITESPACE, back=.true.)
     699              : 
     700         1289 :     allocate(val)
     701         1289 :     call new_value(val)
     702         1289 :     call val%set_string(unescaped(first:last))
     703         1289 :     call parent%add_child(val)
     704              : 
     705       122835 :   end subroutine flush_text
     706              : 
     707              :   !> Create a parse error.
     708            1 :   subroutine make_parse_error(error, message, filename, line, col)
     709              :     type(hsd_error_t), allocatable, intent(out), optional :: error
     710              :     character(len=*), intent(in) :: message, filename
     711              :     integer, intent(in) :: line, col
     712              : 
     713              :     character(len=20) :: line_str, col_str
     714              : 
     715            0 :     if (.not. present(error)) return
     716              : 
     717            1 :     write(line_str, "(i0)") line
     718            1 :     write(col_str, "(i0)") col
     719              : 
     720            1 :     allocate(error)
     721            1 :     error%code = HSD_STAT_SYNTAX_ERROR
     722              :     error%message = trim(filename) // ":" // trim(line_str) // ":" // &
     723            1 :         & trim(col_str) // ": " // message
     724            1 :     error%filename = filename
     725            1 :     error%line_start = line
     726            1 :     error%column = col
     727              : 
     728         3452 :   end subroutine make_parse_error
     729              : 
     730              :   !> Check if a value node contains newline characters in its content.
     731         1287 :   pure function has_newline_content(val) result(has_nl)
     732              :     type(hsd_value), intent(in) :: val
     733              :     logical :: has_nl
     734              : 
     735         1287 :     has_nl = .false.
     736         1287 :     if (allocated(val%string_value)) then
     737         1287 :       has_nl = index(val%string_value, new_line("a")) > 0
     738              :     end if
     739         1287 :     if (.not. has_nl .and. allocated(val%raw_text)) then
     740            0 :       has_nl = index(val%raw_text, new_line("a")) > 0
     741              :     end if
     742              : 
     743         1288 :   end function has_newline_content
     744              : 
     745         1509 : end module hsd_data_xml_parser
        

Generated by: LCOV version 2.0-1