CLASS zcl_abapgit_syntax_xml DEFINITION
  PUBLIC
  INHERITING FROM zcl_abapgit_syntax_highlighter
  CREATE PUBLIC .

  " Syntax highlighter rules for XML: tags, attribute names, attribute values
  " and comments (including comments that span several lines).

  PUBLIC SECTION.

    " Style names handed to add_rule( ) as iv_style, one per kind of match
    CONSTANTS:
      BEGIN OF c_css,
        xml_tag  TYPE string VALUE 'xml_tag',               "#EC NOTEXT
        attr     TYPE string VALUE 'attr',                  "#EC NOTEXT
        attr_val TYPE string VALUE 'attr_val',              "#EC NOTEXT
        comment  TYPE string VALUE 'comment',               "#EC NOTEXT
      END OF c_css .

    " One-character token ids handed to add_rule( ) as iv_token and
    " later compared against the token component of each match
    CONSTANTS:
      BEGIN OF c_token,
        xml_tag  TYPE c VALUE 'X',                          "#EC NOTEXT
        attr     TYPE c VALUE 'A',                          "#EC NOTEXT
        attr_val TYPE c VALUE 'V',                          "#EC NOTEXT
        comment  TYPE c VALUE 'C',                          "#EC NOTEXT
      END OF c_token .

    CONSTANTS:
      BEGIN OF c_regex,
        " For XML tags we use a submatch: the main pattern also consumes
        " quoted strings so that < and > inside attribute values are ignored
        xml_tag  TYPE string VALUE '(?:"[^"]*")|(?:''[^'']*'')|([<>])', "#EC NOTEXT
        attr     TYPE string VALUE '(?:^|\s)[-a-z:_0-9]+\s*(?==\s*["|''])', "#EC NOTEXT
        attr_val TYPE string VALUE '("[^"]*")|(''[^'']*'')', "#EC NOTEXT
        " Comments: complete on one line, opening marker only, closing marker only
        comment  TYPE string VALUE '[\<]!--.*--[\>]|[\<]!--|--[\>]', "#EC NOTEXT
      END OF c_regex .

    METHODS constructor .

  PROTECTED SECTION.

    " Set while the lines being processed are inside a multi-line comment.
    " Static, i.e. shared by all instances; reset in the constructor.
    CLASS-DATA gv_comment TYPE abap_bool.

    METHODS order_matches REDEFINITION.

  PRIVATE SECTION.
ENDCLASS.



CLASS zcl_abapgit_syntax_xml IMPLEMENTATION.


  METHOD constructor.
    " Registers the four XML rules and resets the multi-line comment state.

    super->constructor( ).

    " Reset indicator for multi-line comments
    CLEAR gv_comment.

    " Initialize instances of regular expressions
    " (only the tag rule evaluates a submatch, see c_regex-xml_tag)
    add_rule( iv_regex    = c_regex-xml_tag
              iv_token    = c_token-xml_tag
              iv_style    = c_css-xml_tag
              iv_submatch = 1 ).

    add_rule( iv_regex = c_regex-attr
              iv_token = c_token-attr
              iv_style = c_css-attr ).

    add_rule( iv_regex = c_regex-attr_val
              iv_token = c_token-attr_val
              iv_style = c_css-attr_val ).

    add_rule( iv_regex = c_regex-comment
              iv_token = c_token-comment
              iv_style = c_css-comment ).

  ENDMETHOD.


  METHOD order_matches.
    " Post-processes the raw matches of one line (iv_line / ct_matches, both
    " declared by the redefined superclass method):
    "  - sorts them by offset,
    "  - marks the whole line as comment while inside a multi-line comment,
    "  - merges the single '<' and '>' matches into tag spans,
    "  - drops matches that lie inside comments or between two tags.

    DATA:
      lv_match      TYPE string,
      lv_line_len   TYPE i,
      lv_cmmt_end   TYPE i,
      lv_index      TYPE sy-tabix,
      lv_prev_token TYPE c,
      lv_state      TYPE c VALUE 'O'. " O - for open tag; C - for closed tag;

    FIELD-SYMBOLS:
      <ls_prev>  TYPE ty_match,   " last match that was kept
      <ls_match> TYPE ty_match.   " match currently being processed

    SORT ct_matches BY offset.

    lv_line_len = strlen( iv_line ).

    " Check if this is part of multi-line comment and mark it accordingly
    IF gv_comment = abap_true.
      READ TABLE ct_matches WITH KEY token = c_token-comment TRANSPORTING NO FIELDS.
      IF sy-subrc <> 0.
        " No comment marker on this line: the entire line is comment text
        CLEAR ct_matches.
        APPEND INITIAL LINE TO ct_matches ASSIGNING <ls_match>.
        <ls_match>-token  = c_token-comment.
        <ls_match>-offset = 0.
        <ls_match>-length = lv_line_len.
        RETURN.
      ENDIF.
    ENDIF.

    LOOP AT ct_matches ASSIGNING <ls_match>.
      lv_index = sy-tabix.

      lv_match = substring( val = iv_line
                            off = <ls_match>-offset
                            len = <ls_match>-length ).

      CASE <ls_match>-token.
        WHEN c_token-xml_tag.
          <ls_match>-text_tag = lv_match.

          " No other matches between two tags
          IF <ls_match>-text_tag = '>' AND lv_prev_token = c_token-xml_tag.
            lv_state = 'C'.
            " Stretch the opening '<' match up to and including this '>'
            <ls_prev>-length = <ls_match>-offset - <ls_prev>-offset + <ls_match>-length.
            DELETE ct_matches INDEX lv_index.
            CONTINUE.

            " Adjust length and offset of closing tag
          ELSEIF <ls_match>-text_tag = '>' AND lv_prev_token <> c_token-xml_tag.
            lv_state = 'C'.
            IF <ls_prev> IS ASSIGNED.
              " Let the closing part start right after the previous match
              <ls_match>-length = <ls_match>-offset - <ls_prev>-offset - <ls_prev>-length + <ls_match>-length.
              <ls_match>-offset = <ls_prev>-offset + <ls_prev>-length.
            ENDIF.
          ELSE.
            lv_state = 'O'.
          ENDIF.

        WHEN c_token-comment.
          IF lv_match = '<!--'.
            " Comment opens here and is not closed on this line:
            " everything up to the end of the line belongs to the comment
            DELETE ct_matches WHERE offset > <ls_match>-offset.
            <ls_match>-length = lv_line_len - <ls_match>-offset.
            gv_comment = abap_true.
          ELSEIF lv_match = '-->'.
            " Comment opened on an earlier line ends here:
            " everything from the start of the line belongs to the comment
            DELETE ct_matches WHERE offset < <ls_match>-offset.
            <ls_match>-length = <ls_match>-offset + 3.
            <ls_match>-offset = 0.
            gv_comment = abap_false.
          ELSE.
            " Complete single-line comment: drop all matches inside it and
            " the tag match for its leading '<'
            lv_cmmt_end = <ls_match>-offset + <ls_match>-length.
            DELETE ct_matches WHERE offset > <ls_match>-offset AND offset <= lv_cmmt_end.
            DELETE ct_matches WHERE offset = <ls_match>-offset AND token = c_token-xml_tag.
          ENDIF.

        WHEN OTHERS.
          IF lv_prev_token = c_token-xml_tag.
            <ls_prev>-length = <ls_match>-offset - <ls_prev>-offset. " Extend length of the opening tag
          ENDIF.

          IF lv_state = 'C'.  " Delete all matches between tags
            DELETE ct_matches INDEX lv_index.
            CONTINUE.
          ENDIF.

      ENDCASE.

      lv_prev_token = <ls_match>-token.
      ASSIGN <ls_match> TO <ls_prev>.
    ENDLOOP.

    "if the last XML tag is not closed, extend it to the end of the tag
    IF lv_prev_token = c_token-xml_tag
        AND <ls_prev> IS ASSIGNED
        AND <ls_prev>-length = 1
        AND <ls_prev>-text_tag = '<'.

      FIND REGEX '<\s*[^\s]*' IN iv_line+<ls_prev>-offset MATCH LENGTH <ls_prev>-length.
      IF sy-subrc <> 0.
        " Keep the bare '<' if the pattern does not match
        <ls_prev>-length = 1.
      ENDIF.

    ENDIF.

  ENDMETHOD.
ENDCLASS.