diff --git a/src/zcl_excel_writer_2007.clas.abap b/src/zcl_excel_writer_2007.clas.abap index b97a1c5..daa512b 100644 --- a/src/zcl_excel_writer_2007.clas.abap +++ b/src/zcl_excel_writer_2007.clas.abap @@ -199,16 +199,12 @@ private section. constants C_OFF type STRING value '0'. "#EC NOTEXT constants C_ON type STRING value '1'. "#EC NOTEXT constants C_XL_PRINTERSETTINGS type STRING value 'xl/printerSettings/printerSettings#.bin'. "#EC NOTEXT - DATA support_non_xml_characters TYPE abap_bool. methods FLAG2BOOL importing !IP_FLAG type FLAG returning value(EP_BOOLEAN) type CHAR5 . - METHODS is_support_non_xml_characters - RETURNING - VALUE(r_result) TYPE abap_bool. ENDCLASS. @@ -217,7 +213,6 @@ CLASS ZCL_EXCEL_WRITER_2007 IMPLEMENTATION. METHOD CONSTRUCTOR. me->ixml = cl_ixml=>create( ). - me->support_non_xml_characters = is_support_non_xml_characters( ). ENDMETHOD. @@ -7862,24 +7857,66 @@ METHOD render_xml_document. DATA lo_streamfactory TYPE REF TO if_ixml_stream_factory. DATA lo_ostream TYPE REF TO if_ixml_ostream. DATA lo_renderer TYPE REF TO if_ixml_renderer. + DATA lv_string TYPE string. + " So that the rendering of io_document to a XML text in UTF-8 XSTRING works for all Unicode characters (Chinese, + " emoticons, etc.) the method CREATE_OSTREAM_CSTRING must be used instead of CREATE_OSTREAM_XSTRING as explained + " in note 2922674 below (original there: https://launchpad.support.sap.com/#/notes/2922674), and then the STRING + " variable can be converted into UTF-8. + " + " Excerpt from Note 2922674 - Support for Unicode Characters U+10000 to U+10FFFF in the iXML kernel library / ABAP package SIXML. + " + " You are running a unicode system with SAP Netweaver / SAP_BASIS release equal or lower than 7.51. + " + " Some functions in the iXML kernel library / ABAP package SIXML does not fully or incorrectly support unicode + " characters of the supplementary planes. This is caused by using UCS-2 in codepage conversion functions. + " Therefore, when reading from iXML input steams, the characters from the supplementary planes, that are not + " supported by UCS-2, might be replaced by the character #. When writing to iXML output streams, UTF-16 surrogate + " pairs, representing characters from the supplementary planes, might be incorrectly encoded in UTF-8. + " + " The characters incorrectly encoded in UTF-8, might be accepted as input for the iXML parser or external parsers, + " but might also be rejected. + " + " Support for unicode characters of the supplementary planes was introduced for SAP_BASIS 7.51 or lower with note + " 2220720, but later withdrawn with note 2346627 for functional issues. + " + " Characters of the supplementary planes are supported with ABAP Platform 1709 / SAP_BASIS 7.52 and higher. + " + " Please note, that the iXML runtime behaves like the ABAP runtime concerning the handling of unicode characters of + " the supplementary planes. In iXML and ABAP, these characters have length 2 (as returned by ABAP build-in function + " STRLEN), and string processing functions like SUBSTRING might split these characters into 2 invalid characters + " with length 1. These invalid characters are commonly referred to as broken surrogate pairs. + " + " A workaround for the incorrect UTF-8 encoding in SAP_BASIS 7.51 or lower is to render the document to an ABAP + " variable with type STRING using a output stream created with factory method IF_IXML_STREAM_FACTORY=>CREATE_OSTREAM_CSTRING + " and then to convert the STRING variable to UTF-8 using method CL_ABAP_CODEPAGE=>CONVERT_TO. + + " 1) RENDER TO XML STRING lo_streamfactory = me->ixml->create_stream_factory( ). - lo_ostream = lo_streamfactory->create_ostream_xstring( string = ep_content ). - - IF support_non_xml_characters = abap_false. - " remove illegal characters in the XML according to the note 1750204 - " the following method is only available if the corresponding kernel is used - TRY. - CALL METHOD lo_ostream->('SKIP_NON_XML_CHARACTERS') - EXPORTING - is_skipping = abap_true. - CATCH cx_sy_dyn_call_illegal_method. - - ENDTRY. - ENDIF. - + lo_ostream = lo_streamfactory->create_ostream_cstring( string = lv_string ). lo_renderer = me->ixml->create_renderer( ostream = lo_ostream document = io_document ). lo_renderer->render( ). + + " 2) CONVERT IT TO UTF-8 + "----------------- + " The beginning of the XML string has these 57 characters: + " X + " (where "X" is the special character corresponding to the utf-16 BOM, hexadecimal FFFE or FEFF, + " but there's no "X" in non-Unicode SAP systems) + " The encoding must be removed otherwise Excel would fail to decode correctly the UTF-8 XML. + " For a better performance, it's assumed that "encoding" is in the first 100 characters. + IF strlen( lv_string ) < 100. + REPLACE REGEX 'encoding="[^"]+"' IN lv_string WITH ``. + ELSE. + REPLACE REGEX 'encoding="[^"]+"' IN SECTION LENGTH 100 OF lv_string WITH ``. + ENDIF. + " Convert XML text to UTF-8 (NB: if 2 first bytes are the UTF-16 BOM, they are converted into 3 bytes of UTF-8 BOM) + ep_content = cl_abap_codepage=>convert_to( source = lv_string ). + " Add the UTF-8 Byte Order Mark if missing (NB: that serves as substitute of "encoding") + IF xstrlen( ep_content ) >= 3 AND ep_content(3) <> cl_abap_char_utilities=>byte_order_mark_utf8. + CONCATENATE cl_abap_char_utilities=>byte_order_mark_utf8 ep_content INTO ep_content IN BYTE MODE. + ENDIF. + ENDMETHOD. @@ -7889,38 +7926,4 @@ method ZIF_EXCEL_WRITER~WRITE_FILE. ep_file = me->create( ). endmethod. -METHOD is_support_non_xml_characters. - DATA: - "! Unicode character U+10000 - lv_u10000 TYPE string, - lo_ixml TYPE REF TO if_ixml, - lo_document TYPE REF TO if_ixml_document, - lo_root TYPE REF TO if_ixml_element, - lo_text TYPE REF TO if_ixml_text, - lo_stream_factory TYPE REF TO if_ixml_stream_factory, - lo_ostream TYPE REF TO if_ixml_ostream, - lv_xstring TYPE xstring, - lo_renderer TYPE REF TO if_ixml_renderer, - lv_rc TYPE i. - CONSTANTS lc_utf8_value_of_u10000 TYPE xstring VALUE 'F0908080'. - - lv_u10000 = cl_abap_conv_in_ce=>uccp( 'D800' ) && cl_abap_conv_in_ce=>uccp( 'DC00' ). - lo_ixml = cl_ixml=>create( ). - lo_document = lo_ixml->create_document( ). - lo_root = lo_document->create_simple_element( name = 'ROOT' parent = lo_document ). - lo_text = lo_document->create_text( lv_u10000 ). - lv_rc = lo_root->append_child( lo_text ). - lo_stream_factory = lo_ixml->create_stream_factory( ). - lo_ostream = lo_stream_factory->create_ostream_xstring( string = lv_xstring ). - lo_renderer = lo_ixml->create_renderer( ostream = lo_ostream - document = lo_document ). - lv_rc = lo_renderer->render( ). - - FIND lc_utf8_value_of_u10000 IN lv_xstring IN BYTE MODE. - IF sy-subrc = 0. - r_result = abap_true. - ELSE. - r_result = abap_false. - ENDIF. -ENDMETHOD. ENDCLASS.