mirror of
https://github.com/abapGit/abapGit.git
synced 2025-04-30 20:03:20 +08:00
new algorithm for binary file detection (#5068)
* new algorithm for binary file detection * only check first 1000 bytes * cleanup Co-authored-by: Lars Hvam <larshp@hotmail.com>
This commit is contained in:
parent
01df821def
commit
0dc66c6246
|
@ -177,26 +177,7 @@ CLASS zcl_abapgit_gui_page_merge_res IMPLEMENTATION.
|
|||
ASSIGN iv_d2 TO <lv_data>.
|
||||
ENDIF.
|
||||
|
||||
lv_len = xstrlen( <lv_data> ).
|
||||
IF lv_len = 0.
|
||||
RETURN.
|
||||
ENDIF.
|
||||
|
||||
IF lv_len > 100.
|
||||
lv_len = 100.
|
||||
ENDIF.
|
||||
|
||||
" Simple char range test
|
||||
" stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
|
||||
DO lv_len TIMES. " I'm sure there is more efficient way ...
|
||||
lv_idx = sy-index - 1.
|
||||
lv_x = <lv_data>+lv_idx(1).
|
||||
|
||||
IF NOT ( lv_x BETWEEN 9 AND 13 OR lv_x BETWEEN 32 AND 126 ).
|
||||
rv_yes = abap_true.
|
||||
EXIT.
|
||||
ENDIF.
|
||||
ENDDO.
|
||||
rv_yes = zcl_abapgit_utils=>is_binary( <lv_data> ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
|
|
@ -7,9 +7,9 @@ CLASS zcl_abapgit_utils DEFINITION
|
|||
|
||||
CLASS-METHODS is_binary
|
||||
IMPORTING
|
||||
!iv_data TYPE xstring
|
||||
!iv_data TYPE xstring
|
||||
RETURNING
|
||||
VALUE(rv_yes) TYPE abap_bool .
|
||||
VALUE(rv_is_binary) TYPE abap_bool .
|
||||
CLASS-METHODS extract_author_data
|
||||
IMPORTING
|
||||
!iv_author TYPE string
|
||||
|
@ -46,30 +46,43 @@ CLASS ZCL_ABAPGIT_UTILS IMPLEMENTATION.
|
|||
|
||||
METHOD is_binary.
|
||||
|
||||
DATA: lv_len TYPE i,
|
||||
lv_idx TYPE i,
|
||||
lv_x TYPE x.
|
||||
" Previously we did a simple char range test described here
|
||||
" stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
|
||||
" but this is insufficient if the data contains German umlauts and other special characters.
|
||||
" Therefore we adopted another algorithm, which is similarly used by AL11
|
||||
" RSWATCH0 / GUESS_FILE_TYPE
|
||||
" We count the non-printable characters; if they make up more than lc_binary_threshold percent, it's binary.
|
||||
|
||||
lv_len = xstrlen( iv_data ).
|
||||
IF lv_len = 0.
|
||||
CONSTANTS:
|
||||
lc_binary_threshold TYPE i VALUE 10,
|
||||
lc_bytes_to_check TYPE i VALUE 1000.
|
||||
|
||||
DATA: lv_string_data TYPE string,
|
||||
lv_printable_chars_count TYPE i,
|
||||
lv_percentage TYPE i,
|
||||
lv_data TYPE xstring,
|
||||
lv_xlen TYPE i.
|
||||
|
||||
lv_xlen = xstrlen( iv_data ).
|
||||
IF lv_xlen = 0.
|
||||
RETURN.
|
||||
ENDIF.
|
||||
|
||||
IF lv_len > 100.
|
||||
lv_len = 100.
|
||||
ENDIF.
|
||||
lv_xlen = nmin(
|
||||
val1 = lv_xlen
|
||||
val2 = lc_bytes_to_check ).
|
||||
|
||||
" Simple char range test
|
||||
" stackoverflow.com/questions/277521/how-to-identify-the-file-content-as-ascii-or-binary
|
||||
DO lv_len TIMES. " I'm sure there is more efficient way ...
|
||||
lv_idx = sy-index - 1.
|
||||
lv_x = iv_data+lv_idx(1).
|
||||
lv_data = iv_data(lv_xlen).
|
||||
|
||||
IF NOT ( lv_x BETWEEN 9 AND 13 OR lv_x BETWEEN 32 AND 126 ).
|
||||
rv_yes = abap_true.
|
||||
EXIT.
|
||||
ENDIF.
|
||||
ENDDO.
|
||||
lv_string_data = zcl_abapgit_convert=>xstring_to_string_utf8( lv_data ).
|
||||
|
||||
REPLACE ALL OCCURRENCES OF cl_abap_char_utilities=>newline IN lv_string_data WITH space.
|
||||
REPLACE ALL OCCURRENCES OF cl_abap_char_utilities=>cr_lf IN lv_string_data WITH space.
|
||||
|
||||
FIND ALL OCCURRENCES OF REGEX '[^[:print:]]' IN lv_string_data MATCH COUNT lv_printable_chars_count.
|
||||
lv_percentage = lv_printable_chars_count * 100 / strlen( lv_string_data ).
|
||||
rv_is_binary = boolc( lv_percentage > lc_binary_threshold ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
ENDCLASS.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
CLASS ltcl_utils_test DEFINITION FINAL
|
||||
CLASS ltcl_extract_author_data DEFINITION FINAL
|
||||
FOR TESTING
|
||||
DURATION SHORT
|
||||
RISK LEVEL HARMLESS.
|
||||
|
@ -16,7 +16,7 @@ CLASS ltcl_utils_test DEFINITION FINAL
|
|||
|
||||
ENDCLASS.
|
||||
|
||||
CLASS ltcl_utils_test IMPLEMENTATION.
|
||||
CLASS ltcl_extract_author_data IMPLEMENTATION.
|
||||
|
||||
METHOD extract_author_data1.
|
||||
TRY.
|
||||
|
@ -97,3 +97,189 @@ CLASS ltcl_utils_test IMPLEMENTATION.
|
|||
ENDMETHOD.
|
||||
|
||||
ENDCLASS.
|
||||
|
||||
|
||||
CLASS ltcl_is_binary DEFINITION FINAL FOR TESTING
|
||||
DURATION SHORT
|
||||
RISK LEVEL HARMLESS.
|
||||
PUBLIC SECTION.
|
||||
CLASS-METHODS:
|
||||
class_constructor.
|
||||
|
||||
PRIVATE SECTION.
|
||||
DATA mv_given_file TYPE xstring.
|
||||
DATA mv_act_is_binary TYPE abap_bool.
|
||||
CLASS-DATA:
|
||||
gv_nl TYPE c LENGTH 1,
|
||||
gv_cr_lf TYPE c LENGTH 2.
|
||||
|
||||
METHODS:
|
||||
cds_metadata_is_text FOR TESTING RAISING cx_static_check,
|
||||
cds_with_umlaut_is_text FOR TESTING RAISING cx_static_check,
|
||||
image_is_binary FOR TESTING RAISING cx_static_check,
|
||||
|
||||
given_file
|
||||
IMPORTING
|
||||
iv_file TYPE string,
|
||||
given_image,
|
||||
given_cds_metadata,
|
||||
given_cds_view_with_umlaut,
|
||||
|
||||
when_is_binary_determined,
|
||||
|
||||
then_is_not_binary,
|
||||
then_is_binary.
|
||||
|
||||
ENDCLASS.
|
||||
|
||||
|
||||
CLASS ltcl_is_binary IMPLEMENTATION.
|
||||
|
||||
METHOD class_constructor.
|
||||
|
||||
gv_nl = cl_abap_char_utilities=>newline.
|
||||
gv_cr_lf = cl_abap_char_utilities=>cr_lf.
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
METHOD cds_metadata_is_text.
|
||||
|
||||
given_cds_metadata( ).
|
||||
when_is_binary_determined( ).
|
||||
then_is_not_binary( ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD cds_with_umlaut_is_text.
|
||||
|
||||
given_cds_view_with_umlaut( ).
|
||||
when_is_binary_determined( ).
|
||||
then_is_not_binary( ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD image_is_binary.
|
||||
|
||||
given_image( ).
|
||||
when_is_binary_determined( ).
|
||||
then_is_binary( ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD given_file.
|
||||
|
||||
mv_given_file = zcl_abapgit_convert=>string_to_xstring_utf8( iv_file ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD given_image.
|
||||
|
||||
mv_given_file =
|
||||
zcl_abapgit_convert=>base64_to_xstring(
|
||||
`iVBORw0KGgoAAAANSUhEUgAAALEAAAA1CAYAAAAOCAoLAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8Y`
|
||||
&& `QUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAecSURBVHhe7ZvLilxVFIb1BfQFdOxAcaAgJIgX8B4HOkkPHB`
|
||||
&& `kxOjFiFBRaEIzowIEYBVGQBMULeIsjFeNI0YGT6EzsB+jETuIblP0V7mbVqn/vs06dc6rqdPbgg+4+a1/`
|
||||
&& `O3t++VvVV29vbk0plzFSJK6OnSlwZPVXiyuipEldGT5W4MnoGk3jn848nl44/Pdn58D35vFLpi0Ekvvjy`
|
||||
&& `85N/b795D35XcZVKH/QusRe4ilwZml4lzgmcqCJXhqBXiXc2j0t5LVXkSt/0IvGFn37Y+7mKXFk2nSVOW`
|
||||
&& `4h/Pvto729V5Moy6SRxEjjhRb549LGZ554q8v7j15/PTs5+/80U9XwIFpbYC5ywIidKM/OqRf7j3O+Tt9`
|
||||
&& `54QfLjd1/LNBUNbXbbDVfvsXHopsnW33/J2D5ZSOKcwInzv/0yl+bSI/fIWFilyGe+ODXT8BY6RaXZbzC`
|
||||
&& `QP3j3xOTYEw9OjmwcnGmDu2+9dvo32oJZVqUHZLXpEu+/86qM75PWEjcJDDLdmm4t/Oxh2e8SI6WXtomH`
|
||||
&& `77x+8unpk3N5sWqp+GW0YSuJIwKD3FK8/oqMtaxCZGaQT069Pdl46MaVdMCq4J39+7ZB5aniSjMxz1Lc4`
|
||||
&& `4cPyJgIYYmjAk+5/+DMtVtinW8t1LZiv0pcWn0AoRLqOah8rZTAxFDaE5/YfHIvdnCJWwn8P2wfSIvMY7`
|
||||
&& `h+U8vhfpQ4t+wfuuO66eys0qgBruKA/TVlRA7FlJnyG1TiRQQGKzG/r7vIV4rEVpxE04wJPo2KaYMfGIN`
|
||||
&& `JHNnH5vASw9xeeWur9+u3dE/JafvMl6enP5dO1YkmiZlh0v0ntLk6Ijalo17pZ/JU8W3w9VIxidw+ONI+`
|
||||
&& `Xn4VE8VvO2AQibkmU2JFURJD2+s3dUj0IMlLz25MT86+cRJcFSFlTr6cxPw9d4J/bfNoUWbScm2l0ia4S`
|
||||
&& `yXOp6VsFZ8OSgip3je9p88P1B73xWOHZayH/SvpUx72mWq7hK2L3QNHyL2HJyvxhW+/klJFyUnM774sYm`
|
||||
&& `2MZefkm3PxFiSi41QjKBBS5aM6ojQoEkio8mt7+md5telzElN/ylTPLAxqmx+ouMgs3ET0rl0NohKdJWa`
|
||||
&& `pv3zfASlWhJzEama9/Oi9MzEWJb2FmUQ1AA121y3XyGdq5ivNJk34xs4NLJZkdZUHDBibB3nm6h/FDozc`
|
||||
&& `+9kyF4Vy1F4bVivxLiz9i4qck3jm+m13oJQOjpGthO9oGtMu8Wr/xbJm84BcJ5M/A4UGzQ0YhG3KK20Dg`
|
||||
&& `Popmf0embhnjjwwFwe2XrkYu0qoOiGVLa8rqq2tiMz61EPF0R48s0TPDEWJYVGRsxIHiQgMdCCdkWZeNX`
|
||||
&& `p9g6nOo9F8HHnbAQG5bQLpbV6pTsBzv2xTz1IepTg1CFUcpPqr56oduqDaUPWHiutSl0aJYRGRu0gcFTh`
|
||||
&& `KpMGiHQBq6czF5lBSRSVWceDjbKzKpyQOg440Tdi6RNtQxQ0uMbQRmbiZLcNzT8k4Rd8CQ6TBoh0A6pSd`
|
||||
&& `i81BvM/DCtE2DngvH5v2xWoFKYmjylXY9157iSEq8pyIwUNiF4HZP/HFFBqNkzmn+ESkwdpIrDqYrYeKJ`
|
||||
&& `V/iIX1LDNTNh5JTlaXiQEmc3kG9H/g8Ern9vyflD9E2VHFLkxgiIp//89xcutI1GiwqMHs+pPWNUmIIie`
|
||||
&& `l0G8MMGLmisyg5VVkqDkoSM8j9M8hdsTFzkxaU0OkgZtNE21DFLVViaBKZO+aZNMzEhWu0LgI33ZmqW4A`
|
||||
&& `hJLaxpXtT4ACq9tVKTlWWigMlsY31NzmgDomeqHTRNozmF2UhiaEkMsLyfBrbwzVajtxVjZ9dfExXidWV`
|
||||
&& `lo1VA4fn9qYjKmc0DtTAsG2h9vJcD/obGE9UumgbRvOLsrDE0DQjl2Zf6CIwKJlUB/uYLh1Ah/s4sOX6Z`
|
||||
&& `8hl84C+JUZWHwc2JrelYDUriazaZt9IDJE9sqKrwMCL+8awn1KB6lzVYHJW3+1cP6vnDjxJAiW5/zAkl0`
|
||||
&& `9UYg6I9oMAylTbKnXYVLMxkF6VD31LrAZTZEXI0VliaCtyHwJDbnnkm2KQO/DRYT4vJUuC+NyNAvhDnYp`
|
||||
&& `BPOpEObl8/ACEUr3STQfvrJ6r/KbCi+1OgrqRJ+XyBafceyuJ1Vkgt+f2cUA7Uy7QXr5dc/QiMURF7ktg`
|
||||
&& `aPslG4+dqUqylOCwZGdFUCtEG3ivrvXKXflBk8gRrMSRfvBCRtpIrWCK3iSGJpH7FDjR1Bl0ptoqgO0It`
|
||||
&& `b9u6hwEVkuwWlYtpCPvXEcibspLSdz0zjxvWpp5Tt7qxiJCUx09tq2hqY0Sqn09vUoMOZGHEBjoDEa57w`
|
||||
&& `ykTMsp+1oa0WOXOn5Wz0jrBacsyvQzsIXG97JxwLPpGFy2zETTTEze4OtF/sQ3CWwhlvKol7rdSFAvYoj`
|
||||
&& `1+acBWUJtK9Q7AO1GvD+P5OhdYkDk9EV3hB5K4GXTRo6+yEmsYvtkFe+6aJmDSDxla2v6/Qn16V0lzqok`
|
||||
&& `HhPDSVzphSpxM1XiNYa9s/ofvfTPpqtY8teRKvGaombgHCr9lUSVeE2pEsepEq8p7HsROYJKfyVRJa6Mn`
|
||||
&& `ipxZfRUiSujp0pcGT1V4sroqRJXRk+VuDJytif/Aa8ZlmVpfNVQAAAAAElFTkSuQmCC` ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD given_cds_metadata.
|
||||
|
||||
given_file( `{`
|
||||
&& gv_nl && `"BASEINFO":`
|
||||
&& gv_nl && `{`
|
||||
&& gv_nl && `"FROM":`
|
||||
&& gv_nl && `[`
|
||||
&& gv_nl && `"T100"`
|
||||
&& gv_nl && `],`
|
||||
&& gv_nl && `"ASSOCIATED":`
|
||||
&& gv_nl && `[],`
|
||||
&& gv_nl && `"BASE":`
|
||||
&& gv_nl && `[],`
|
||||
&& gv_nl && `"ANNO_REF":`
|
||||
&& gv_nl && `[],`
|
||||
&& gv_nl && `"VERSION":0`
|
||||
&& gv_nl && `}`
|
||||
&& gv_nl && `}` ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD given_cds_view_with_umlaut.
|
||||
|
||||
CONSTANTS lc_umlaut_ue TYPE xstring VALUE `C3BC`.
|
||||
|
||||
given_file( `@EndUserText.label: `
|
||||
&& zcl_abapgit_convert=>xstring_to_string_utf8( lc_umlaut_ue )
|
||||
&& `bernahmekandidat'`
|
||||
&& gv_cr_lf && `@AbapCatalog.sqlViewName: 'ZTESTDDLSBUG2'`
|
||||
&& gv_cr_lf && `@AbapCatalog.compiler.compareFilter: true`
|
||||
&& gv_cr_lf && `@AbapCatalog.preserveKey: true`
|
||||
&& gv_cr_lf && `@AccessControl.authorizationCheck: #CHECK`
|
||||
&& gv_cr_lf && `define view ztest_ddls_bug2`
|
||||
&& gv_cr_lf && ` as select from t100`
|
||||
&& gv_cr_lf && `{`
|
||||
&& gv_cr_lf && ` key sprsl as Sprsl,`
|
||||
&& gv_cr_lf && ` key arbgb as Arbgb,`
|
||||
&& gv_cr_lf && ` key msgnr as Msgnr,`
|
||||
&& gv_cr_lf && ` text as Text`
|
||||
&& gv_cr_lf && `}` ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
METHOD when_is_binary_determined.
|
||||
|
||||
mv_act_is_binary = zcl_abapgit_utils=>is_binary( mv_given_file ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD then_is_not_binary.
|
||||
|
||||
cl_abap_unit_assert=>assert_false( mv_act_is_binary ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
|
||||
METHOD then_is_binary.
|
||||
|
||||
cl_abap_unit_assert=>assert_true( mv_act_is_binary ).
|
||||
|
||||
ENDMETHOD.
|
||||
|
||||
ENDCLASS.
|
||||
|
|
|
@ -144,9 +144,10 @@
|
|||
{"object": "ZCL_ABAPGIT_ZLIB", "class": "ltcl_zlib", "method": "fixed"},
|
||||
{"object": "ZCL_ABAPGIT_GIT_ADD_PATCH", "class": "ltcl_calculate_patch", "method": "multiple_non_adjacent_delete"},
|
||||
{"object": "ZCL_ABAPGIT_GIT_ADD_PATCH", "class": "ltcl_calculate_patch", "method": "multiple_partial_delete"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_utils_test", "method": "extract_author_data1", "note": "ASSERT failed, ??"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_utils_test", "method": "extract_author_data2"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_utils_test", "method": "extract_author_data9"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_extract_author_data", "method": "extract_author_data1", "note": "ASSERT failed, ??"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_extract_author_data", "method": "extract_author_data2"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_extract_author_data", "method": "extract_author_data9"},
|
||||
{"object": "ZCL_ABAPGIT_UTILS", "class": "ltcl_is_binary", "method": "image_is_binary"},
|
||||
|
||||
{"object": "ZCL_ABAPGIT_PERSISTENCE_USER", "class": "ltcl_user", "method": "set_get_git_user", "note": "skip, this changes database"},
|
||||
{"object": "ZCL_ABAPGIT_PERSISTENCE_USER", "class": "ltcl_user", "method": "set_get_repo_show", "note": "skip, this changes database"},
|
||||
|
|
Loading…
Reference in New Issue
Block a user