diff --git a/src/spdx_tools/spdx/parser/json/json_parser.py b/src/spdx_tools/spdx/parser/json/json_parser.py index 9ca35fd85..269e968a4 100644 --- a/src/spdx_tools/spdx/parser/json/json_parser.py +++ b/src/spdx_tools/spdx/parser/json/json_parser.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 import json +from typing import Optional from beartype.typing import Dict @@ -9,8 +10,8 @@ from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser -def parse_from_file(file_name: str) -> Document: - with open(file_name) as file: +def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document: + with open(file_name, encoding=encoding) as file: input_doc_as_dict: Dict = json.load(file) return JsonLikeDictParser().parse(input_doc_as_dict) diff --git a/src/spdx_tools/spdx/parser/parse_anything.py b/src/spdx_tools/spdx/parser/parse_anything.py index b91f76111..ae5e69568 100644 --- a/src/spdx_tools/spdx/parser/parse_anything.py +++ b/src/spdx_tools/spdx/parser/parse_anything.py @@ -9,6 +9,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + from spdx_tools.spdx.formats import FileFormat, file_name_to_format from spdx_tools.spdx.parser.json import json_parser from spdx_tools.spdx.parser.rdf import rdf_parser @@ -17,15 +19,15 @@ from spdx_tools.spdx.parser.yaml import yaml_parser -def parse_file(file_name: str): +def parse_file(file_name: str, encoding: Optional[str] = None): input_format = file_name_to_format(file_name) if input_format == FileFormat.RDF_XML: - return rdf_parser.parse_from_file(file_name) + return rdf_parser.parse_from_file(file_name, encoding) elif input_format == FileFormat.TAG_VALUE: - return tagvalue_parser.parse_from_file(file_name) + return tagvalue_parser.parse_from_file(file_name, encoding) elif input_format == FileFormat.JSON: - return json_parser.parse_from_file(file_name) + return json_parser.parse_from_file(file_name, encoding) elif input_format == FileFormat.XML: - return xml_parser.parse_from_file(file_name) + return xml_parser.parse_from_file(file_name, encoding) elif input_format == FileFormat.YAML: - return yaml_parser.parse_from_file(file_name) + return yaml_parser.parse_from_file(file_name, encoding) diff --git a/src/spdx_tools/spdx/parser/rdf/rdf_parser.py b/src/spdx_tools/spdx/parser/rdf/rdf_parser.py index 3856f8d59..cfa7054d4 100644 --- a/src/spdx_tools/spdx/parser/rdf/rdf_parser.py +++ b/src/spdx_tools/spdx/parser/rdf/rdf_parser.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 +from typing import Optional + from beartype.typing import Any, Dict from rdflib import RDF, Graph @@ -22,9 +24,9 @@ from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE -def parse_from_file(file_name: str) -> Document: +def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document: graph = Graph() - with open(file_name) as file: + with open(file_name, encoding=encoding) as file: graph.parse(file, format="xml") document: Document = translate_graph_to_document(graph) diff --git a/src/spdx_tools/spdx/parser/tagvalue/tagvalue_parser.py b/src/spdx_tools/spdx/parser/tagvalue/tagvalue_parser.py index c28596363..b2c9c9e56 100644 --- a/src/spdx_tools/spdx/parser/tagvalue/tagvalue_parser.py +++ b/src/spdx_tools/spdx/parser/tagvalue/tagvalue_parser.py @@ -1,13 +1,15 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 +from typing import Optional + from spdx_tools.spdx.model import Document from spdx_tools.spdx.parser.tagvalue.parser import Parser -def parse_from_file(file_name: str) -> Document: +def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document: parser = Parser() - with open(file_name) as file: + with open(file_name, encoding=encoding) as file: data = file.read() document: Document = parser.parse(data) return document diff --git a/src/spdx_tools/spdx/parser/xml/xml_parser.py b/src/spdx_tools/spdx/parser/xml/xml_parser.py index f0cd77025..4d18fdfd3 100644 --- a/src/spdx_tools/spdx/parser/xml/xml_parser.py +++ b/src/spdx_tools/spdx/parser/xml/xml_parser.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 +from typing import Optional + import xmltodict from beartype.typing import Any, Dict @@ -36,8 +38,8 @@ ] -def parse_from_file(file_name: str) -> Document: - with open(file_name) as file: +def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document: + with open(file_name, encoding=encoding) as file: parsed_xml: Dict = xmltodict.parse(file.read(), encoding="utf-8") input_doc_as_dict: Dict = _fix_list_like_fields(parsed_xml).get("Document") diff --git a/src/spdx_tools/spdx/parser/yaml/yaml_parser.py b/src/spdx_tools/spdx/parser/yaml/yaml_parser.py index 1a7349eb8..5a269e84d 100644 --- a/src/spdx_tools/spdx/parser/yaml/yaml_parser.py +++ b/src/spdx_tools/spdx/parser/yaml/yaml_parser.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 +from typing import Optional + import yaml from beartype.typing import Dict @@ -8,8 +10,8 @@ from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser -def parse_from_file(file_name: str) -> Document: - with open(file_name) as file: +def parse_from_file(file_name: str, encoding: Optional[str] = None) -> Document: + with open(file_name, encoding=encoding) as file: input_doc_as_dict: Dict = yaml.safe_load(file) return JsonLikeDictParser().parse(input_doc_as_dict) diff --git a/tests/spdx/data/SPDXJSONExample-UTF-16.spdx.json b/tests/spdx/data/SPDXJSONExample-UTF-16.spdx.json new file mode 100644 index 000000000..570d67d3e Binary files /dev/null and b/tests/spdx/data/SPDXJSONExample-UTF-16.spdx.json differ diff --git a/tests/spdx/data/SPDXRdfExample-UTF-16.spdx.rdf.xml b/tests/spdx/data/SPDXRdfExample-UTF-16.spdx.rdf.xml new file mode 100644 index 000000000..c3760c7c8 Binary files /dev/null and b/tests/spdx/data/SPDXRdfExample-UTF-16.spdx.rdf.xml differ diff --git a/tests/spdx/data/SPDXTagExample-UTF-16.spdx b/tests/spdx/data/SPDXTagExample-UTF-16.spdx new file mode 100644 index 000000000..72682949b Binary files /dev/null and b/tests/spdx/data/SPDXTagExample-UTF-16.spdx differ diff --git a/tests/spdx/data/SPDXXMLExample-UTF-16.spdx.xml b/tests/spdx/data/SPDXXMLExample-UTF-16.spdx.xml new file mode 100644 index 000000000..9e417c69a Binary files /dev/null and b/tests/spdx/data/SPDXXMLExample-UTF-16.spdx.xml differ diff --git a/tests/spdx/data/SPDXYAMLExample-UTF-16.spdx.yaml b/tests/spdx/data/SPDXYAMLExample-UTF-16.spdx.yaml new file mode 100644 index 000000000..d951651c7 Binary files /dev/null and b/tests/spdx/data/SPDXYAMLExample-UTF-16.spdx.yaml differ diff --git a/tests/spdx/parser/all_formats/test_parse_from_file.py b/tests/spdx/parser/all_formats/test_parse_from_file.py index 7fad968f2..5a62fb357 100644 --- a/tests/spdx/parser/all_formats/test_parse_from_file.py +++ b/tests/spdx/parser/all_formats/test_parse_from_file.py @@ -44,7 +44,7 @@ def test_parse_from_file_with_2_3_example(self, parser, format_name, extension): assert len(doc.relationships) == 13 assert len(doc.extracted_licensing_info) == 5 - def test_parse_json_with_2_2_example(self, parser, format_name, extension): + def test_parse_from_file_with_2_2_example(self, parser, format_name, extension): doc = parser.parse_from_file( os.path.join(os.path.dirname(__file__), f"../../data/SPDX{format_name}Example-v2.2.spdx{extension}") ) @@ -55,3 +55,16 @@ def test_parse_json_with_2_2_example(self, parser, format_name, extension): assert len(doc.snippets) == 1 assert len(doc.relationships) == 11 assert len(doc.extracted_licensing_info) == 5 + + def test_parse_from_file_with_encoding_example(self, parser, format_name, extension): + doc = parser.parse_from_file( + os.path.join(os.path.dirname(__file__), f"../../data/SPDX{format_name}Example-UTF-16.spdx{extension}"), + "utf-16", + ) + assert isinstance(doc, Document) + assert len(doc.annotations) == 5 + assert len(doc.files) == 4 + assert len(doc.packages) == 4 + assert len(doc.snippets) == 1 + assert len(doc.relationships) == 11 + assert len(doc.extracted_licensing_info) == 5