Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/somef/parser/composer_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
import os
import re
from pathlib import Path
from ..process_results import Result
from ..utils import constants
Expand Down
142 changes: 142 additions & 0 deletions src/somef/parser/julia_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
import tomli
from pathlib import Path
import re
import logging
from somef.process_results import Result
from somef.utils import constants

def _add_project_toml_result(metadata_result, category, result, source):
    """Record one Project.toml finding on ``metadata_result``.

    Every entry extracted from Project.toml is stored with confidence 1 and
    the code-config-parser technique, so that boilerplate lives here once.
    """
    metadata_result.add_result(
        category,
        result,
        1,
        constants.TECHNIQUE_CODE_CONFIG_PARSER,
        source,
    )


def _author_result(raw_author):
    """Build the result dictionary for one ``authors`` entry.

    Entries shaped like ``Name <email>`` are split into name and email; any
    other non-blank string is kept whole as the author name. Returns None
    for entries that are not strings or are blank.
    """
    if not isinstance(raw_author, str):
        return None
    author = raw_author.strip()
    if not author:
        return None

    match = re.match(r'^(.+?)\s*<(.+?)>$', author)
    if match is None:
        return {
            "value": author,
            "name": author,
            "type": constants.AGENT,
        }

    author_name = match.group(1).strip()
    return {
        "value": author_name,
        "name": author_name,
        "email": match.group(2).strip(),
        "type": constants.AGENT,
    }


def parse_project_toml(file_path, metadata_result: Result, source):
    """
    Parse a Julia Project.toml file to extract metadata.

    The fields read are ``name``, ``compat``, ``version``, ``uuid``, ``deps``
    and ``authors``. Sections with an unexpected shape (``compat``/``deps``
    that are not tables, ``authors`` entries that are not strings) are
    skipped so they cannot prevent the remaining fields from being recorded.

    Parameters
    ----------
    file_path: path to the Project.toml file being analyzed
    metadata_result: Metadata object dictionary
    source: source of the package file (URL)

    Returns
    -------
    The ``metadata_result`` object that was passed in, with the extracted
    entries added. Errors (unreadable file, invalid TOML, ...) are logged
    and not raised; entries added before the error are kept.
    """
    try:
        if Path(file_path).name == "Project.toml":
            _add_project_toml_result(
                metadata_result,
                constants.CAT_HAS_PACKAGE_FILE,
                {
                    "value": "Project.toml",
                    "type": constants.URL,
                },
                source,
            )

        # tomli only accepts binary file objects.
        with open(file_path, "rb") as f:
            data = tomli.load(f)

        if "name" in data:
            _add_project_toml_result(
                metadata_result,
                constants.CAT_PACKAGE_ID,
                {
                    "value": data["name"],
                    "type": constants.STRING,
                },
                source,
            )

        # Only iterate [compat] when it really is a table; a scalar here
        # would otherwise abort the parsing of every later field.
        compat = data.get("compat")
        if isinstance(compat, dict):
            for package_name, version in compat.items():
                _add_project_toml_result(
                    metadata_result,
                    constants.CAT_RUNTIME_PLATFORM,
                    {
                        "value": package_name,
                        "package": package_name,
                        "version": version,
                        "type": constants.STRING,
                    },
                    source,
                )

        if "version" in data:
            _add_project_toml_result(
                metadata_result,
                constants.CAT_VERSION,
                {
                    "value": data["version"],
                    "type": constants.STRING,
                },
                source,
            )

        if "uuid" in data:
            _add_project_toml_result(
                metadata_result,
                constants.CAT_IDENTIFIER,
                {
                    "value": data["uuid"],
                    "type": constants.STRING,
                },
                source,
            )

        # Only the dependency names (the table keys) are recorded.
        deps = data.get("deps")
        if isinstance(deps, dict):
            for req in deps:
                _add_project_toml_result(
                    metadata_result,
                    constants.CAT_REQUIREMENTS,
                    {
                        "value": req,
                        "type": constants.STRING,
                    },
                    source,
                )

        authors = data.get("authors", [])
        if isinstance(authors, str):
            # A bare string would otherwise be iterated character by character.
            authors = [authors]
        elif not isinstance(authors, list):
            authors = []

        for auth in authors:
            author = _author_result(auth)
            if author is None:
                logging.warning(
                    "Ignoring malformed author entry in %s: %r", file_path, auth
                )
                continue
            _add_project_toml_result(
                metadata_result,
                constants.CAT_AUTHORS,
                author,
                source,
            )

    except Exception as e:
        # Best-effort parser: a broken package file must not stop the
        # analysis of the rest of the repository.
        logging.error("Error parsing Project.toml file %s: %s", file_path, e)

    return metadata_result
11 changes: 7 additions & 4 deletions src/somef/process_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import yaml
import string
from urllib.parse import urlparse
from .utils import constants, markdown_utils
from .utils import constants
from . import extract_ontologies, extract_workflows
from .process_results import Result
from .regular_expressions import detect_license_spdx, extract_scholarly_article_natural, extract_scholarly_article_properties
Expand All @@ -21,6 +21,7 @@
from .parser.bower_parser import parse_bower_json_file
from .parser.gemspec_parser import parse_gemspec_file
from .parser.description_parser import parse_description_file
from .parser.julia_parser import parse_project_toml
from chardet import detect


Expand Down Expand Up @@ -235,8 +236,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
if filename.lower() == "pom.xml" or filename.lower() == "package.json" or \
filename.lower() == "pyproject.toml" or filename.lower() == "setup.py" or filename.endswith(".gemspec") or \
filename.lower() == "requirements.txt" or filename.lower() == "bower.json" or filename == "DESCRIPTION" or \
(filename.lower() == "cargo.toml" and repo_relative_path == ".") or (filename.lower() == "composer.json" and repo_relative_path == "."):

(filename.lower() == "cargo.toml" and repo_relative_path == ".") or (filename.lower() == "composer.json" and repo_relative_path == ".") or \
(filename == "Project.toml" and repo_relative_path == "."):
if filename.lower() in parsed_build_files and repo_relative_path != ".":
logging.info(f"Ignoring secondary {filename} in {dir_path}")
continue
Expand All @@ -253,7 +254,7 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
},
1,
constants.TECHNIQUE_FILE_EXPLORATION, build_file_url)
logging.info(f"############### Processing package file: {filename} ############### ")
logging.info(f"############### (NEW UPDATE) Processing package file: {filename} ############### ")
if filename.lower() == "pom.xml":
metadata_result = parse_pom_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
if filename.lower() == "package.json":
Expand All @@ -274,6 +275,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
metadata_result = parse_gemspec_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
if filename == "DESCRIPTION":
metadata_result = parse_description_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
if filename == "Project.toml":
metadata_result = parse_project_toml(os.path.join(dir_path, filename), metadata_result, build_file_url)

parsed_build_files.add(filename.lower())

Expand Down
71 changes: 71 additions & 0 deletions src/somef/test/test_data/repositories/Flux.jl/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
name = "Flux"
uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c"
version = "0.16.5"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLCore = "c2834f40-e789-41da-a90e-33b280584a8c"
MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
NCCL = "3fe64909-d7a1-4096-9b7d-7a0f12cf0f6b"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

[extensions]
FluxAMDGPUExt = "AMDGPU"
FluxCUDAExt = "CUDA"
FluxCUDAcuDNNExt = ["CUDA", "cuDNN"]
FluxEnzymeExt = "Enzyme"
FluxMPIExt = "MPI"
FluxMPINCCLExt = ["CUDA", "MPI", "NCCL"]

[compat]
AMDGPU = "1, 2"
Adapt = "4"
CUDA = "5"
ChainRulesCore = "1.12"
Compat = "4.10.0"
Enzyme = "0.13"
EnzymeCore = "0.7.7, 0.8.4"
Functors = "0.5"
MLCore = "1.0.0"
MLDataDevices = "1.4.2"
MLUtils = "0.4"
MPI = "0.20.19"
MacroTools = "0.5"
NCCL = "0.1.1"
NNlib = "0.9.22"
OneHotArrays = "0.2.4"
Optimisers = "0.4.1"
Preferences = "1"
ProgressLogging = "0.1"
Reexport = "1.0"
Setfield = "1.1"
SpecialFunctions = "2.1.2"
Statistics = "1"
Zygote = "0.6.67, 0.7"
cuDNN = "1"
julia = "1.10"
84 changes: 84 additions & 0 deletions src/somef/test/test_data/repositories/Pluto.jl/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name = "Pluto"
uuid = "c3e4b0f8-55cb-11ea-2926-15256bba5781"
license = "MIT"
authors = ["Fons van der Plas <[email protected]>"]
version = "0.20.20"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
ExpressionExplorer = "21656369-7473-754a-2065-74616d696c43"
FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
GracefulPkg = "828d9ff0-206c-6161-646e-6576656f7244"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
HypertextLiteral = "ac1192a8-f4b3-4bfe-ba22-af5b92cd3ab2"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
MIMEs = "6c6e2e6c-3030-632d-7369-2d6c69616d65"
Malt = "36869731-bdee-424d-aa32-cab38c994e3b"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
MsgPack = "99f44e22-a591-53d1-9472-aa23ef4bd671"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PlutoDependencyExplorer = "72656b73-756c-7461-726b-72656b6b696b"
PrecompileSignatures = "91cefc8d-f054-46dc-8f8c-26e11d7c5411"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RegistryInstances = "2792f1a3-b283-48e8-9a74-f99dce5104f3"
RelocatableFolders = "05181044-ff0b-4ac5-8273-598c1e38db00"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Base64 = "1"
Configurations = "0.15, 0.16, 0.17"
Dates = "0, 1"
Downloads = "1"
ExpressionExplorer = "0.5, 0.6, 1"
FileWatching = "1"
GracefulPkg = "2"
HTTP = "^1.10.17"
HypertextLiteral = "0.7, 0.8, 0.9"
InteractiveUtils = "1"
LRUCache = "1.6.2"
Logging = "1"
LoggingExtras = "0.4, 1"
MIMEs = "0.1, 1"
Malt = "1.1"
Markdown = "1"
MsgPack = "1.1"
Pkg = "1"
PlutoDependencyExplorer = "~1.2"
PrecompileSignatures = "3"
PrecompileTools = "~1.2, ~1.3"
REPL = "1"
RegistryInstances = "0.1"
RelocatableFolders = "0.1, 0.2, 0.3, 1"
SHA = "0.7, 1"
Scratch = "1.1"
Sockets = "1"
TOML = "1"
Tables = "1"
URIs = "1.3"
UUIDs = "1"
julia = "^1.10"

[extras]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Memoize = "c03570c3-d221-55d1-a50c-7939bbd78826"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"

[targets]
test = ["DataFrames", "OffsetArrays", "Sockets", "Test", "TimerOutputs", "Memoize"]
Loading