Skip to content

Commit 9d1afad

Browse files
authored
Merge pull request #709 from KnowledgeCaptureAndDiscovery/dev
Dev
2 parents 9ecc956 + e23ecc6 commit 9d1afad

File tree

5 files changed

+184
-25
lines changed

5 files changed

+184
-25
lines changed

poetry.lock

Lines changed: 64 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ markdown = "^3.5.2"
4040
rdflib-jsonld = "^0.6.2"
4141
requests = "^2.31.0"
4242
scikit-learn = "1.3.2"
43+
pyyaml = "^6.0.2"
4344

4445
[tool.poetry.scripts]
4546
somef = "somef.__main__:cli"

src/somef/export/json_export.py

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import re
3+
import yaml
34
from dateutil import parser as date_parser
45
from ..utils import constants
56

@@ -169,27 +170,59 @@ def format_date(date_string):
169170
if constants.CAT_CITATION in repo_data:
170171
url_cit = []
171172
codemeta_output["referencePublication"] = []
172-
scholarlyArticle = {}
173+
scholarlyArticles = {}
173174
for cit in repo_data[constants.CAT_CITATION]:
174175
scholarlyArticle = {"@type": "ScholarlyArticle"}
175-
if constants.PROP_DOI in cit[constants.PROP_RESULT].keys():
176-
# url_cit.append(cit[constants.PROP_RESULT][constants.PROP_DOI])
177-
scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI]
178-
# elif constants.PROP_FORMAT in cit[constants.PROP_RESULT].keys() \
179-
# and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == constants.FORMAT_CFF:
180-
# url_cit.append(cit[constants.PROP_SOURCE])
181-
182-
if constants.PROP_URL in cit[constants.PROP_RESULT].keys():
183-
scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL]
184-
# if constants.PROP_AUTHOR in cit[constants.PROP_RESULT].keys():
185-
# scholarlyArticle[constants.PROP_AUTHOR] = cit[constants.PROP_RESULT][constants.PROP_AUTHOR]
186-
if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys():
187-
scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE]
188-
if len(scholarlyArticle) > 1: # Debe tener más que solo "@type"
176+
177+
doi = None
178+
title = None
179+
is_bibtex = False
180+
181+
if constants.PROP_FORMAT in cit[constants.PROP_RESULT] and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == "cff":
182+
yaml_content = yaml.safe_load(cit[constants.PROP_RESULT]["value"])
183+
preferred_citation = yaml_content.get("preferred-citation", {})
184+
185+
title = normalize_title(preferred_citation.get("title", None))
186+
doi = preferred_citation.get("doi", None)
187+
url = preferred_citation.get("url", None)
188+
if url:
189+
final_url = url
190+
elif doi:
191+
final_url = f"https://doi.org/{doi}"
192+
scholarlyArticle[constants.PROP_NAME] = title
193+
scholarlyArticle[constants.CAT_IDENTIFIER] = doi
194+
scholarlyArticle[constants.PROP_URL] = final_url
195+
else:
196+
if constants.PROP_DOI in cit[constants.PROP_RESULT].keys():
197+
doi = cit[constants.PROP_RESULT][constants.PROP_DOI]
198+
scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI]
199+
# elif constants.PROP_FORMAT in cit[constants.PROP_RESULT].keys() \
200+
# and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == constants.FORMAT_CFF:
201+
# url_cit.append(cit[constants.PROP_SOURCE])
202+
203+
if constants.PROP_URL in cit[constants.PROP_RESULT].keys():
204+
scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL]
205+
# if constants.PROP_AUTHOR in cit[constants.PROP_RESULT].keys():
206+
# scholarlyArticle[constants.PROP_AUTHOR] = cit[constants.PROP_RESULT][constants.PROP_AUTHOR]
207+
if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys():
208+
title = normalize_title(cit[constants.PROP_RESULT][constants.PROP_TITLE])
209+
scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE]
210+
is_bibtex = True
211+
212+
if len(scholarlyArticle) > 1:
189213
# look for information in the values, such as pagination, ISSN and others
190214
scholarlyArticle = extract_scholarly_article_properties(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle)
191-
codemeta_output["referencePublication"].append(scholarlyArticle)
192215

216+
key = (doi, title)
217+
218+
if key in scholarlyArticles:
219+
if is_bibtex:
220+
codemeta_output["referencePublication"].remove(scholarlyArticles[key])
221+
codemeta_output["referencePublication"].append(scholarlyArticle)
222+
scholarlyArticles[key] = scholarlyArticle
223+
else:
224+
codemeta_output["referencePublication"].append(scholarlyArticle)
225+
scholarlyArticles[key] = scholarlyArticle
193226
# if len(url_cit) > 0:
194227
# codemeta_output["citation"] = url_cit
195228

@@ -254,3 +287,6 @@ def create_missing_fields(result):
254287
if c not in repo_data:
255288
missing.append(c)
256289
return missing
290+
291+
def normalize_title(title):
    """Return a canonical form of a title so equal titles compare equal.

    The text is stripped of leading/trailing whitespace, lower-cased, and
    every run of whitespace is collapsed to a single space.

    Args:
        title: Title text, or a falsy value (``None``, ``""``) when no title
            is available. Truthy non-string values (for example a number
            produced by a YAML parser) are converted with ``str`` first
            instead of raising ``AttributeError``.

    Returns:
        The normalized title string, or ``None`` when ``title`` is falsy.
    """
    if not title:
        return None
    # Parsed metadata may hand us a non-string scalar; coerce it rather than
    # failing on the missing ``strip`` method.
    text = title if isinstance(title, str) else str(title)
    return re.sub(r"\s+", " ", text.strip().lower())

0 commit comments

Comments
 (0)