|
1 | 1 | import json |
2 | 2 | import re |
| 3 | +import yaml |
3 | 4 | from dateutil import parser as date_parser |
4 | 5 | from ..utils import constants |
5 | 6 |
|
@@ -169,27 +170,59 @@ def format_date(date_string): |
169 | 170 | if constants.CAT_CITATION in repo_data: |
170 | 171 | url_cit = [] |
171 | 172 | codemeta_output["referencePublication"] = [] |
172 | | - scholarlyArticle = {} |
| 173 | + scholarlyArticles = {} |
173 | 174 | for cit in repo_data[constants.CAT_CITATION]: |
174 | 175 | scholarlyArticle = {"@type": "ScholarlyArticle"} |
175 | | - if constants.PROP_DOI in cit[constants.PROP_RESULT].keys(): |
176 | | - # url_cit.append(cit[constants.PROP_RESULT][constants.PROP_DOI]) |
177 | | - scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI] |
178 | | - # elif constants.PROP_FORMAT in cit[constants.PROP_RESULT].keys() \ |
179 | | - # and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == constants.FORMAT_CFF: |
180 | | - # url_cit.append(cit[constants.PROP_SOURCE]) |
181 | | - |
182 | | - if constants.PROP_URL in cit[constants.PROP_RESULT].keys(): |
183 | | - scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL] |
184 | | - # if constants.PROP_AUTHOR in cit[constants.PROP_RESULT].keys(): |
185 | | - # scholarlyArticle[constants.PROP_AUTHOR] = cit[constants.PROP_RESULT][constants.PROP_AUTHOR] |
186 | | - if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys(): |
187 | | - scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE] |
188 | | - if len(scholarlyArticle) > 1: # Debe tener más que solo "@type" |
| 176 | + |
| 177 | + doi = None |
| 178 | + title = None |
| 179 | + is_bibtex = False |
| 180 | + |
| 181 | + if constants.PROP_FORMAT in cit[constants.PROP_RESULT] and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == "cff": |
| 182 | + yaml_content = yaml.safe_load(cit[constants.PROP_RESULT]["value"]) |
| 183 | + preferred_citation = yaml_content.get("preferred-citation", {}) |
| 184 | + |
| 185 | + title = normalize_title(preferred_citation.get("title", None)) |
| 186 | + doi = preferred_citation.get("doi", None) |
| 187 | + url = preferred_citation.get("url", None) |
| 188 | + if url: |
| 189 | + final_url = url |
| 190 | + elif doi: |
| 191 | + final_url = f"https://doi.org/{doi}" |
| 192 | + scholarlyArticle[constants.PROP_NAME] = title |
| 193 | + scholarlyArticle[constants.CAT_IDENTIFIER] = doi |
| 194 | + scholarlyArticle[constants.PROP_URL] = final_url |
| 195 | + else: |
| 196 | + if constants.PROP_DOI in cit[constants.PROP_RESULT].keys(): |
| 197 | + doi = cit[constants.PROP_RESULT][constants.PROP_DOI] |
| 198 | + scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI] |
| 199 | + # elif constants.PROP_FORMAT in cit[constants.PROP_RESULT].keys() \ |
| 200 | + # and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == constants.FORMAT_CFF: |
| 201 | + # url_cit.append(cit[constants.PROP_SOURCE]) |
| 202 | + |
| 203 | + if constants.PROP_URL in cit[constants.PROP_RESULT].keys(): |
| 204 | + scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL] |
| 205 | + # if constants.PROP_AUTHOR in cit[constants.PROP_RESULT].keys(): |
| 206 | + # scholarlyArticle[constants.PROP_AUTHOR] = cit[constants.PROP_RESULT][constants.PROP_AUTHOR] |
| 207 | + if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys(): |
| 208 | + title = normalize_title(cit[constants.PROP_RESULT][constants.PROP_TITLE]) |
| 209 | + scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE] |
| 210 | + is_bibtex = True |
| 211 | + |
| 212 | + if len(scholarlyArticle) > 1: |
189 | 213 | # look for information in values such as pagination, ISSN and others |
190 | 214 | scholarlyArticle = extract_scholarly_article_properties(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle) |
191 | | - codemeta_output["referencePublication"].append(scholarlyArticle) |
192 | 215 |
|
| 216 | + key = (doi, title) |
| 217 | + |
| 218 | + if key in scholarlyArticles: |
| 219 | + if is_bibtex: |
| 220 | + codemeta_output["referencePublication"].remove(scholarlyArticles[key]) |
| 221 | + codemeta_output["referencePublication"].append(scholarlyArticle) |
| 222 | + scholarlyArticles[key] = scholarlyArticle |
| 223 | + else: |
| 224 | + codemeta_output["referencePublication"].append(scholarlyArticle) |
| 225 | + scholarlyArticles[key] = scholarlyArticle |
193 | 226 | # if len(url_cit) > 0: |
194 | 227 | # codemeta_output["citation"] = url_cit |
195 | 228 |
|
@@ -254,3 +287,6 @@ def create_missing_fields(result): |
254 | 287 | if c not in repo_data: |
255 | 288 | missing.append(c) |
256 | 289 | return missing |
| 290 | + |
def normalize_title(title):
    """Return a canonical form of *title* suitable for comparing citations.

    The title is stripped, lower-cased, and every run of whitespace is
    collapsed to a single space, so the same title formatted differently
    yields the same string.

    Args:
        title: The raw title. Usually a string, but values parsed from YAML
            may be other scalars (e.g. ``title: 2048`` is parsed as an int).

    Returns:
        The normalized title, or ``None`` when *title* is missing, empty, or
        consists only of whitespace.
    """
    if not title:
        return None
    # Values loaded from YAML are not guaranteed to be strings; coerce them
    # instead of failing on ``.strip()``.
    if not isinstance(title, str):
        title = str(title)
    normalized = re.sub(r"\s+", " ", title.strip().lower())
    # A whitespace-only title carries no information: report it as missing so
    # callers see a single "no title" value (None) rather than "" or None.
    return normalized or None
0 commit comments