diff --git a/app/Console/Commands/UpdateOrcids.php b/app/Console/Commands/UpdateOrcids.php index a68d6c39..f892912c 100644 --- a/app/Console/Commands/UpdateOrcids.php +++ b/app/Console/Commands/UpdateOrcids.php @@ -40,18 +40,32 @@ public function __construct() */ public function handle() { - $inc = 0; + $total_profiles_count = $total_orcid_works_count = $updated_total = $created_total = 0; + $exact_id_match_total = $contained_id_url_match_total = $exact_title_match_total = $contained_title_match_total = 0; + $no_url_count_total = $similar_title_count_total = 0; + $profiles = Profile::whereHas('data', function ($query) { - $query->where('type', 'information') - ->where('data->orc_id_managed', '1') - ->whereNotNull('data->orc_id'); - })->get(); + $query->where('type', 'information') + ->where('data->orc_id_managed', '1') + ->whereNotNull('data->orc_id'); + })->get(); - $this->lineAndLog("Starting scheduled ORCiD data update for {$profiles->count()} profiles... "); + $this->lineAndLog("Starting scheduled ORCiD data update for {$profiles->count()} profiles... 
\n"); foreach ($profiles as $profile) { - if ($profile->updateORCID()) { - $inc++; + $results = $profile->updateORCID(); + + if ($results['completed']) { + $total_profiles_count++; + $total_orcid_works_count += $results['orcid_works_count']; + $created_total += $results['created']; + $updated_total += $results['updated']; + $exact_id_match_total += $results['exact_id_match']; + $contained_id_url_match_total += $results['contained_id_url_match']; + $exact_title_match_total += $results['exact_title_match']; + $contained_title_match_total += $results['contained_title_match']; + $no_url_count_total += $results['no_url_count']; + $similar_title_count_total += $results['similar_title_count']; $this->lineAndLog("Updated ORCiD info for {$profile->full_name}"); } else { @@ -59,7 +73,15 @@ public function handle() } } - $this->lineAndLog("Completed: {$inc}/{$profiles->count()} profiles have been updated."); + $this->lineAndLog("Completed: {$total_profiles_count}/{$profiles->count()} profiles have been updated."); + $this->lineAndLog("TOTAL: {$updated_total} publications updated/{$total_orcid_works_count} orcid records found."); + $this->lineAndLog("TOTAL: {$created_total} new publications created."); + $this->lineAndLog("TOTAL: {$exact_id_match_total} publications found by exact ID."); + $this->lineAndLog("TOTAL: {$contained_id_url_match_total} publications found by id contained in URL."); + $this->lineAndLog("TOTAL: {$exact_title_match_total} publications found by exact title."); + $this->lineAndLog("TOTAL: {$contained_title_match_total} publications found by title contained in existing record."); + $this->lineAndLog("TOTAL: {$similar_title_count_total} similar publications have been found."); + $this->lineAndLog("TOTAL: {$no_url_count_total} publications without URL."); return Command::SUCCESS; } diff --git a/app/Http/Controllers/ProfilesController.php b/app/Http/Controllers/ProfilesController.php index 42c160ce..dae9c88e 100644 --- 
a/app/Http/Controllers/ProfilesController.php +++ b/app/Http/Controllers/ProfilesController.php @@ -232,13 +232,6 @@ public function create(Request $request, User $user, LdapHelperContract $ldap): */ public function edit(Profile $profile, string $section): View|ViewContract|RedirectResponse { - //dont manage auto-managed publications - if ($section == 'publications' && $profile->hasOrcidManagedPublications()) { - $profile->updateORCID(); - return redirect() - ->route('profiles.show', $profile->slug) - ->with('flash_message', 'Publications updated via ORCID.'); - } $data = $profile->data()->$section()->get(); diff --git a/app/Profile.php b/app/Profile.php index d8f6f0e7..944fba05 100644 --- a/app/Profile.php +++ b/app/Profile.php @@ -7,6 +7,7 @@ use App\ProfileStudent; use App\Student; use App\User; +use Carbon\Carbon; use Illuminate\Database\Eloquent\Model; use OwenIt\Auditing\Auditable as HasAudits; use OwenIt\Auditing\Contracts\Auditable; @@ -20,6 +21,7 @@ use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\SoftDeletes; use Illuminate\Support\Facades\Cache; +use Illuminate\Support\Facades\Log; /** * @method public() @@ -174,64 +176,417 @@ public function hasOrcidManagedPublications() return $this->information()->where('data->orc_id_managed', '1')->exists(); } + /** + * Updates the current user's publications by importing data from the ORCID API. + * + * This method fetches ORCID works, checks for duplicates using various identifiers (DOI, EID), + * and imports new publications or updates existing ones accordingly. 
+ * + * @return array + */ public function updateORCID() { - $orc_id = $this->information()->get(array('data'))->toArray()[0]['data']['orc_id']; + // Initializing counters + $updated = $created = 0; + $exact_id_match = $contained_id_url_match = $exact_title_match = $contained_title_match = 0; + $no_url_count = $similar_title_count = 0; + + $orcid_works = $this->fetchOrcidWorks()['group']; + $orcid_works_count = count($orcid_works); + + /** @var \Illuminate\Database\Eloquent\Collection */ + $current_publications = $this->publications()->get(); + + Log::info("STARTING ORCID update for {$this->full_name} ‼️‼️"); + + foreach ($orcid_works as $record) { + $existing_pub = null; + $similar_title_found = collect(); + + $work_summary = self::getBestWorkSummary($record['work-summary']); + + $doi_record = self::getIdentifier($work_summary, 'doi'); + $eid_record = self::getIdentifier($work_summary, 'eid'); + + $title = $work_summary['title']['title']['value'] ?? null; + $year = $work_summary['publication-date']['year']['value'] ?? null; + $month = $work_summary['publication-date']['month']['value'] ?? null; + $day = $work_summary['publication-date']['day']['value'] ?? 
null; + + if ($current_publications->isNotEmpty()) { + + $no_url_count = $current_publications->filter(function ($pub) { + return empty(data_get($pub, 'data.url')); + })->count(); + + // Searching by title and date to save the count of similar matches + $results_by_title_and_date = self::searchPublicationByTitleAndDate($title, $month, $day, $year, $current_publications); + + if (isset($doi_record['id'])) { // Start searching by DOI + $existing_pub = self::searchPublicationByPubIdentifier($doi_record['id'], 'doi', $current_publications, $exact_id_match, $contained_id_url_match); // Search by DOI in both, id and url + } + + if (!$existing_pub && isset($eid_record['id'])) { // If not publciation was found, then search by EID + $existing_pub = self::searchPublicationByPubIdentifier($eid_record['id'], 'eid', $current_publications, $exact_id_match, $contained_id_url_match); // Search by EID in both, id and url + } + + if ($existing_pub) { + $similar_title_found = $results_by_title_and_date['exact_title_matching'] + ->merge($results_by_title_and_date['contained_title_matching']) + ->merge($results_by_title_and_date['similar_title_matching']) + ->reject(function($item) use ($existing_pub) { + return $item->id === $existing_pub->id; + }); + } + else { + if ($results_by_title_and_date['exact_title_matching']->isNotEmpty()) { + + $existing_pub = $results_by_title_and_date['exact_title_matching']->first(); + $exact_title_match++; + + Log::info($results_by_title_and_date['exact_title_matching']->first()->message); + + $similar_title_found = $results_by_title_and_date['contained_title_matching'] + ->merge($results_by_title_and_date['similar_title_matching']); + } + elseif ($results_by_title_and_date['contained_title_matching']->isNotEmpty()) { + $existing_pub = $results_by_title_and_date['contained_title_matching']->first(); + $contained_title_match++; + + Log::info($results_by_title_and_date['contained_title_matching']->first()->message); + + $similar_title_found = 
$results_by_title_and_date['similar_title_matching']; + } + } + + $similar_title_count += $similar_title_found->count(); + } - if(is_null($orc_id)){ - //can't update if we don't know your ID - return false; - } + if (!isset($doi_record['id']) && !isset($eid_record['id'])) { + $additional_identifier = self::getIdentifier($work_summary); + } - $orc_url = "https://pub.orcid.org/v2.0/" . $orc_id . "/activities"; + $identifiers = array_filter( + [$doi_record, $eid_record, $additional_identifier ?? null], + function ($record) { + if (!is_array($record)) return false; + return isset($record['id']) && isset($record['id_type']); + } + ); + + $data = [ + 'title' => $work_summary['title']['title']['value'], + 'year' => $work_summary['publication-date']['year']['value'] ?? null, + 'publication_date' => compact('year', 'month', 'day'), + 'type' => ucwords(strtolower(str_replace('_', ' ', $work_summary['type']))), + 'status' => 'Published', + 'put-code' => $work_summary['put-code'], + 'identifiers' => $identifiers, + 'source' => 'orcid', + 'source_id' => $work_summary['source']['source-client-id']['uri'] ?? null, + 'source_path' => $work_summary['source']['source-client-id']['path'] ?? null, + 'published_in' => $work_summary['journal-title']['value'] ?? null, + 'orginal_source' => $work_summary['source']['source-name']['value'] ?? null, + ]; + + //If the publication date is null then use the existing pub year to calculate the sort order + $sort_order = self::getSortOrder(...array_values($data['publication_date'] ?? ['year' => $existing_pub->year])); + + $this->updateOrInsertPublication($data, $sort_order, $existing_pub, $created, $updated); + } - $client = new Client(); + Log::info("Existing publications: {$current_publications->count()}, ORCID API publications: {$orcid_works_count}"); - $res = $client->get($orc_url, [ - 'headers' => [ - 'Authorization' => 'Bearer ' . 
config('ORCID_TOKEN'), - 'Accept' => 'application/json' - ], - 'http_errors' => false, // don't throw exceptions for 4xx,5xx responses - ]); + foreach (compact('updated', 'created', 'exact_id_match', 'contained_id_url_match', 'exact_title_match', 'contained_title_match', 'similar_title_count') as $key => $value) { + $key = strtoupper($key); + Log::info("Total {$key} publications: {$value} "); + } - //an error of some sort - if($res->getStatusCode() != 200){ - return false; - } + Log::info("ORCID update for {$this->full_name} completed ✅"); - $datum = json_decode($res->getBody()->getContents(), true); + Cache::tags(['profile-{$this->id}-current_publications'])->flush(); + Cache::tags(['profile_data'])->flush(); - foreach($datum['works']['group'] as $record){ - $url = NULL; - foreach($record['external-ids']['external-id'] as $ref){ - if($ref['external-id-type'] == "eid"){ - $url = "https://www.scopus.com/record/display.uri?origin=resultslist&eid=" . $ref['external-id-value']; + return array_merge(['completed' => true], + compact( + 'orcid_works_count', + 'created', + 'updated', + 'exact_id_match', + 'contained_id_url_match', + 'exact_title_match', + 'contained_title_match', + 'no_url_count', + 'similar_title_count') + ); + } + + /** + * Fetches the list of works (publications) from the ORCID API for the current user. + * Returns false if the ORCID ID is missing or the API request fails, the method returns false. + * Otherwise, returns an associative array of works data. + * + * @return array|false + */ + protected function fetchOrcidWorks() + { + $orc_id = $this->information()->get(array('data'))->toArray()[0]['data']['orc_id']; + + if (!$orc_id) { + return false; + } + + $orc_url = "https://pub.orcid.org/v3.0/$orc_id/works"; + + $client = new Client(); + + $response = $client->get($orc_url, [ + 'headers' => [ + 'Authorization' => 'Bearer ' . 
config('ORCID_TOKEN'), + 'Accept' => 'application/json' + ], + 'http_errors' => false, + ]); + + if ($response->getStatusCode() != 200) { + return false; + } + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Returns the best work summary from an array of ORCID work summaries. + * If there's only one summary, it is returned. Otherwise, the summaries are sorted + * by `display-index` in descending order, and the first summary is returned. + * + * @param array $work_summaries + * @return array|null + */ + protected static function getBestWorkSummary($work_summaries) + { + if (count($work_summaries) === 1) { + return $work_summaries[0]; + } + + $sorted = collect($work_summaries) + ->sortByDesc('display-index') + ->values(); + + return $sorted->first(); + } + + /** + * If a specific type key is provided, it looks for an external ID of that type with a relationship of 'self'. + * If no type key is given, it returns the first external ID with a 'self' relationship for a work summary. + * @param array $work_summary + * @param string|null $type_key + * @return array{ + * id: string|null, + * id_type: string|null, + * id_url: string|null + * } + */ + protected static function getIdentifier($work_summary, $type_key = null) + { + $id_record = collect($work_summary['external-ids']['external-id'] ?? null)->first(function($ext_id) use ($type_key) { + if ($type_key) { + return $ext_id['external-id-type'] === $type_key && $ext_id['external-id-relationship'] === 'self'; + } + return $ext_id['external-id-relationship'] === 'self'; + }); + + $id = $id_record['external-id-normalized']['value'] ?? null; + $id_url = $id_record['external-id-url']['value'] ?? null; + $id_type = $type_key ?? ($id_record['external-id-type'] ?? 'unknown'); + + return compact('id', 'id_type', 'id_url'); + } + + /** + * Search publication by exact identifier, if not found, then it searches for the identifier within the URL. 
+ * + * @param string $id + * @param string $id_type + * @param \Illuminate\Database\Eloquent\Collection $publications + * @return \App\ProfileData|null Returns the matched publication or null if none found. + */ + protected static function searchPublicationByPubIdentifier($id, $id_type, $publications, &$exact_id_match, &$contained_id_url_match) + { + $id = strtolower($id); + + return $publications->first(function ($publication) use ($id, $id_type, &$exact_id_match, &$contained_id_url_match) { + $pub_id = strtolower($publication->data['id'] ?? ''); + $pub_url = strtolower($publication->data['url'] ?? ''); + + if ($pub_id && $pub_id === $id) { + $exact_id_match++; + Log::info("Publication matched: exact ID match for {$id_type}, {$pub_id}"); + return true; } - else if($ref['external-id-type'] == "doi"){ - $url = "http://doi.org/" . $ref['external-id-value']; + + if ($pub_url && str_contains($pub_url, $id)) { + $contained_id_url_match++; + Log::info("Publication matched: ID found in URL, for {$id} in {$pub_url} for {$publication->id}"); + return true; } - } - $record = ProfileData::firstOrCreate([ - 'profile_id' => $this->id, - 'type' => 'publications', - 'data->title' => $record['work-summary'][0]['title']['title']['value'], - 'sort_order' => $record['work-summary'][0]['publication-date']['year']['value'] ?? null, - ],[ - 'data' => [ - 'url' => $url, - 'title' => $record['work-summary'][0]['title']['title']['value'], - 'year' => $record['work-summary'][0]['publication-date']['year']['value'] ?? null, - 'type' => ucwords(strtolower(str_replace('_', ' ', $record['work-summary'][0]['type']))), - 'status' => 'Published' - ], - ]); - } - Cache::tags(['profile_data'])->flush(); + return false; + }); + } + + /** + * Searches for a publication by matching title and publication date. + * It checks for exact, contained, or similar title matches. Also collects similar title matches (if year matches) in a separate group for reference. 
+ * + * @param string $title + * @param string|null $month + * @param string|null $day + * @param string|null $year + * @param \Illuminate\Database\Eloquent\Collection $existing_publications + * + * @return \Illuminate\Support\Collection + */ + protected static function searchPublicationByTitleAndDate(string $title, ?string $month, ?string $day, ?string $year, $existing_publications) + { + /** @var \Illuminate\Support\Collection */ + $results = [ + 'exact_title_matching' => collect(), + 'contained_title_matching' => collect(), + 'similar_title_matching' => collect(), + ]; + + foreach ($existing_publications as $existing_pub) { + $data = $existing_pub->data; + $existing_title = strtolower($data['title'] ?? ''); + + $pub_date = $data['publication_date'] ?? []; + + $pub_day = $pub_date['day'] ?? null; + $pub_month = $pub_date['month'] ?? null; + $pub_year = $pub_date['year'] ?? null; + + $pub_year = $pub_year ?: $data['year']; + + if ($pub_year !== $year) { + continue; // Continue loop if year doesn't match + } + + $title_match_type = self::getTitleMatchType(strtolower($title), $existing_title); + + if (!$title_match_type) { + continue; // Continue loop if title doesn't match + } + + if ($month && $day && $pub_month === $month && $pub_day === $day) { + $existing_pub->message = "Matching publication found {$existing_pub->id} by $title_match_type title and full pubblication date: {$title}, {$year}, {$month}, {$day}"; + } + + if ($month && $pub_month === $month) { + $existing_pub->message = "Matching publication {$existing_pub->id} found by $title_match_type title and month: {$title}, {$month}"; + } + + $existing_pub->message = "Matching publication {$existing_pub->id} found by $title_match_type title and year: {$title}, {$year}"; + + $results["{$title_match_type}_title_matching"]->push($existing_pub); + } + + if ($results['exact_title_matching']->isEmpty() && $results['contained_title_matching']->isEmpty()) { + Log::warning("No matching publication found for: {$title}, 
{$year}, {$month}, {$day}"); + } + + return $results; + } + + /** + * Determines the match type between two publication titles. + * + * @param string $new_title + * @param string $existing_title + * @return string|false Returns the match type ('exact', 'contained', 'similar') or false if no match. + */ + protected static function getTitleMatchType(string $new_title, string $existing_title) + { + if ($new_title === $existing_title) { + return 'exact'; + } + + if (str_contains($new_title, $existing_title) || str_contains($existing_title, $new_title)) { + return 'contained'; + } + + similar_text($new_title, $existing_title, $percent); + + return ($percent >= 93 && $percent <= 97) ? 'similar' : false; + } + + /** + * Calculates an integer value used to sort publications in reverse chronological order. + * It converts a given date (year, month, day) into a numeric value such that more recent dates have lower values. + * + * @param int|string $year + * @param int|string|null $month + * @param int|string|null $day + * @return int An integer representing the reverse chronological sort order. + */ + protected static function getSortOrder($year, $month = null, $day = null) + { + $year = (int) $year; + $month = $month !== null ? str_pad((string) $month, 2, '0', STR_PAD_LEFT) : '00'; + $day = $day !== null ? str_pad((string) $day, 2, '0', STR_PAD_LEFT) : '00'; + + $rev_year = 9999 - $year; + + if ($month === '00') { + return (int) sprintf('%04d0000', $rev_year); + } + + $rev_month = 12 - (int) $month; + + if ($day === '00') { + return (int) sprintf('%04d%02d00', $rev_year, $rev_month); + } + + $rev_day = 31 - (int) $day; + + return (int) sprintf('%04d%02d%02d', $rev_year, $rev_month, $rev_day); + } + + /** + * Updates an existing publication or inserts a new one. 
+ * + * @param array $data + * @param \App\ProfileData|null $existing_pub + * @param int &$created + * @param int &$updated + * @return bool + */ + protected function updateOrInsertPublication($data, $sort_order, $existing_pub, &$created, &$updated) + { + if ($existing_pub) { + unset($existing_pub->message); + $data = array_merge($data, ['url' => $existing_pub->data['url'] ?? null ]); + + $existing_pub->update([ + 'data' => $data, + 'sort_order' => $sort_order, + ]); + + $updated++; + Log::info("Updated best match publication for id: {$existing_pub->id}"); + } + else { + $existing_pub = $this->publications()->create([ + 'data' => $data, + 'sort_order' => $sort_order, + 'type' => 'publications', + ]); + + $created++; + Log::info("Created new publication (no best match found) for title: {$data['title']}"); + } - //ran through process successfully - return true; + return true; } public function updateDatum($section, $request) diff --git a/resources/views/livewire/profile-data-cards/publications.blade.php b/resources/views/livewire/profile-data-cards/publications.blade.php index 5c517ef2..d59ca656 100644 --- a/resources/views/livewire/profile-data-cards/publications.blade.php +++ b/resources/views/livewire/profile-data-cards/publications.blade.php @@ -1,13 +1,14 @@

Publications @if($editable) - - @if($profile->hasOrcidManagedPublications()) - Sync - @else - Edit + @if($profile->hasOrcidManagedPublications()) + + Sync + @endif - + + Edit + @endif

@foreach($data as $pub) diff --git a/resources/views/profiles/edit/information.blade.php b/resources/views/profiles/edit/information.blade.php index ba3c214a..939e7540 100644 --- a/resources/views/profiles/edit/information.blade.php +++ b/resources/views/profiles/edit/information.blade.php @@ -153,7 +153,7 @@
-

Refresh all publications via ORCID. All previous publications will be removed and fresh data will be pulled in at regular intervals. Keep unchecked to manually edit your publications.

+

Refresh all publications via ORCID. Identifies publications that were previously automatically imported and updates them with the latest data. Manually added publications will not be updated. New publications will also be pulled. Automatic updates occur weekly.

diff --git a/resources/views/profiles/edit/publications.blade.php b/resources/views/profiles/edit/publications.blade.php index 9ec8d4e2..a509f293 100644 --- a/resources/views/profiles/edit/publications.blade.php +++ b/resources/views/profiles/edit/publications.blade.php @@ -25,22 +25,31 @@
-
+
+ name="data[{{ $pub->id }}][data][url]" + value="{{ $pub->data['identifiers'][0]['id_url'] ?? $pub->url }}"> +
+
+ +
+
+
-
+
-
+