Class: Work

Inherits:
ApplicationRecord show all
Includes:
AASM
Defined in:
app/models/work.rb

Overview

rubocop:disable Metrics/ClassLength

Defined Under Namespace

Classes: InvalidGroupError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#user_entered_doiObject

Returns the value of attribute user_entered_doi.



18
19
20
# File 'app/models/work.rb', line 18

# Reader for the +user_entered_doi+ attribute.
# @return [Object, nil] the value held in @user_entered_doi (nil when it was never set)
def user_entered_doi = @user_entered_doi

Class Method Details

.find_by_ark(ark) ⇒ Object



107
108
109
110
111
# File 'app/models/work.rb', line 107

# Look up a Work by its ARK.
#
# The "ark:/" scheme is prepended when the given value is non-blank and does
# not already start with it. The lookup is a JSONB containment query against
# the metadata column.
# @param ark [String] the ARK, with or without the "ark:/" scheme
# @return [Work] the matching work
# @raise [ActiveRecord::RecordNotFound] when find_by! matches nothing
def find_by_ark(ark)
  ark_scheme = "ark:/"
  already_normalized = ark.blank? || ark.start_with?(ark_scheme)
  ark = "#{ark_scheme}#{ark}" unless already_normalized
  Work.find_by!("metadata @> ?", JSON.dump(ark:))
end

.find_by_doi(doi) ⇒ Object



101
102
103
104
105
# File 'app/models/work.rb', line 101

# Look up a Work by its DOI.
#
# The "10.34770/" prefix is prepended when the given value is non-blank and
# does not already start with it. The lookup is a JSONB containment query
# against the metadata column.
# @param doi [String] the DOI, with or without the "10.34770/" prefix
# @return [Work] the matching work
# @raise [ActiveRecord::RecordNotFound] when find_by! matches nothing
def find_by_doi(doi)
  doi_prefix = "10.34770/"
  already_normalized = doi.blank? || doi.start_with?(doi_prefix)
  doi = "#{doi_prefix}#{doi}" unless already_normalized
  Work.find_by!("metadata @> ?", JSON.dump(doi:))
end

.list_embargoedObject



115
116
117
# File 'app/models/work.rb', line 115

# Approved works whose embargo date is today or later (compared in SQL
# against the database's current_date).
# @return [ActiveRecord::Relation] presumably a relation of Work records — whatever Work.where returns
def list_embargoed
  not_yet_released = Work.where("embargo_date >= current_date")
  not_yet_released.where(state: "approved")
end

.list_released_embargoObject



119
120
121
# File 'app/models/work.rb', line 119

# Approved works whose embargo date was exactly yesterday (current_date - 1
# as evaluated by the database).
# @return [ActiveRecord::Relation] presumably a relation of Work records — whatever Work.where returns
def list_released_embargo
  embargo_ended_yesterday = Work.where("embargo_date = current_date-1")
  embargo_ended_yesterday.where(state: "approved")
end

.presenter_classObject



504
505
506
# File 'app/models/work.rb', line 504

# The class used by #presenter to wrap a work.
# @return [Class] WorkPresenter
def self.presenter_class = WorkPresenter

Instance Method Details

#activitiesObject



288
289
290
# File 'app/models/work.rb', line 288

# Activities recorded for this work, limited to the message and change-log
# activity types.
# @return [Object] whatever WorkActivity.activities_for_work returns for this work's id
def activities
  visible_types = WorkActivity::MESSAGE_ACTIVITY_TYPES + WorkActivity::CHANGE_LOG_ACTIVITY_TYPES
  WorkActivity.activities_for_work(id, visible_types)
end

#add_message(message, current_user_id) ⇒ Object



270
271
272
# File 'app/models/work.rb', line 270

# Record a message activity on this work.
# @param message [String] the message text
# @param current_user_id [Integer] id of the user posting the message
# @return [Object] whatever WorkActivity.add_work_activity returns
def add_message(message, current_user_id)
  message_type = WorkActivity::MESSAGE
  WorkActivity.add_work_activity(id, message, current_user_id, activity_type: message_type)
end

#add_provenance_note(date, note, current_user_id, change_label = "") ⇒ Object



274
275
276
# File 'app/models/work.rb', line 274

# Record a provenance note on this work. The note and its label are stored
# together as a JSON string, and the activity is created with the given date
# as its created_at.
# @param date [Object] timestamp passed through as created_at
# @param note [String] the note text
# @param current_user_id [Integer] id of the user adding the note
# @param change_label [String] optional label stored alongside the note
# @return [Object] whatever WorkActivity.add_work_activity returns
def add_provenance_note(date, note, current_user_id, change_label = "")
  payload = { note:, change_label: }.to_json
  WorkActivity.add_work_activity(
    id,
    payload,
    current_user_id,
    activity_type: WorkActivity::PROVENANCE_NOTES,
    created_at: date
  )
end

#administered_by?(user) ⇒ Boolean

Returns:

  • (Boolean)


96
97
98
# File 'app/models/work.rb', line 96

# Whether the user holds the :group_admin role on this work's group.
# @param user [User] must respond to has_role?
# @return [Boolean] the result of user.has_role?(:group_admin, group)
def administered_by?(user) = user.has_role?(:group_admin, group)

#artifact_uploadsArray<S3File>

Retrieve the S3 file uploads which are research artifacts proper (not README or other files providing metadata/documentation)

Returns:



338
339
340
# File 'app/models/work.rb', line 338

# Uploads whose filename does not contain "README", i.e. the research
# artifacts rather than the accompanying documentation.
# @return [Array<S3File>]
def artifact_uploads
  _readmes, artifacts = uploads.partition { |upload| upload.filename.include?("README") }
  artifacts
end

#as_json(*args) ⇒ String

Generates the JSON serialized expression of the Work

Parameters:

  • args (Array<Hash>)

Options Hash (*args):

  • :force_post_curation (Boolean)

    Force the request of AWS S3 Resources, clearing the in-memory cache

Returns:

  • (Hash)


445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'app/models/work.rb', line 445

# Builds the JSON-ready representation of the Work.
#
# Note that this returns a Hash (to_json is what produces the serialized
# String). The group's "id" key is removed from the output.
# @param args [Array<Hash>] forwarded unchanged to files_as_json
# @return [Hash] keys: "resource", "files", "group", "embargo_date",
#   "created_at", "updated_at", "date_approved"
def as_json(*args)
  # The file listing is gathered before the rest of the hash is assembled.
  serialized_files = files_as_json(*args)

  {
    "resource" => resource.as_json,
    "files" => serialized_files,
    "group" => group.as_json.except("id"),
    "embargo_date" => embargo_date_as_json,
    "created_at" => format_date_for_solr(created_at),
    "updated_at" => format_date_for_solr(updated_at),
    "date_approved" => date_approved
  }
end

#change_curator(curator_user_id, current_user) ⇒ Object



229
230
231
232
233
234
235
# File 'app/models/work.rb', line 229

# Assign or clear the curator of this work.
# @param curator_user_id [String, Integer] id of the new curator, or the
#   sentinel string "no-one" to remove the current curator
# @param current_user [User] the user making the change
# @return [Object] whatever clear_curator / update_curator returns
def change_curator(curator_user_id, current_user)
  return clear_curator(current_user) if curator_user_id == "no-one"

  update_curator(curator_user_id, current_user)
end

#changesObject



512
513
514
# File 'app/models/work.rb', line 512

# Lazily-initialized list of file changes collected by track_change.
# NOTE(review): this name matches ActiveRecord's dirty-tracking `changes`
# method — confirm the override is intentional.
# @return [Array<Hash>] the same array instance on every call
def changes
  return @changes if @changes

  @changes = []
end

#clear_curator(current_user) ⇒ Object



237
238
239
240
241
242
243
244
# File 'app/models/work.rb', line 237

# Remove the curator from this work, persist the change, and log a SYSTEM
# activity attributed to the acting user.
# @param current_user [User] the user performing the change
# @return [Object] whatever WorkActivity.add_work_activity returns
def clear_curator(current_user)
  # Persist the removal first...
  self.curator_user_id = nil
  save!

  # ...then record who did it.
  WorkActivity.add_work_activity(
    id,
    "Unassigned existing curator",
    current_user.id,
    activity_type: WorkActivity::SYSTEM
  )
end

#created_by_userObject



195
196
197
198
199
# File 'app/models/work.rb', line 195

# The user who created this work.
#
# Uses find_by so a missing user yields nil directly, instead of raising
# ActiveRecord::RecordNotFound and rescuing it as control flow.
# @return [User, nil] the creator, or nil when no user has that id
def created_by_user
  User.find_by(id: created_by_user_id)
end

#current_transitionObject



318
319
320
# File 'app/models/work.rb', line 318

# Human-readable name of the AASM event currently being processed: the event
# name is humanized and any "!" characters are removed.
# @return [String]
def current_transition
  event_name = aasm.current_event.to_s
  event_name.humanize.delete("!")
end

#date_approvedObject



569
570
571
572
573
# File 'app/models/work.rb', line 569

# Date on which the work was approved, taken from the first WorkActivity for
# this work whose message is exactly 'marked as Approved'.
#
# The activity is memoized in @approved_activity; because ||= is used, a
# lookup that finds nothing is repeated on the next call.
# @return [String, nil] the activity's created_at date via Date#to_s, or nil
#   when no such activity exists
def date_approved
  @approved_activity ||= WorkActivity.where("work_id = ? and message='marked as Approved'", id).first

  if @approved_activity.nil?
    nil
  else
    @approved_activity.created_at.to_date.to_s
  end
end

#doi_urlString

Return the DOI formatted as a URL, so it can be used as a link on display pages

Returns:

  • (String)

    A url formatted version of the DOI



190
191
192
193
# File 'app/models/work.rb', line 190

# Return the DOI formatted as a URL, so it can be used as a link on display pages.
#
# Uses Ruby's core String#start_with? (as find_by_doi / find_by_ark do) rather
# than the ActiveSupport alias.
# @return [String] the DOI unchanged when it already starts with
#   "https://doi.org", otherwise the DOI appended to "https://doi.org/"
def doi_url
  return doi if doi.start_with?("https://doi.org")

  "https://doi.org/#{doi}"
end

#draft_doiObject



182
183
184
185
186
# File 'app/models/work.rb', line 182

# Ensure the work's resource has a DOI: when none is present, obtain a draft
# DOI from datacite_service, assign it to the resource and save the work.
# Does nothing when the resource already has a DOI.
# @return [Object, nil] the result of save! when a DOI was assigned, nil otherwise
def draft_doi
  if resource.doi.blank?
    resource.doi = datacite_service.draft_doi
    save!
  end
end

#editable_by?(user) ⇒ Boolean

Is this work editable by a given user? A work is editable when:

  • it is being edited by the person who made it

  • it is being edited by a group admin of the group where it resides

  • it is being edited by a super admin

Parameters:

Returns:

  • (Boolean)


80
81
82
# File 'app/models/work.rb', line 80

# Is this work editable by the given user?
#
# True when the user submitted the work (submitted_by?) or administers the
# work's group (administered_by?). The left-hand call had lost its method
# name, which made the expression truthy for any non-nil user.
# @param user [User]
# @return [Boolean]
def editable_by?(user)
  submitted_by?(user) || administered_by?(user)
end

#editable_in_current_state?(user) ⇒ Boolean

Returns:

  • (Boolean)


84
85
86
87
88
89
90
# File 'app/models/work.rb', line 84

# Whether the user may edit the work given its current state.
#
# While the work is a draft, anyone with edit privileges (editable_by?) may
# edit it; in every other state only administrators (administered_by?) may.
# @param user [User]
# @return [Boolean]
def editable_in_current_state?(user)
  draft? ? editable_by?(user) : administered_by?(user)
end

#embargoed?Boolean

Determine whether or not the Work is under active embargo

Embargoes are not fully released until the day after the embargo date

Returns:

  • (Boolean)


534
535
536
537
538
539
# File 'app/models/work.rb', line 534

# Determine whether or not the Work is under active embargo.
#
# The embargo date itself still counts as embargoed: the work is only
# released once today's date (in Time.zone) is after the embargo date.
# @return [Boolean] false when there is no embargo date
def embargoed?
  if embargo_date.blank?
    false
  else
    today = Time.zone.now.to_date
    today <= embargo_date
  end
end

#file_listObject

Returns the list of files for the work with some basic information about each of them. This method is much faster than `uploads` because it does not return the actual S3File objects to the client; instead, it returns just a few selected data elements. rubocop:disable Metrics/MethodLength



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'app/models/work.rb', line 346

# Returns a lightweight listing of the work's files: one Hash of selected
# attributes per file rather than the S3File objects themselves.
#
# Approved works list their post-curation uploads; all others list their
# pre-curation uploads. The elapsed time is reported through log_performance.
# @return [Array<Hash>] hashes with symbol keys :safe_id, :filename,
#   :filename_display, :last_modified, :last_modified_display, :size,
#   :display_size, :url and :is_folder
def file_list
  started_at = Time.zone.now
  source_files = if approved?
                   post_curation_uploads
                 else
                   pre_curation_uploads
                 end
  summaries = source_files.map do |entry|
    {
      safe_id: entry.safe_id,
      filename: entry.filename,
      filename_display: entry.filename_display,
      last_modified: entry.last_modified,
      last_modified_display: entry.last_modified_display,
      size: entry.size,
      display_size: entry.display_size,
      url: entry.url,
      is_folder: entry.is_folder
    }
  end
  log_performance(started_at, "file_list called for #{id}")
  summaries
end

#files_location_cluster?Boolean

Returns:

  • (Boolean)


221
222
223
# File 'app/models/work.rb', line 221

# Whether files_location is exactly "file_cluster".
# @return [Boolean]
def files_location_cluster? = files_location == "file_cluster"

#files_location_other?Boolean

Returns:

  • (Boolean)


225
226
227
# File 'app/models/work.rb', line 225

# Whether files_location is exactly "file_other".
# @return [Boolean]
def files_location_other? = files_location == "file_other"

#files_location_upload?Boolean

Returns:

  • (Boolean)


217
218
219
# File 'app/models/work.rb', line 217

# Whether the files are uploaded directly: true when files_location is blank
# (so upload is the default) or is exactly "file_upload".
# @return [Boolean]
def files_location_upload?
  return true if files_location.blank?

  files_location == "file_upload"
end

#files_modeObject

Returns the bucket name where the files are stored for this work.



557
558
559
560
561
562
563
564
565
566
567
# File 'app/models/work.rb', line 557

# Returns the PULS3Client mode constant for where this work's files live:
# PRECURATION until the work is approved, then EMBARGO while it is embargoed
# and POSTCURATION otherwise.
# @return [Object] one of the PULS3Client mode constants
def files_mode
  return PULS3Client::PRECURATION unless approved?

  embargoed? ? PULS3Client::EMBARGO : PULS3Client::POSTCURATION
end

#files_mode_humanObject

Returns a human-friendly name for the bucket where the files for the work are located. Notice that we don’t use the values from PULS3Client because those are not human friendly (e.g. they lack dashes between words).



544
545
546
547
548
549
550
551
552
553
554
# File 'app/models/work.rb', line 544

# Human-friendly name of the bucket holding the work's files:
# "pre-curation" until the work is approved, then "embargo" while it is
# embargoed and "post-curation" otherwise.
# @return [String]
def files_mode_human
  return "pre-curation" unless approved?

  embargoed? ? "embargo" : "post-curation"
end

#find_bucket_s3_dir(bucket_name:) ⇒ Aws::S3::Types::HeadObjectOutput

Transmit a HEAD request for the S3 Bucket directory for this Work

Parameters:

  • bucket_name

    name of the S3 bucket to check

Returns:

  • (Boolean, nil) — true when the HEAD request succeeds, nil when S3 reports the object as not found


428
429
430
431
432
433
434
435
436
437
438
# File 'app/models/work.rb', line 428

# Issue a HEAD request for this work's S3 object key in the given bucket.
#
# TODO: Directories really do not exist in S3; if this check is really needed
#       then something else has to be done to check the bucket.
# @param bucket_name [String] the bucket to look in
# @return [true, nil] true when the HEAD request succeeds, nil when S3
#   reports Aws::S3::Errors::NotFound
def find_bucket_s3_dir(bucket_name:)
  begin
    client = s3_client
    head_request = { bucket: bucket_name, key: s3_object_key }
    client.head_object(head_request)
  rescue Aws::S3::Errors::NotFound
    return nil
  end
  true
end

#form_attributesObject



176
177
178
179
180
# File 'app/models/work.rb', line 176

# Attributes exposed to the form: currently only the uploads, as produced by
# uploads_attributes.
# @return [Hash] a single :uploads key
def form_attributes
  attributes = {}
  attributes[:uploads] = uploads_attributes
  attributes
end

#format_date_for_solr(date) ⇒ String

Format the date for Apache Solr

Parameters:

  • date (ActiveSupport::TimeWithZone)

Returns:

  • (String)


464
465
466
# File 'app/models/work.rb', line 464

# Format a timestamp for Apache Solr.
#
# The pattern ends in a literal "Z", which marks the value as UTC, so the
# time is converted to UTC first. Without the conversion a non-UTC time
# would be written with its local clock reading but labelled as UTC.
# getutc is used because it returns a new object instead of changing the
# receiver.
# @param date [ActiveSupport::TimeWithZone, Time]
# @return [String] e.g. "2024-01-02T08:04:05Z"
def format_date_for_solr(date)
  date.getutc.strftime("%Y-%m-%dT%H:%M:%SZ")
end

#has_rights?(rights_id) ⇒ Boolean

rubocop:disable Naming/PredicateName

Returns:

  • (Boolean)


521
522
523
# File 'app/models/work.rb', line 521

# Whether any of the resource's rights entries has the given identifier.
# (The has_ prefix is kept for callers; see rubocop Naming/PredicateName.)
# @param rights_id [String] identifier to look for
# @return [Boolean]
def has_rights?(rights_id)
  resource.rights_many.any? { |rights| rights.identifier == rights_id }
end

#log_changes(resource_compare, current_user_id) ⇒ Object



278
279
280
281
# File 'app/models/work.rb', line 278

# Record a CHANGES activity describing the differences found by a resource
# comparison. Nothing is recorded when the comparison reports the resources
# as identical.
# @param resource_compare [Object] must respond to identical? and differences
# @param current_user_id [Integer] id of the user who made the changes
# @return [Object, nil] the add_work_activity result, or nil when identical
def log_changes(resource_compare, current_user_id)
  unless resource_compare.identical?
    differences_json = resource_compare.differences.to_json
    WorkActivity.add_work_activity(id, differences_json, current_user_id, activity_type: WorkActivity::CHANGES)
  end
end

#log_file_changes(current_user_id) ⇒ Object



283
284
285
286
# File 'app/models/work.rb', line 283

# Record a FILE_CHANGES activity containing the tracked file changes
# (see #changes). Nothing is recorded when no changes were tracked.
# @param current_user_id [Integer] id of the user who made the changes
# @return [Object, nil] the add_work_activity result, or nil when there were no changes
def log_file_changes(current_user_id)
  unless changes.empty?
    WorkActivity.add_work_activity(id, changes.to_json, current_user_id, activity_type: WorkActivity::FILE_CHANGES)
  end
end

#mark_new_notifications_as_read(user_id) ⇒ Object

Marks as read the notifications for the given user_id in this work. In practice, the user_id is the id of the current user and therefore this method marks the current user’s notifications as read.



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'app/models/work.rb', line 302

# Marks as read the notifications of the given user for this work by setting
# read_at to the current UTC time.
# @param user_id [Integer] id of the user whose notifications are marked
# @return [Object] whatever the batched update_all returns
def mark_new_notifications_as_read(user_id)
  # The rows are updated in batches of 1000 with update_all, so no
  # per-notification SELECT + UPDATE is issued.
  #
  # Rails batching information:
  #   https://guides.rubyonrails.org/active_record_querying.html
  #   https://api.rubyonrails.org/classes/ActiveRecord/Batches.html
  read_timestamp = Time.now.utc
  notifications = WorkActivityNotification.joins(:work_activity).where("user_id=? and work_id=?", user_id, id)

  # Validations are skipped on purpose: we want to force a SQL UPDATE.
  # rubocop:disable Rails/SkipsModelValidations
  notifications.in_batches(of: 1000).update_all(read_at: read_timestamp)
  # rubocop:enable Rails/SkipsModelValidations
end

#new_notification_count_for_user(user_id) ⇒ Object



292
293
294
295
296
297
# File 'app/models/work.rb', line 292

# Number of unread notifications (read_at is nil) that the given user has on
# this work.
# @param user_id [Integer]
# @return [Integer] presumably — whatever the relation's count returns
def new_notification_count_for_user(user_id)
  notifications = WorkActivityNotification.joins(:work_activity)
  unread_for_user = notifications.where(user_id:, read_at: nil)
  unread_on_this_work = unread_for_user.where(work_activity: { work_id: id })
  unread_on_this_work.count
end

#past_snapshotsObject



477
478
479
# File 'app/models/work.rb', line 477

# The UploadSnapshot records that belong to this work.
# @return [Object] whatever UploadSnapshot.where(work: self) returns
def past_snapshots
  owning_work = self
  UploadSnapshot.where(work: owning_work)
end

#pdc_discovery_urlObject

This is the solr id / work show page in PDC Discovery



527
528
529
# File 'app/models/work.rb', line 527

# URL of this work's show page in PDC Discovery. The Solr id is "doi-"
# followed by the DOI with every "/" and "." replaced by "-".
# @return [String]
def pdc_discovery_url
  solr_id = "doi-#{doi.tr('/.', '-')}"
  "https://datacommons.princeton.edu/discovery/catalog/#{solr_id}"
end

#post_curation_s3_resourcesObject

Accesses post-curation S3 Bucket Objects



388
389
390
391
392
393
394
# File 'app/models/work.rb', line 388

# The work's S3 resources once it has been approved; an empty array for any
# work that is not approved.
# @return [Array] s3_resources when approved?, otherwise []
def post_curation_s3_resources
  approved? ? s3_resources : []
end

#post_curation_uploads(force_post_curation: false) ⇒ Object

Returns the files in post-curation for the work



397
398
399
400
401
402
403
404
405
406
# File 'app/models/work.rb', line 397

# Returns the files in post-curation for the work.
# @param force_post_curation [Boolean] when true, always query the
#   post-curation location through a fresh S3QueryService regardless of the
#   work's status; when false, honor the work's status
# @return [Array] the :objects entry of the service's data profile (or [] if
#   that key is absent) when forced, otherwise post_curation_s3_resources
def post_curation_uploads(force_post_curation: false)
  # Default path: the listing depends on the work's status.
  return post_curation_s3_resources unless force_post_curation

  # Forced path: read post-curation data regardless of the work's status.
  query_service = S3QueryService.new(self, PULS3Client::POSTCURATION)
  query_service.data_profile.fetch(:objects, [])
end

#pre_curation_uploadsObject

Fetches the data from S3 directly bypassing ActiveStorage



383
384
385
# File 'app/models/work.rb', line 383

# The files reported by the S3 query service, sorted by filename.
# @return [Array] client_s3_files ordered by their filename
def pre_curation_uploads
  unsorted_files = s3_query_service.client_s3_files
  unsorted_files.sort_by(&:filename)
end

#presenterObject



508
509
510
# File 'app/models/work.rb', line 508

# Build a presenter for this work using the class returned by
# presenter_class. A new presenter is created on every call.
# @return [Object] an instance of presenter_class constructed with work: self
def presenter
  presenter_type = self.class.presenter_class
  presenter_type.new(work: self)
end

#readme_uploadsArray<S3File>

Retrieve the S3 file uploads named “README”

Returns:



332
333
334
# File 'app/models/work.rb', line 332

# Uploads whose filename contains "README" (case-sensitive substring match).
# @return [Array<S3File>]
def readme_uploads
  readmes, _artifacts = uploads.partition { |upload| upload.filename.include?("README") }
  readmes
end

#reload(options = nil) ⇒ Object

Overload ActiveRecord.reload method apidock.com/rails/ActiveRecord/Base/reload

NOTE: Usually `after_save` is a better place to put this kind of code:

after_save do |work|
  work.resource = nil
end

but that does not work in this case because the block points to a different memory object for `work` than the one we want to reload.



152
153
154
155
156
157
# File 'app/models/work.rb', line 152

# Reload the record, then drop the memoized resource so that the next call
# to #resource rebuilds it.
# @param options [Hash, nil] forwarded to the superclass implementation
# @return [self]
def reload(options = nil)
  super(options)
  # Clearing the cache forces `resource` to be rebuilt on its next use.
  @resource = nil
  self
end

#reload_snapshots(user_id: nil) ⇒ UploadSnapshot

Build or find persisted UploadSnapshot models for this Work

Parameters:

  • user_id (integer) (defaults to: nil)

    optional user to assign the snapshot to

Returns:



484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
# File 'app/models/work.rb', line 484

# Compare the current pre-curation files against the latest snapshot and,
# when anything changed, persist a new UploadSnapshot and record a
# FILE_CHANGES activity listing the changes.
#
# The latest snapshot fills the change list through snapshot_deletions and
# snapshot_modifications, which receive the list as their first argument.
# @param user_id [Integer, nil] optional user to attribute the activity to
# @return [Object, nil] the add_work_activity result, or nil when nothing changed
def reload_snapshots(user_id: nil)
  detected_changes = []
  current_files = pre_curation_uploads
  current_filenames = current_files.map(&:filename)

  previous_snapshot = latest_snapshot
  previous_snapshot.snapshot_deletions(detected_changes, current_filenames)
  previous_snapshot.snapshot_modifications(detected_changes, current_files)

  return if detected_changes.empty?

  # Persist the new state of the files, then log the set of changes
  fresh_snapshot = UploadSnapshot.new(work: self, url: s3_query_service.prefix)
  fresh_snapshot.store_files(current_files)
  fresh_snapshot.save!
  WorkActivity.add_work_activity(id, detected_changes.to_json, user_id, activity_type: WorkActivity::FILE_CHANGES)
end

#resourceObject



207
208
209
# File 'app/models/work.rb', line 207

# The work's metadata as a PDCMetadata::Resource, built from the metadata
# JSONB attribute and memoized in @resource (reload and resource= reset or
# replace the cached value).
#
# The call had lost its argument; the resource is built from `metadata`,
# the same attribute resource= writes to.
# @return [PDCMetadata::Resource]
def resource
  @resource ||= PDCMetadata::Resource.new_from_jsonb(metadata)
end

#resource=(resource) ⇒ Object



201
202
203
204
205
# File 'app/models/work.rb', line 201

# Replace the work's resource and keep the metadata attribute in sync.
#
# The assignment target had been lost (`self. = ...`, a syntax error); the
# parsed JSON is written to the metadata attribute.
# @param resource [PDCMetadata::Resource] must respond to to_json
def resource=(resource)
  @resource = resource
  # Ensure that the metadata JSONB postgres field is persisted properly:
  # the attribute receives plain parsed JSON, not the resource object.
  self.metadata = JSON.parse(resource.to_json)
end

#s3_clientObject



412
413
414
# File 'app/models/work.rb', line 412

# The client object exposed by this work's S3 query service.
# @return [Object] s3_query_service.client
def s3_client = s3_query_service.client

#s3_filesObject



408
409
410
# File 'app/models/work.rb', line 408

# Alias for pre_curation_uploads.
# @return [Array] the pre-curation uploads
def s3_files = pre_curation_uploads

#s3_object_keyString

Generates the S3 Object key

Returns:

  • (String)


421
422
423
# File 'app/models/work.rb', line 421

# Generates the S3 object key for this work: the DOI and the work id joined
# by a slash.
# @return [String] e.g. "10.34770/abc/5"
def s3_object_key
  [doi, id].join("/")
end

#s3_query_serviceS3QueryService

S3QueryService object associated with this Work

Returns:



473
474
475
# File 'app/models/work.rb', line 473

# S3QueryService object associated with this Work, created on first use with
# the files_mode at that moment and memoized in @s3_query_service.
# NOTE(review): the memoized service keeps the mode it was built with —
# confirm callers do not rely on it after the work's state changes.
# @return [S3QueryService]
def s3_query_service
  return @s3_query_service if @s3_query_service

  @s3_query_service = S3QueryService.new(self, files_mode)
end

#state=(new_state) ⇒ Object

Raises:

  • (StandardError)


65
66
67
68
69
70
# File 'app/models/work.rb', line 65

# Set the AASM state directly, without persisting it. The value must name
# one of the states declared on the class.
# @param new_state [String, Symbol] name of the state to move to
# @raise [StandardError] when new_state is not a declared state
def state=(new_state)
  requested_state = new_state.to_sym
  declared_states = self.class.aasm.states.map(&:name)

  if declared_states.include?(requested_state)
    aasm_write_state_without_persistence(requested_state)
  else
    raise(StandardError, "Invalid state '#{new_state}'")
  end
end

#submitted_by?(user) ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
# File 'app/models/work.rb', line 92

# Whether the given user is the one who created this work.
#
# The method name had been lost (`def (user)`, a syntax error); it is
# restored to submitted_by?.
# @param user [User] must respond to id
# @return [Boolean] true when user.id equals created_by_user_id
def submitted_by?(user)
  created_by_user_id == user.id
end

#titleObject



159
160
161
# File 'app/models/work.rb', line 159

# The work's title, taken from the resource's main_title.
# @return [Object] resource.main_title
def title = resource.main_title

#total_file_sizeObject

rubocop:enable Metrics/MethodLength



367
368
369
370
371
372
373
# File 'app/models/work.rb', line 367

# Total size of the work's files: the sum of the :size entry of every item
# in file_list (0 when there are no files).
# @return [Integer]
def total_file_size
  file_list.sum { |file| file[:size] }
end

#total_file_size_from_list(files) ⇒ Object

Calculates the total file size from a given list of files. This is so that we don’t fetch the list twice from AWS, since it can be expensive when there are thousands of files on the work.



378
379
380
# File 'app/models/work.rb', line 378

# Calculates the total file size from a list the caller already holds, so
# the listing does not have to be fetched a second time.
# @param files [Array<Hash>] items with a :size entry
# @return [Integer] the sum of the sizes (0 for an empty list)
def total_file_size_from_list(files)
  sizes = files.map { |entry| entry[:size] }
  sizes.sum
end

#track_change(action, filename) ⇒ Object



516
517
518
# File 'app/models/work.rb', line 516

# Remember a file change so that it can be logged later (see #changes).
# @param action [Object] what happened to the file
# @param filename [String] the affected file
# @return [Array<Hash>] the list of tracked changes, including the new entry
def track_change(action, filename)
  entry = { action:, filename: }
  changes.push(entry)
end

#update_curator(curator_user_id, current_user) ⇒ Object



246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'app/models/work.rb', line 246

# Assign a curator to this work, persist the change, and log a SYSTEM
# activity that links back to the work.
#
# The curator is looked up BEFORE the work is saved. Previously the work was
# saved first, so an unknown id was persisted as the curator and only then
# did User.find raise, leaving the work pointing at a missing user with no
# activity logged.
# @param curator_user_id [String, Integer] id of the user to make curator
# @param current_user [User] the user performing the change
# @return [Object] whatever WorkActivity.add_work_activity returns
# @raise [ActiveRecord::RecordNotFound] when no user has the given id
#   (nothing is persisted in that case)
def update_curator(curator_user_id, current_user)
  # Resolve the curator first so a bad id fails before anything is written
  new_curator = User.find(curator_user_id)

  # Update the curator on the Work
  self.curator_user_id = curator_user_id
  save!

  # ...and log the activity, with a markdown link to the work
  work_url = "[#{title}](#{Rails.application.routes.url_helpers.work_url(self)})"

  # Troubleshooting https://github.com/pulibrary/pdc_describe/issues/1783
  if work_url.include?("/describe/describe/")
    Rails.logger.error("URL #{work_url} included /describe/describe/ and was fixed. See https://github.com/pulibrary/pdc_describe/issues/1783")
    work_url = work_url.gsub("/describe/describe/", "/describe/")
  end

  # The id may arrive as a String (e.g. from a form), hence the to_i
  message = if curator_user_id.to_i == current_user.id
              "Self-assigned @#{current_user.uid} as curator for work #{work_url}"
            else
              "Set curator to @#{new_curator.uid} for work #{work_url}"
            end
  WorkActivity.add_work_activity(id, message, current_user.id, activity_type: WorkActivity::SYSTEM)
end

#uploadsArray<S3File>

Retrieve the S3 file uploads associated with the Work

Returns:



324
325
326
327
328
# File 'app/models/work.rb', line 324

# Retrieve the S3 file uploads associated with the Work: the post-curation
# uploads once the work is approved, the pre-curation uploads otherwise.
# @return [Array<S3File>]
def uploads
  approved? ? post_curation_uploads : pre_curation_uploads
end

#uploads_attributesObject



163
164
165
166
167
168
169
170
171
172
173
174
# File 'app/models/work.rb', line 163

# Attribute hashes describing each upload. Approved works always yield an
# empty list, because once approved the uploads can no longer be updated via
# the application.
# @return [Array<Hash>] hashes with :id, :key, :filename (as a String),
#   :created_at and :url
def uploads_attributes
  if approved?
    []
  else
    uploads.map do |file|
      {
        id: file.id,
        key: file.key,
        filename: file.filename.to_s,
        created_at: file.created_at,
        url: file.url
      }
    end
  end
end

#urlObject



211
212
213
214
215
# File 'app/models/work.rb', line 211

# URL of this work as produced by url_for, memoized in @url.
# @return [String, nil] nil while the work has not been persisted
def url
  if persisted?
    @url ||= url_for(self)
  end
end