Class: S3QueryService

Inherits:
Object
  • Object
show all
Defined in:
app/services/s3_query_service.rb

Overview

A service to query an S3 bucket for information about a given data set. (The source file carries a `rubocop:disable Metrics/ClassLength` directive.)

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model, mode = PULS3Client::PRECURATION, bucket_name: nil) ⇒ S3QueryService

This value controls the AWS S3 bucket used to access the files.

Examples:

S3QueryService.new(Work.find(1), PULS3Client::PRECURATION)

Parameters:

  • model (Work)
  • mode (String) (defaults to: PULS3Client::PRECURATION)

    Valid values are PULS3Client::PRECURATION, PULS3Client::POSTCURATION, PULS3Client::PRESERVATION, and PULS3Client::EMBARGO.



20
21
22
23
24
25
26
# File 'app/services/s3_query_service.rb', line 20

# Builds a query service for one work's S3 holdings.
#
# @param model [Work] the work whose files are queried; its DOI drives the key prefix
# @param mode [String] bucket mode; defaults to PULS3Client::PRECURATION
# @param bucket_name [String, nil] optional explicit bucket, forwarded to PULS3Client
def initialize(model, mode = PULS3Client::PRECURATION, bucket_name: nil)
  @model = model
  @doi = model.doi
  @s3_responses = {}
  # 5 GB — the maximum part size AWS allows, used to decide multipart copies.
  @part_size = 5_368_709_120
  @s3client = PULS3Client.new(mode, bucket_name:)
end

Instance Attribute Details

#last_responseObject (readonly)

Returns the value of attribute last_response.



10
11
12
# File 'app/services/s3_query_service.rb', line 10

# @return [Object, nil] the most recently recorded raw AWS response, if any
def last_response
  instance_variable_get(:@last_response)
end

#modelObject (readonly)

Returns the value of attribute model.



8
9
10
# File 'app/services/s3_query_service.rb', line 8

# @return [Work] the work this service was constructed with
def model
  instance_variable_get(:@model)
end

#part_sizeObject (readonly)

Returns the value of attribute part_size.



10
11
12
# File 'app/services/s3_query_service.rb', line 10

# @return [Integer] maximum bytes per part for AWS copies (set in the constructor)
def part_size
  instance_variable_get(:@part_size)
end

#s3clientObject (readonly)

Returns the value of attribute s3client.



10
11
12
# File 'app/services/s3_query_service.rb', line 10

# @return [PULS3Client] the underlying client wrapper built in the constructor
def s3client
  instance_variable_get(:@s3client)
end

Class Method Details

.object_attributesObject

The object_attributes request parameter is required by S3; this method supplies the accepted names: ETag, Checksum, ObjectParts, StorageClass, ObjectSize.



43
44
45
46
47
48
49
50
51
# File 'app/services/s3_query_service.rb', line 43

# Attribute names passed to S3's get_object_attributes call (its
# object_attributes parameter is required): ETag, Checksum, ObjectParts,
# StorageClass, and ObjectSize.
# @return [Array<String>]
def self.object_attributes
  %w[ETag Checksum ObjectParts StorageClass ObjectSize]
end

Instance Method Details

#check_file(bucket:, key:) ⇒ Object



188
189
190
191
192
193
194
# File 'app/services/s3_query_service.rb', line 188

# Issues a HEAD request for one object to check its status.
#
# @param bucket [String] bucket to query
# @param key [String] object key within the bucket
# @return the head_object response
# @raise [Aws::Errors::ServiceError] re-raised after logging the failure
def check_file(bucket:, key:)
  request = { bucket:, key: }
  client.head_object(request)
rescue Aws::Errors::ServiceError => error
  message = "An error was encountered when requesting to check the status of the AWS S3 Object in the bucket #{bucket} with the key #{key}: #{error}"
  Rails.logger.error(message)
  raise error
end

#client_s3_empty_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix) ⇒ Object



72
73
74
75
76
77
78
79
80
81
# File 'app/services/s3_query_service.rb', line 72

# Lists the zero-length objects under the prefix, memoized between calls.
#
# @param reload [Boolean] when true, drop the memoized list and cached responses
# @param bucket_name [String] bucket to list (defaults to the service's bucket)
# @param prefix [String] key prefix to list under
# @return [Array<S3File>] only the objects for which #empty? is true
def client_s3_empty_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
  if reload # caller asked for a fresh listing
    clear_s3_responses(bucket_name:, prefix:)
    @client_s3_empty_files = nil
  end
  @client_s3_empty_files ||= get_s3_objects(bucket_name:, prefix:).select(&:empty?)
end

#client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix) ⇒ Array<S3File>

Retrieve the S3 resources uploaded to the S3 Bucket

Returns:



64
65
66
67
68
69
70
# File 'app/services/s3_query_service.rb', line 64

# Retrieves the S3 resources uploaded to the bucket, memoized between calls.
#
# @param reload [Boolean] when true, drop the memoized list and cached responses
# @param bucket_name [String] bucket to list (defaults to the service's bucket)
# @param prefix [String] key prefix to list under
# @return [Array<S3File>]
def client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
  if reload # caller asked for a fresh listing
    clear_s3_responses(bucket_name:, prefix:)
    @client_s3_files = nil
  end
  @client_s3_files ||= get_s3_objects(bucket_name:, prefix:)
end

#copy_file(source_key:, target_bucket:, target_key:, size:) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
# File 'app/services/s3_query_service.rb', line 131

# Copies one object server-side within AWS, choosing single-part or multipart
# based on whether the object exceeds part_size.
#
# @param source_key [String] copy source passed straight to copy_object
# @param target_bucket [String]
# @param target_key [String]
# @param size [Integer] object size in bytes
# @raise [Aws::Errors::ServiceError] re-raised after logging the failure
def copy_file(source_key:, target_bucket:, target_key:, size:)
  Rails.logger.info("Copying #{source_key} to #{target_bucket}/#{target_key}")
  if size <= part_size
    # "+" is percent-encoded in the copy source — presumably to keep S3 from
    # decoding it as a space; confirm against keys containing "+".
    client.copy_object(copy_source: source_key.gsub("+", "%2B"), bucket: target_bucket, key: target_key, checksum_algorithm: "SHA256")
  else
    copy_multi_part(source_key:, target_bucket:, target_key:, size:)
  end
rescue Aws::Errors::ServiceError => error
  message = "An error was encountered when requesting to copy AWS S3 Object from #{source_key} to #{target_key} in the bucket #{target_bucket}: #{error}"
  Rails.logger.error(message)
  raise error
end

#copy_multi_part(source_key:, target_bucket:, target_key:, size:) ⇒ Object



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'app/services/s3_query_service.rb', line 144

# Copies an object via the S3 multipart-copy API: create_multipart_upload, one
# upload_part_copy per part_size-sized byte range, then complete_multipart_upload
# with the collected ETags and SHA-256 checksums.
#
# @param source_key [String] copy source passed straight to upload_part_copy
# @param target_bucket [String]
# @param target_key [String]
# @param size [Integer] total object size in bytes, used to compute part ranges
# @raise [Aws::Errors::ServiceError] re-raised after logging the failure
def copy_multi_part(source_key:, target_bucket:, target_key:, size:)
  multi = client.create_multipart_upload(bucket: target_bucket, key: target_key, checksum_algorithm: "SHA256")
  part_num = 0
  start_byte = 0
  parts = []
  # Walk the object in part_size chunks; copy_source_range is an inclusive
  # byte range, hence the trailing -1 on end_byte.
  while start_byte < size
    part_num += 1
    end_byte = [start_byte + part_size, size].min - 1
    resp = client.upload_part_copy(bucket: target_bucket, copy_source: source_key, key: multi.key, part_number: part_num,
                                   upload_id: multi.upload_id, copy_source_range: "bytes=#{start_byte}-#{end_byte}")
    parts << { etag: resp.copy_part_result.etag, part_number: part_num, checksum_sha256: resp.copy_part_result.checksum_sha256 }
    start_byte = end_byte + 1
  end
  # NOTE(review): copy_source is not "+"-escaped here, unlike the single-part
  # path in copy_file (gsub("+", "%2B")) — confirm whether keys containing "+"
  # can reach this method.
  client.complete_multipart_upload(bucket: target_bucket, key: target_key, upload_id: multi.upload_id, multipart_upload: { parts: })
rescue Aws::Errors::ServiceError => aws_service_error
  message = "An error was encountered when requesting to multipart copy AWS S3 Object from #{source_key} to #{target_key} in the bucket #{target_bucket}: #{aws_service_error}"
  Rails.logger.error(message)
  raise aws_service_error
end

#create_directoryObject



173
174
175
176
177
178
179
# File 'app/services/s3_query_service.rb', line 173

# Creates a zero-byte placeholder object at the work's prefix so the prefix
# appears as a "directory" in S3.
#
# @return the put_object response
# @raise [Aws::Errors::ServiceError] re-raised after logging the failure
def create_directory
  placeholder = { bucket: bucket_name, key: prefix, content_length: 0 }
  client.put_object(placeholder)
rescue Aws::Errors::ServiceError => error
  message = "An error was encountered when requesting to create the AWS S3 directory Object in the bucket #{bucket_name} with the key #{prefix}: #{error}"
  Rails.logger.error(message)
  raise error
end

#data_profileObject

Returns:

  • Hash with two properties: objects ([<S3File>]) and ok (Bool). objects is an Array of S3File objects; ok is false if there is an error connecting to S3, otherwise true.



91
92
93
94
95
96
97
# File 'app/services/s3_query_service.rb', line 91

# Summarizes the S3 listing for this work.
#
# @return [Hash] +{ objects: Array<S3File>, ok: Boolean }+ — when the listing
#   raises, the error is logged and +{ objects: [], ok: false }+ is returned.
def data_profile
  { objects: client_s3_files, ok: true }
rescue StandardError => error
  Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{error.message}")

  { objects: [], ok: false }
end

#delete_s3_object(s3_file_key, bucket: bucket_name) ⇒ Object



164
165
166
167
168
169
170
171
# File 'app/services/s3_query_service.rb', line 164

# Deletes a single object from S3.
#
# @param s3_file_key [String] key of the object to delete
# @param bucket [String] bucket to delete from (defaults to this service's bucket)
# @return [Hash] the delete_object response as a Hash
# @raise [Aws::Errors::ServiceError] re-raised after logging the failure
def delete_s3_object(s3_file_key, bucket: bucket_name)
  resp = client.delete_object({ bucket:, key: s3_file_key })
  resp.to_h
rescue Aws::Errors::ServiceError => aws_service_error
  # Log the bucket actually targeted (the keyword argument) rather than the
  # service default bucket_name, so cross-bucket delete failures are reported
  # against the correct bucket.
  message = "An error was encountered when requesting to delete the AWS S3 Object #{s3_file_key} in the bucket #{bucket}: #{aws_service_error}"
  Rails.logger.error(message)
  raise aws_service_error
end

#file_url(key) ⇒ Object

Public signed URL to fetch this file from the S3 (valid for a limited time)



37
38
39
40
# File 'app/services/s3_query_service.rb', line 37

# Public signed URL to fetch this file from S3 (valid for a limited time).
#
# @param key [String] S3 object key
# @return [String] presigned GET URL
def file_url(key)
  Aws::S3::Presigner.new(client:).presigned_url(:get_object, bucket: bucket_name, key:)
end

#get_s3_object_attributes(key:) ⇒ Object



53
54
55
56
57
58
59
60
# File 'app/services/s3_query_service.rb', line 53

# Fetches per-object metadata (ETag, checksum, parts, storage class, size —
# see .object_attributes) for one key.
#
# @param key [String] S3 object key
# @return [Hash] the get_object_attributes response converted to a Hash
def get_s3_object_attributes(key:)
  request = {
    bucket: bucket_name,
    key:,
    object_attributes: self.class.object_attributes
  }
  client.get_object_attributes(request).to_h
end

#prefixObject

The S3 prefix for this object, i.e., the address within the S3 bucket, which is based on the DOI



31
32
33
# File 'app/services/s3_query_service.rb', line 31

# The S3 prefix for this work — its address within the bucket — built from the
# DOI and the model id, with a trailing slash.
# @return [String] e.g. "10.1234/abc/42/"
def prefix
  [@doi, model.id, ""].join("/")
end

#publish_files(current_user) ⇒ Object

Copies the existing files from the pre-curation bucket to the target bucket (postcuration or embargo). Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'app/services/s3_query_service.rb', line 102

# Copies the existing files from the pre-curation bucket to the target bucket
# (post-curation, or embargo when the work is embargoed). The copy happens at
# AWS — files are not downloaded and re-uploaded — via one background
# ApprovedFileMoveJob per file.
#
# @param current_user [User] must respond to #id; recorded on the snapshot and
#   in the work-activity entries
# @return [true]
def publish_files(current_user)
  source_bucket = PULS3Client.pre_curation_config[:bucket]
  # Embargoed works publish into the embargo bucket; everything else goes to
  # the post-curation bucket.
  target_bucket = if model.embargoed?
                    PULS3Client.embargo_config[:bucket]
                  else
                    PULS3Client.post_curation_config[:bucket]
                  end

  empty_files = client_s3_empty_files(reload: true, bucket_name: source_bucket)
  # Do not move the empty files, however, ensure that it is noted that the
  #   presence of empty files is specified in the provenance log.
  # NOTE(review): the `unless` guard is redundant — `each` on an empty array is
  # a no-op — but it is harmless.
  unless empty_files.empty?
    empty_files.each do |empty_file|
      message = "Warning: Attempted to publish empty S3 file #{empty_file.filename}."
      WorkActivity.add_work_activity(model.id, message, current_user.id, activity_type: WorkActivity::SYSTEM)
    end
  end

  # Snapshot the fresh pre-curation listing and persist it before enqueuing the
  # moves; each job receives snapshot_id, presumably to reconcile against this
  # fixed inventory — confirm in ApprovedFileMoveJob.
  files = client_s3_files(reload: true, bucket_name: source_bucket)
  snapshot = ApprovedUploadSnapshot.new(work: model)
  snapshot.store_files(files, current_user:)
  snapshot.save
  # One async job per file; the key is preserved in the target bucket.
  files.each do |file|
    ApprovedFileMoveJob.perform_later(work_id: model.id, source_bucket:, source_key: file.key, target_bucket:,
                                      target_key: file.key, size: file.size, snapshot_id: snapshot.id)
  end
  true
end

#upload_file(io:, filename:, size:, md5_digest: nil) ⇒ Object



181
182
183
184
185
186
# File 'app/services/s3_query_service.rb', line 181

# Uploads an IO stream to this work's prefix in S3 via the PULS3Client.
#
# @param io [IO] the stream to upload
# @param filename [String] name appended to the prefix to form the target key
# @param size [Integer] byte size of the upload
# @param md5_digest [String, nil] optional MD5 for integrity checking
# @return [String, nil] the target key on success, nil when the client
#   reports failure
def upload_file(io:, filename:, size:, md5_digest: nil)
  # Fix: the key must interpolate the filename; the previous literal
  # ("#{prefix}#(unknown)") never used the filename parameter.
  key = "#{prefix}#{filename}"
  if s3client.upload_file(io:, target_key: key, size:, md5_digest:)
    key
  end
end