Class: PULDspaceConnector

Inherits:
Object
  • Object
show all
Defined in:
app/services/pul_dspace_connector.rb

Constant Summary collapse

DSPACE_PAGE_SIZE =
20

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(work) ⇒ PULDspaceConnector

Returns a new instance of PULDspaceConnector.



7
8
9
10
11
# File 'app/services/pul_dspace_connector.rb', line 7

# Builds a connector for a single work.
#
# Stores the work, its ARK with every "ark:/" occurrence removed (nil when
# the work has no ARK), and the base URL used to download its bitstreams.
#
# @param work [Object] must respond to #ark
def initialize(work)
  @work = work
  @ark = work.ark&.gsub("ark:/", "")
  # The configured base URL with 'rest/' removed; bitstream downloads hang off it.
  site_root = Rails.configuration.dspace.base_url.gsub('rest/', '')
  @download_base = "#{site_root}bitstream/#{ark}"
end

Instance Attribute Details

#ark ⇒ Object (readonly)

Returns the value of attribute ark.



3
4
5
# File 'app/services/pul_dspace_connector.rb', line 3

# Reader for the @ark instance variable.
def ark = @ark

#download_base ⇒ Object (readonly)

Returns the value of attribute download_base.



3
4
5
# File 'app/services/pul_dspace_connector.rb', line 3

# Reader for the @download_base instance variable.
def download_base = @download_base

#work ⇒ Object (readonly)

Returns the value of attribute work.



3
4
5
# File 'app/services/pul_dspace_connector.rb', line 3

# Reader for the @work instance variable.
def work = @work

Instance Method Details

#bitstreams ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'app/services/pul_dspace_connector.rb', line 20

# Fetches every bitstream record for the item, one page at a time, and
# memoizes the combined list.
#
# Each request uses the number of records gathered so far as the offset and
# DSPACE_PAGE_SIZE as the limit. Paging stops at the first page that comes
# back with fewer than DSPACE_PAGE_SIZE records (an empty page included).
#
# @return [Array] the concatenated pages returned by get_data
def bitstreams
  return @bitstreams if @bitstreams

  collected = []
  loop do
    page = get_data("items/#{id}/bitstreams?offset=#{collected.length}&limit=#{DSPACE_PAGE_SIZE}")
    collected.concat(page) unless page.empty?
    # A short page means there is nothing left to request.
    break if page.count < DSPACE_PAGE_SIZE
  end
  @bitstreams = collected
end

#doi ⇒ Object



72
73
74
75
76
77
78
# File 'app/services/pul_dspace_connector.rb', line 72

# Extracts the work's DOI from its "dc.identifier.uri" metadata values.
#
# The first value that begins with "https://doi.org/" is taken and that
# prefix is stripped. The lookup is delegated to #metadata; a missing
# "dc.identifier.uri" key is treated as "no identifiers".
#
# @return [String, nil] "" when the work has no ARK, the bare DOI when one
#   is present, nil when no identifier is a DOI URL
def doi
  return "" if ark.nil?

  # A nil result is not cached by ||=, so a miss is looked up again next call.
  @doi ||= Array(metadata["dc.identifier.uri"])
           .find { |uri| uri.start_with?("https://doi.org/") }
           &.delete_prefix("https://doi.org/")
end

#download_bitstreams(bitstream_list) ⇒ Object



61
62
63
64
65
66
67
68
69
70
# File 'app/services/pul_dspace_connector.rb', line 61

# Downloads each listed file and verifies it against its expected checksum.
#
# @param bitstream_list [Enumerable] entries responding to #url, #filename
#   and #checksum
# @return [Array] one element per entry: the entry itself when
#   checksum_file accepts the download, otherwise a Hash with :file and
#   :error keys
def download_bitstreams(bitstream_list)
  bitstream_list.map do |entry|
    local_path = download_bitstream(entry.url, entry.filename)
    next entry if checksum_file(local_path, entry.checksum)

    # NOTE(review): the message is misspelled; left untouched here because
    # callers may compare against it.
    { file: entry, error: "Checsum Missmatch" }
  end
end

#id ⇒ Object



13
14
15
16
17
18
# File 'app/services/pul_dspace_connector.rb', line 13

# Looks up the record for this work's handle via get_data and memoizes the
# value stored under its "id" key.
#
# @return [Object] the "id" value of the "handle/<ark>" response
def id
  @id ||= get_data("handle/#{ark}")["id"]
end

#list_bitsteams ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'app/services/pul_dspace_connector.rb', line 47

# Builds (and memoizes) one S3File per entry of original_bitstreams.
#
# Each file's local filename is
# <download_file_path>/dspace_download/<work id>/<bitstream name>, and its
# url is <download_base>/<sequenceId>. A checksum algorithm other than "MD5"
# is reported to Honeybadger; the S3File is still created in that case.
#
# @return [Array<S3File>]
def list_bitsteams
  @list_bitsteams ||=
    original_bitstreams.map do |bitstream|
      download_dir = File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
      filename = File.join(download_dir, bitstream["name"])
      algorithm = bitstream["checkSum"]["checkSumAlgorithm"]
      unless algorithm == "MD5"
        Honeybadger.notify("Unknown checksum algorithm #{algorithm} #{filename} #{bitstream}")
      end

      S3File.new(
        filename_display: bitstream["name"],
        checksum: base64digest(bitstream["checkSum"]["value"]),
        last_modified: DateTime.now,
        size: -1, # size is not known at this point; -1 is passed as a placeholder
        work:,
        url: "#{download_base}/#{bitstream['sequenceId']}",
        filename:
      )
    end
end

#metadata ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
# File 'app/services/pul_dspace_connector.rb', line 34

# Fetches the item's metadata entries via get_data and groups their values
# by key. The result is memoized.
#
# Each entry is read as a Hash with "key" and "value"; values that share a
# key are collected, in response order, into one Array.
#
# @return [Hash{Object => Array}] metadata values grouped by key
def metadata
  @metadata ||= get_data("items/#{id}/metadata").each_with_object({}) do |entry, grouped|
    (grouped[entry["key"]] ||= []) << entry["value"]
  end
end