Skip to content

Commit 676e4b5

Browse files
committed
changes to fix document processing errors on production
1 parent d27ce86 commit 676e4b5

File tree

11 files changed

+44
-111
lines changed

11 files changed

+44
-111
lines changed

.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
RAILS_ENV=development
22
RACK_ENV=development
33
EMAIL_DOMAIN=localhost:5000
4+
PORT=3000

Gemfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ gem 'high_voltage', '~> 2.1.0'
1818
gem "aws-sdk"
1919
gem "paperclip"
2020
gem "delayed_job_active_record"
21-
gem 'google_drive'
2221
gem 'pdf-reader'
2322
gem 'pdf-reader-html'
2423
gem 'apartment'
24+
gem 'yomu'
2525

2626
group :development do
2727
gem 'sextant'

Gemfile.lock

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@ GEM
6666
builder (3.0.4)
6767
callsite (0.0.11)
6868
cancan (1.6.10)
69-
capybara (2.1.0)
69+
capybara (2.5.0)
7070
mime-types (>= 1.16)
7171
nokogiri (>= 1.3.3)
7272
rack (>= 1.0.0)
7373
rack-test (>= 0.5.4)
7474
xpath (~> 2.0)
75-
capybara-webkit (1.1.1)
76-
capybara (>= 2.0.2, < 2.2.0)
75+
capybara-webkit (1.7.0)
76+
capybara (>= 2.3.0, < 2.6.0)
7777
json
7878
carrierwave (0.10.0)
7979
activemodel (>= 3.2.0)
@@ -121,8 +121,6 @@ GEM
121121
factory_girl_rails (4.4.1)
122122
factory_girl (~> 4.4.0)
123123
railties (>= 3.0.0)
124-
faraday (0.9.0)
125-
multipart-post (>= 1.2, < 3)
126124
figaro (0.7.0)
127125
bundler (~> 1.0)
128126
rails (>= 3, < 5)
@@ -147,10 +145,6 @@ GEM
147145
gon (5.0.4)
148146
actionpack (>= 2.3.0)
149147
json
150-
google_drive (0.3.9)
151-
nokogiri (>= 1.4.4, != 1.5.2, != 1.5.1)
152-
oauth (>= 0.3.6)
153-
oauth2 (>= 0.5.0)
154148
has_scope (0.6.0.rc)
155149
actionpack (>= 3.2, < 5)
156150
activesupport (>= 3.2, < 5)
@@ -208,19 +202,11 @@ GEM
208202
subexec (~> 0.2.1)
209203
multi_json (1.10.1)
210204
multi_xml (0.5.5)
211-
multipart-post (2.0.0)
212205
net-scp (1.0.4)
213206
net-ssh (>= 1.99.1)
214207
net-ssh (2.9.1)
215208
newrelic_rpm (3.8.1.221)
216209
nokogiri (1.5.11)
217-
oauth (0.4.7)
218-
oauth2 (0.9.4)
219-
faraday (>= 0.8, < 0.10)
220-
jwt (~> 1.0)
221-
multi_json (~> 1.3)
222-
multi_xml (~> 0.5)
223-
rack (~> 1.2)
224210
orm_adapter (0.5.0)
225211
paperclip (4.1.1)
226212
activemodel (>= 3.0.0)
@@ -359,6 +345,9 @@ GEM
359345
will_paginate (3.0.5)
360346
xpath (2.0.0)
361347
nokogiri (~> 1.3)
348+
yomu (0.2.2)
349+
json (~> 1.8)
350+
mime-types (~> 1.23)
362351

363352
PLATFORMS
364353
ruby
@@ -382,7 +371,6 @@ DEPENDENCIES
382371
foreman
383372
friendly_id
384373
gon
385-
google_drive
386374
high_voltage (~> 2.1.0)
387375
highline
388376
intercom-rails
@@ -418,3 +406,4 @@ DEPENDENCIES
418406
uglifier
419407
unicorn
420408
will_paginate (> 3.0)
409+
yomu

config/database.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
development: &default
22
adapter: postgresql
3-
database: annotation_studio_development
3+
database: annos
44
encoding: utf8
55
host: localhost
66
pool: 5
77
timeout: 5000
88

99
test:
1010
<<: *default
11-
database: annotation_studio_test
11+
database: annos_test
Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1 @@
1-
if ['development','test'].include?(Rails.env)
2-
Rails.application.config.use_fake_document_processor = true
3-
else
41
Rails.application.config.use_fake_document_processor = false
5-
end

lib/document_processor_dispatcher.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ def self.processor_for(mime_type)
55
end
66

77
if mime_type.in?('application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')
8-
GoogleDriveProcessor
8+
YomuProcessor
99
elsif mime_type == 'application/pdf'
10-
PdfProcessor
10+
YomuProcessor
1111
else
1212
NullProcessor
1313
end

lib/google_drive_processor.rb

Lines changed: 0 additions & 39 deletions
This file was deleted.

lib/tasks/annotation-studio.rake

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,5 @@
11
require 'rake'
2-
require 'google_drive_processor'
32

43
namespace :annotationstudio do
5-
desc 'test google drive API html conversion'
6-
task drive_conversion_test: :environment do
74

8-
document = Document.create(
9-
title: 'Today\'s Date: ' + Time.now.to_s, author: 'Test Author',
10-
upload: File.open('spec/support/example_files/annotation-studio-white-paper.docx'),
11-
user_id: 2,
12-
rep_group_list: "public",
13-
)
14-
15-
16-
processor = GoogleDriveProcessor.new(document, 'published')
17-
processor.work
18-
19-
document.reload
20-
end
215
end

lib/yomu_processor.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Converts an uploaded document to HTML with Yomu and stores the extracted
# markup on the document record.
class YomuProcessor
  # document       - record exposing +upload+ (must respond to
  #                  +copy_to_local_file+), +upload_file_name+, writers for
  #                  +text+, +processed_at+ and +state+, and +save+.
  # document_state - value written back to +document.state+ once processing
  #                  has finished.
  def initialize(document, document_state)
    @document = document
    @original_state = document_state
  end

  # Copies the original upload to a temporary file, extracts its HTML, stores
  # the inner markup of <body> in +document.text+, stamps +processed_at+,
  # restores the state given to the constructor and saves the record.
  #
  # Returns whatever +document.save+ returns. Any error raised while copying
  # or extracting propagates to the caller; the temporary file is removed in
  # either case.
  def work
    local_copy = Tempfile.new(tempfile_name_parts)
    begin
      @document.upload.copy_to_local_file(:original, local_copy.path)

      # Hand Yomu the path so it opens the file fresh: the Tempfile's own
      # handle was opened before the upload was copied to that path.
      markup = Nokogiri::HTML(Yomu.new(local_copy.path).html)

      @document.text = markup.css('body').inner_html
      @document.processed_at = DateTime.now
      @document.state = @original_state
      @document.save
    ensure
      # Close the descriptor and unlink the file instead of waiting for GC.
      local_copy.close!
    end
  end

  private

  # Splits the upload's file name into [basename, extension] so the temp file
  # keeps the original extension (Tempfile puts its random suffix in between).
  def tempfile_name_parts
    file_name = @document.upload_file_name.to_s
    extension = File.extname(file_name)
    [File.basename(file_name, extension), extension]
  end
end

spec/lib/document_processor_dispatcher_spec.rb

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,29 @@
33
describe DocumentProcessorDispatcher do
44
context 'when a fake is configured' do
55
it 'returns the fake' do
6-
expect(described_class.processor_for('blarg')).to eq ProcessorFake
6+
with_real_document_processors(false) do
7+
expect(described_class.processor_for('blarg')).to eq ProcessorFake
8+
end
79
end
810
end
911

1012
context '.processor_for' do
11-
it 'returns GoogleDriveProcessor for .doc and .docx mime-types' do
13+
it 'returns YomuProcessor for .doc and .docx mime-types' do
1214
with_real_document_processors do
1315
%w(
1416
application/msword
1517
application/vnd.openxmlformats-officedocument.wordprocessingml.document
1618
).each do |mime_type|
1719

18-
expect(described_class.processor_for(mime_type)).to eq GoogleDriveProcessor
20+
expect(described_class.processor_for(mime_type)).to eq YomuProcessor
1921
end
2022
end
2123
end
2224

23-
it 'returns PdfProcessor for pdf mime-types' do
25+
it 'returns YomuProcessor for pdf mime-types' do
2426
with_real_document_processors do
2527
expect(described_class.processor_for('application/pdf')).to eq \
26-
PdfProcessor
28+
YomuProcessor
2729
end
2830
end
2931

@@ -34,12 +36,14 @@
3436
end
3537
end
3638

37-
def with_real_document_processors
39+
def with_real_document_processors( use_real=true )
40+
default_setting = Rails.application.config.use_fake_document_processor
3841
begin
39-
Rails.application.config.use_fake_document_processor = false
42+
Rails.application.config.use_fake_document_processor = (not use_real)
4043
yield
4144
ensure
42-
Rails.application.config.use_fake_document_processor = true
45+
Rails.application.config.use_fake_document_processor = default_setting
4346
end
4447
end
45-
end
48+
49+
end

0 commit comments

Comments
 (0)