Skip to content

Commit 32c6f8a

Browse files
authored
Merge pull request #14 from BobLd/nurminen-detect
Implement Simple Nurminen Detection Algorithm
2 parents f8c2fe9 + e9584ea commit 32c6f8a

File tree

12 files changed

+974
-1038
lines changed

12 files changed

+974
-1038
lines changed

Tabula.Csv/Tabula.Csv.csproj

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
<TargetFrameworks>netcoreapp3.1;netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
55
<Description>Extract tables from PDF files (port of tabula-java using PdfPig). Csv and Tsv writers.</Description>
66
<PackageProjectUrl>https://github.com/BobLd/tabula-sharp</PackageProjectUrl>
7-
<Version>0.1.0-alpha001</Version>
7+
<Version>0.1.0-alpha002</Version>
8+
<Authors>BobLd</Authors>
89
</PropertyGroup>
910

1011
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netcoreapp3.1|AnyCPU'">

Tabula.Json/Tabula.Json.csproj

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,9 @@
44
<TargetFrameworks>netcoreapp3.1;netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
55
<Description>Extract tables from PDF files (port of tabula-java using PdfPig). Json writer.</Description>
66
<PackageProjectUrl>https://github.com/BobLd/tabula-sharp</PackageProjectUrl>
7-
<Version>0.1.0-alpha001</Version>
7+
<Version>0.1.0-alpha002</Version>
8+
<Company>BobLd</Company>
9+
<Authors>BobLd</Authors>
810
</PropertyGroup>
911

1012
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netcoreapp3.1|AnyCPU'">
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
using Tabula.Detectors;
5+
using Tabula.Extractors;
6+
using UglyToad.PdfPig;
7+
using Xunit;
8+
9+
namespace Tabula.Tests
10+
{
11+
/// <summary>
/// Tests exercising <see cref="SimpleNurminenDetectionAlgorithm"/>.
/// </summary>
public class TestsNurminenDetector
{
    /// <summary>
    /// Opens <c>test3.pdf</c>, runs the detector on page 1 and feeds every detected
    /// region through <see cref="BasicExtractionAlgorithm"/>.
    /// The test is currently skipped and makes no assertions on the extracted tables.
    /// </summary>
    [Fact(Skip = "TO DO")]
    public void TestLinesToCells()
    {
        var options = new ParsingOptions() { ClipPaths = true };

        using (PdfDocument pdf = PdfDocument.Open(@"test3.pdf", options))
        {
            var extractor = new ObjectExtractor(pdf);
            PageArea firstPage = extractor.Extract(1);

            var nurminen = new SimpleNurminenDetectionAlgorithm();

            foreach (var region in nurminen.Detect(firstPage))
            {
                // A fresh extraction algorithm is created for each detected region.
                IExtractionAlgorithm basic = new BasicExtractionAlgorithm();
                var regionArea = firstPage.GetArea(region.BoundingBox);

                // The result is not inspected yet; the call only checks that extraction runs.
                List<Table> extracted = basic.Extract(regionArea);
            }
        }
    }
}
33+
}

0 commit comments

Comments
 (0)