Move parsing logic to internal
This commit is contained in:
parent
f1b648247e
commit
7fb6713395
5 changed files with 44 additions and 32 deletions
0
.gitignore
vendored
Normal file
0
.gitignore
vendored
Normal file
18
internal/parser/parser.go
Normal file
18
internal/parser/parser.go
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import "github.com/rudolfoborges/pdf2go"
|
||||||
|
|
||||||
|
func ParseX5Check(filePath string) string {
|
||||||
|
pdf, err := pdf2go.New(filePath, pdf2go.Config{
|
||||||
|
LogLevel: pdf2go.LogLevelError,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
text, err := pdf.Text()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return text
|
||||||
|
}
|
BIN
internal/parser/testfiles/test2.pdf
Normal file
BIN
internal/parser/testfiles/test2.pdf
Normal file
Binary file not shown.
46
main.go
46
main.go
|
@ -2,9 +2,10 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/rudolfoborges/pdf2go"
|
"t0xa/pdf_to_txt/internal/parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CheckEntry struct {
|
type CheckEntry struct {
|
||||||
|
@ -13,34 +14,27 @@ type CheckEntry struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
pdf, err := pdf2go.New("./test.pdf", pdf2go.Config{
|
// TODO: Redo format of file passing to function
|
||||||
LogLevel: pdf2go.LogLevelError,
|
text := parser.ParseX5Check("./internal/parser/testfiles/test.pdf")
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
text, err := pdf.Text()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
split := strings.Split(text, "\n")
|
split := strings.Split(text, "\n")
|
||||||
for index, element := range split{
|
stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма"
|
||||||
if element == "Цена*Кол" {
|
re := regexp.MustCompile(stringsToFilter)
|
||||||
fmt.Println("----")
|
filtered := []string{}
|
||||||
fmt.Println(split[index-4])
|
for _, element := range split {
|
||||||
fmt.Println(split[index-3])
|
if re.MatchString(element) {
|
||||||
fmt.Println(split[index-2])
|
continue
|
||||||
fmt.Println(split[index-1])
|
}
|
||||||
fmt.Println(split[index])
|
if len(element) < 1 {
|
||||||
fmt.Println(split[index+1])
|
continue
|
||||||
fmt.Println(split[index+2])
|
}
|
||||||
fmt.Println(split[index+3])
|
filtered = append(filtered, element)
|
||||||
fmt.Println(split[index+4])
|
|
||||||
|
|
||||||
fmt.Println("Товар: ", split[index-2])
|
}
|
||||||
fmt.Println("Цена: ", split[index+2])
|
for index, element := range filtered {
|
||||||
|
if element == "Цена*Кол" {
|
||||||
|
fmt.Println("Товар: ", filtered[index-1])
|
||||||
|
fmt.Println("Цена: ", filtered[index+1])
|
||||||
fmt.Println("------")
|
fmt.Println("------")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue