Move parsing logic to internal
This commit is contained in:
parent
f1b648247e
commit
7fb6713395
5 changed files with 44 additions and 32 deletions
0
.gitignore
vendored
Normal file
0
.gitignore
vendored
Normal file
18
internal/parser/parser.go
Normal file
18
internal/parser/parser.go
Normal file
|
@ -0,0 +1,18 @@
|
|||
package parser
|
||||
|
||||
import "github.com/rudolfoborges/pdf2go"
|
||||
|
||||
func ParseX5Check(filePath string) string {
|
||||
pdf, err := pdf2go.New(filePath, pdf2go.Config{
|
||||
LogLevel: pdf2go.LogLevelError,
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
text, err := pdf.Text()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return text
|
||||
}
|
BIN
internal/parser/testfiles/test2.pdf
Normal file
BIN
internal/parser/testfiles/test2.pdf
Normal file
Binary file not shown.
58
main.go
58
main.go
|
@ -2,46 +2,40 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/rudolfoborges/pdf2go"
|
||||
"t0xa/pdf_to_txt/internal/parser"
|
||||
)
|
||||
|
||||
type CheckEntry struct {
|
||||
Product string
|
||||
Price string
|
||||
Product string
|
||||
Price string
|
||||
}
|
||||
|
||||
func main() {
|
||||
pdf, err := pdf2go.New("./test.pdf", pdf2go.Config{
|
||||
LogLevel: pdf2go.LogLevelError,
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
// TODO: Redo format of file passing to function
|
||||
text := parser.ParseX5Check("./internal/parser/testfiles/test.pdf")
|
||||
|
||||
split := strings.Split(text, "\n")
|
||||
stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма"
|
||||
re := regexp.MustCompile(stringsToFilter)
|
||||
filtered := []string{}
|
||||
for _, element := range split {
|
||||
if re.MatchString(element) {
|
||||
continue
|
||||
}
|
||||
if len(element) < 1 {
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, element)
|
||||
|
||||
}
|
||||
|
||||
text, err := pdf.Text()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
for index, element := range filtered {
|
||||
if element == "Цена*Кол" {
|
||||
fmt.Println("Товар: ", filtered[index-1])
|
||||
fmt.Println("Цена: ", filtered[index+1])
|
||||
fmt.Println("------")
|
||||
}
|
||||
}
|
||||
|
||||
split := strings.Split(text, "\n")
|
||||
for index, element := range split{
|
||||
if element == "Цена*Кол" {
|
||||
fmt.Println("----")
|
||||
fmt.Println(split[index-4])
|
||||
fmt.Println(split[index-3])
|
||||
fmt.Println(split[index-2])
|
||||
fmt.Println(split[index-1])
|
||||
fmt.Println(split[index])
|
||||
fmt.Println(split[index+1])
|
||||
fmt.Println(split[index+2])
|
||||
fmt.Println(split[index+3])
|
||||
fmt.Println(split[index+4])
|
||||
|
||||
fmt.Println("Товар: ", split[index-2])
|
||||
fmt.Println("Цена: ", split[index+2])
|
||||
fmt.Println("------")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue