Move parsing logic to internal

This commit is contained in:
pro100ton 2024-11-04 22:41:30 +03:00
parent f1b648247e
commit 7fb6713395
5 changed files with 44 additions and 32 deletions

0
.gitignore vendored Normal file
View file

18
internal/parser/parser.go Normal file
View file

@ -0,0 +1,18 @@
package parser
import "github.com/rudolfoborges/pdf2go"
// ParseX5Check opens the PDF located at filePath with pdf2go (logging
// restricted to pdf2go.LogLevelError) and returns the text that pdf2go
// extracts from it.
//
// It panics with the underlying error if the document cannot be opened or
// if text extraction fails.
func ParseX5Check(filePath string) string {
	cfg := pdf2go.Config{
		LogLevel: pdf2go.LogLevelError,
	}

	doc, openErr := pdf2go.New(filePath, cfg)
	if openErr != nil {
		panic(openErr)
	}

	content, textErr := doc.Text()
	if textErr != nil {
		panic(textErr)
	}

	return content
}

Binary file not shown.

46
main.go
View file

@ -2,9 +2,10 @@ package main
import ( import (
"fmt" "fmt"
"regexp"
"strings" "strings"
"github.com/rudolfoborges/pdf2go" "t0xa/pdf_to_txt/internal/parser"
) )
type CheckEntry struct { type CheckEntry struct {
@ -13,34 +14,27 @@ type CheckEntry struct {
} }
func main() { func main() {
pdf, err := pdf2go.New("./test.pdf", pdf2go.Config{ // TODO: Redo format of file passing to function
LogLevel: pdf2go.LogLevelError, text := parser.ParseX5Check("./internal/parser/testfiles/test.pdf")
})
if err != nil {
panic(err)
}
text, err := pdf.Text()
if err != nil {
panic(err)
}
split := strings.Split(text, "\n") split := strings.Split(text, "\n")
for index, element := range split{ stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма"
if element == "Цена*Кол" { re := regexp.MustCompile(stringsToFilter)
fmt.Println("----") filtered := []string{}
fmt.Println(split[index-4]) for _, element := range split {
fmt.Println(split[index-3]) if re.MatchString(element) {
fmt.Println(split[index-2]) continue
fmt.Println(split[index-1]) }
fmt.Println(split[index]) if len(element) < 1 {
fmt.Println(split[index+1]) continue
fmt.Println(split[index+2]) }
fmt.Println(split[index+3]) filtered = append(filtered, element)
fmt.Println(split[index+4])
fmt.Println("Товар: ", split[index-2]) }
fmt.Println("Цена: ", split[index+2]) for index, element := range filtered {
if element == "Цена*Кол" {
fmt.Println("Товар: ", filtered[index-1])
fmt.Println("Цена: ", filtered[index+1])
fmt.Println("------") fmt.Println("------")
} }
} }