Move parsing logic to internal

This commit is contained in:
pro100ton 2024-11-04 22:41:30 +03:00
parent f1b648247e
commit 7fb6713395
5 changed files with 44 additions and 32 deletions

0
.gitignore vendored Normal file
View file

18
internal/parser/parser.go Normal file
View file

@ -0,0 +1,18 @@
package parser
import "github.com/rudolfoborges/pdf2go"
// ParseX5Check runs pdf2go over the PDF at filePath and returns the text
// that pdf2go extracts from it.
//
// pdf2go is configured with LogLevelError. The function panics with the
// underlying error if either pdf2go.New or the subsequent Text call fails.
func ParseX5Check(filePath string) string {
	cfg := pdf2go.Config{LogLevel: pdf2go.LogLevelError}

	doc, err := pdf2go.New(filePath, cfg)
	if err != nil {
		panic(err)
	}

	content, err := doc.Text()
	if err != nil {
		panic(err)
	}

	return content
}

Binary file not shown.

58
main.go
View file

@ -2,46 +2,40 @@ package main
import (
"fmt"
"regexp"
"strings"
"github.com/rudolfoborges/pdf2go"
"t0xa/pdf_to_txt/internal/parser"
)
type CheckEntry struct {
Product string
Price string
Product string
Price string
}
func main() {
pdf, err := pdf2go.New("./test.pdf", pdf2go.Config{
LogLevel: pdf2go.LogLevelError,
})
if err != nil {
panic(err)
// TODO: Redo format of file passing to function
text := parser.ParseX5Check("./internal/parser/testfiles/test.pdf")
split := strings.Split(text, "\n")
stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма"
re := regexp.MustCompile(stringsToFilter)
filtered := []string{}
for _, element := range split {
if re.MatchString(element) {
continue
}
if len(element) < 1 {
continue
}
filtered = append(filtered, element)
}
text, err := pdf.Text()
if err != nil {
panic(err)
for index, element := range filtered {
if element == "Цена*Кол" {
fmt.Println("Товар: ", filtered[index-1])
fmt.Println("Цена: ", filtered[index+1])
fmt.Println("------")
}
}
split := strings.Split(text, "\n")
for index, element := range split{
if element == "Цена*Кол" {
fmt.Println("----")
fmt.Println(split[index-4])
fmt.Println(split[index-3])
fmt.Println(split[index-2])
fmt.Println(split[index-1])
fmt.Println(split[index])
fmt.Println(split[index+1])
fmt.Println(split[index+2])
fmt.Println(split[index+3])
fmt.Println(split[index+4])
fmt.Println("Товар: ", split[index-2])
fmt.Println("Цена: ", split[index+2])
fmt.Println("------")
}
}
}