diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/internal/parser/parser.go b/internal/parser/parser.go
new file mode 100644
index 0000000..30304e5
--- /dev/null
+++ b/internal/parser/parser.go
@@ -0,0 +1,22 @@
+// Package parser wraps pdf2go to extract the text of PDF files.
+package parser
+
+import "github.com/rudolfoborges/pdf2go"
+
+// ParseX5Check returns the text that pdf2go extracts from the PDF at filePath.
+//
+// It panics if pdf2go.New or the subsequent Text call returns an error.
+func ParseX5Check(filePath string) string {
+	pdf, err := pdf2go.New(filePath, pdf2go.Config{
+		LogLevel: pdf2go.LogLevelError,
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	text, err := pdf.Text()
+	if err != nil {
+		panic(err)
+	}
+	return text
+}
diff --git a/test.pdf b/internal/parser/testfiles/test.pdf
similarity index 100%
rename from test.pdf
rename to internal/parser/testfiles/test.pdf
diff --git a/internal/parser/testfiles/test2.pdf b/internal/parser/testfiles/test2.pdf
new file mode 100644
index 0000000..294e6fd
Binary files /dev/null and b/internal/parser/testfiles/test2.pdf differ
diff --git a/main.go b/main.go
index d558db2..7efed2d 100644
--- a/main.go
+++ b/main.go
@@ -2,46 +2,42 @@ package main
 
 import (
 	"fmt"
+	"regexp"
 	"strings"
 
-	"github.com/rudolfoborges/pdf2go"
+	"t0xa/pdf_to_txt/internal/parser"
 )
 
 type CheckEntry struct {
-    Product string
-    Price string
+	Product string
+	Price   string
 }
 
 func main() {
-	pdf, err := pdf2go.New("./test.pdf", pdf2go.Config{
-		LogLevel: pdf2go.LogLevelError,
-	})
-	if err != nil {
-		panic(err)
+	// TODO: Redo format of file passing to function
+	text := parser.ParseX5Check("./internal/parser/testfiles/test.pdf")
+
+	// Keep only non-empty lines that do not match the filter pattern.
+	re := regexp.MustCompile("https://mail|Gmail - Электронный чек|Сумма")
+	var filtered []string
+	for _, element := range strings.Split(text, "\n") {
+		if element == "" || re.MatchString(element) {
+			continue
+		}
+		filtered = append(filtered, element)
 	}
-
-	text, err := pdf.Text()
-	if err != nil {
-		panic(err)
+	for index, element := range filtered {
+		if element != "Цена*Кол" {
+			continue
+		}
+		// The line before the marker is printed as the product and the line
+		// after it as the price; a marker on the first or last line has no
+		// such neighbour, so skip it instead of indexing out of range.
+		if index == 0 || index == len(filtered)-1 {
+			continue
+		}
+		fmt.Println("Товар: ", filtered[index-1])
+		fmt.Println("Цена: ", filtered[index+1])
+		fmt.Println("------")
 	}
-
-	split := strings.Split(text, "\n")
-	for index, element := range split{
-		if element == "Цена*Кол" {
-			fmt.Println("----")
-			fmt.Println(split[index-4])
-			fmt.Println(split[index-3])
-			fmt.Println(split[index-2])
-			fmt.Println(split[index-1])
-			fmt.Println(split[index])
-			fmt.Println(split[index+1])
-			fmt.Println(split[index+2])
-			fmt.Println(split[index+3])
-			fmt.Println(split[index+4])
-
-			fmt.Println("Товар: ", split[index-2])
-			fmt.Println("Цена: ", split[index+2])
-			fmt.Println("------")
-		}
-	}
 }