// Patterns used by MSymbolsRemover, compiled once at package scope instead of
// on every call.
var (
	// mMarkerPattern matches a "[М+]" or "[М]" marker together with the single
	// whitespace character that follows it.
	mMarkerPattern = regexp.MustCompile(`(\[М\+\]\s|\[М\]\s)`)
	// edgeSpacePattern matches a run of whitespace at the very start or the
	// very end of the string.
	edgeSpacePattern = regexp.MustCompile(`^\s+|\s+$`)
)

// MSymbolsRemover returns inputString with every "[М+] " / "[М] " marker
// removed and with leading and trailing whitespace trimmed.
//
// The trim step previously used the pattern `^s+|s+$` (no backslashes), which
// stripped the literal letter "s" from both ends of the string and left real
// whitespace in place; it now uses `\s` as intended.
func MSymbolsRemover(inputString string) string {
	withoutMarkers := mMarkerPattern.ReplaceAllString(inputString, "")
	return edgeSpacePattern.ReplaceAllString(withoutMarkers, "")
}
re.MatchString(element) { - // continue - // } - // if len(element) < 1 { - // continue - // } - // filtered = append(filtered, element) - // - // } - // for index, element := range filtered { - // if element == "Цена*Кол" { - // fmt.Println("Товар: ", filtered[index-1]) - // fmt.Println("Цена: ", filtered[index+1]) - // fmt.Println("------") - // } - // } + text := parser.ParseX5Check("./internal/parser/testfiles/test4.pdf") + + split := strings.Split(text, "\n") + stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС" + re := regexp.MustCompile(stringsToFilter) + filtered := []string{} + for _, element := range split { + if re.MatchString(element) { + continue + } + if len(element) < 1 { + continue + } + filtered = append(filtered, element) + + } + for index, element := range filtered { + if element == "Цена*Кол" { + clearedProductName := regexper.MSymbolsRemover(filtered[index-1]) + fmt.Print(clearedProductName + " : ") + fmt.Println(filtered[index+1]) + fmt.Println("------") + } + } // ctx := context.Background() // @@ -58,14 +60,15 @@ func main() { // } // fmt.Println(insertedProduct) - ctx := context.Background() - conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod") - if err != nil { - panic(err) - } - defer conn.Close(ctx) - - if err := migrator.Migrate(ctx, conn); err != nil { - log.Fatalf("Migration failed: %s", err) - } + // ctx := context.Background() + // // TODO: Move to env file + // conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod") + // if err != nil { + // panic(err) + // } + // defer conn.Close(ctx) + // + // if err := migrator.Migrate(ctx, conn); err != nil { + // log.Fatalf("Migration failed: %s", err) + // } }