Add regexp remover of M symbols

This commit is contained in:
pro100ton 2024-11-08 23:39:36 +03:00
parent c6e73dd360
commit 8a0223a843
5 changed files with 58 additions and 37 deletions

2
TODO.md Normal file
View file

@ -0,0 +1,2 @@
# List of TODOs
- [ ] Implement version control for database migrations

View file

@ -0,0 +1,6 @@
-- Creates the `migrations` table; IF NOT EXISTS makes the statement a no-op
-- when the table is already present, so it can be run repeatedly.
-- NOTE(review): presumably one row is stored per applied migration — confirm
-- against the migrator package.
CREATE TABLE IF NOT EXISTS migrations (
-- Auto-incrementing integer primary key (PostgreSQL SERIAL).
id SERIAL PRIMARY KEY,
-- Required and unique: the same version string cannot be stored twice.
version TEXT NOT NULL UNIQUE,
-- Defaults to the insertion time when no value is supplied. The column is
-- nullable and has no time zone (TIMESTAMP, not TIMESTAMPTZ).
applied_at TIMESTAMP DEFAULT NOW()
);

Binary file not shown.

View file

@ -0,0 +1,10 @@
package regexper
import "regexp"
// Patterns are compiled once at package scope so that MSymbolsRemover does
// not recompile them on every call.
var (
	// mSymbolMarkerRe matches a "[М+]" or "[М]" marker (Cyrillic letter М)
	// together with the single whitespace character that follows it.
	mSymbolMarkerRe = regexp.MustCompile(`(\[М\+\]\s|\[М\]\s)`)

	// edgeWhitespaceRe matches a run of whitespace at the very start or the
	// very end of the string.
	edgeWhitespaceRe = regexp.MustCompile(`^\s+|\s+$`)
)

// MSymbolsRemover returns inputString with every "[М+] " and "[М] " marker
// removed and with leading and trailing whitespace trimmed.
//
// The marker is only removed when it is followed by a whitespace character;
// a marker at the very end of the string is left untouched.
func MSymbolsRemover(inputString string) string {
	withoutMarkers := mSymbolMarkerRe.ReplaceAllString(inputString, "")
	// Trim whitespace (\s), not the literal letter "s": the previous pattern
	// `^s+|s+$` chopped leading/trailing "s" characters off product names.
	return edgeWhitespaceRe.ReplaceAllString(withoutMarkers, "")
}

77
main.go
View file

@ -1,12 +1,12 @@
package main
import (
"context"
"log"
"fmt"
"regexp"
"strings"
"github.com/jackc/pgx/v5"
"t0xa/pdf_to_txt/internal/migrator"
"t0xa/pdf_to_txt/internal/parser"
"t0xa/pdf_to_txt/internal/regexper"
)
type CheckEntry struct {
@ -16,28 +16,30 @@ type CheckEntry struct {
func main() {
// TODO: Redo format of file passing to function
// text := parser.ParseX5Check("./internal/parser/testfiles/test3.pdf")
//
// split := strings.Split(text, "\n") stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС"
// re := regexp.MustCompile(stringsToFilter)
// filtered := []string{}
// for _, element := range split {
// if re.MatchString(element) {
// continue
// }
// if len(element) < 1 {
// continue
// }
// filtered = append(filtered, element)
//
// }
// for index, element := range filtered {
// if element == "Цена*Кол" {
// fmt.Println("Товар: ", filtered[index-1])
// fmt.Println("Цена: ", filtered[index+1])
// fmt.Println("------")
// }
// }
text := parser.ParseX5Check("./internal/parser/testfiles/test4.pdf")
split := strings.Split(text, "\n")
stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС"
re := regexp.MustCompile(stringsToFilter)
filtered := []string{}
for _, element := range split {
if re.MatchString(element) {
continue
}
if len(element) < 1 {
continue
}
filtered = append(filtered, element)
}
for index, element := range filtered {
if element == "Цена*Кол" {
clearedProductName := regexper.MSymbolsRemover(filtered[index-1])
fmt.Print(clearedProductName + " : ")
fmt.Println(filtered[index+1])
fmt.Println("------")
}
}
// ctx := context.Background()
//
@ -58,14 +60,15 @@ func main() {
// }
// fmt.Println(insertedProduct)
ctx := context.Background()
conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod")
if err != nil {
panic(err)
}
defer conn.Close(ctx)
if err := migrator.Migrate(ctx, conn); err != nil {
log.Fatalf("Migration failed: %s", err)
}
// ctx := context.Background()
// // TODO: Move to env file
// conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod")
// if err != nil {
// panic(err)
// }
// defer conn.Close(ctx)
//
// if err := migrator.Migrate(ctx, conn); err != nil {
// log.Fatalf("Migration failed: %s", err)
// }
}