Add regexp remover of M symbols

This commit is contained in:
pro100ton 2024-11-08 23:39:36 +03:00
parent c6e73dd360
commit 8a0223a843
5 changed files with 58 additions and 37 deletions

2
TODO.md Normal file
View file

@ -0,0 +1,2 @@
# List of TODOs
- [ ] Implement version control for database migrations

View file

@ -0,0 +1,6 @@
-- Bookkeeping table for schema migrations.
-- Presumably holds one row per applied migration; confirm against the migrator code.
-- IF NOT EXISTS makes the statement safe to run again when the table is already present.
CREATE TABLE IF NOT EXISTS migrations (
-- Auto-incrementing surrogate key.
id SERIAL PRIMARY KEY,
-- Version identifier; required, and UNIQUE so the same version cannot be stored twice.
version TEXT NOT NULL UNIQUE,
-- Filled with the current time on insert when no value is supplied.
applied_at TIMESTAMP DEFAULT NOW()
);

Binary file not shown.

View file

@ -0,0 +1,10 @@
package regexper
import "regexp"
// mMarkerRe matches the "[М+]" and "[М]" markers (the letter is the Cyrillic
// "М") together with the single whitespace character that follows them.
var mMarkerRe = regexp.MustCompile(`(\[М\+\]\s|\[М\]\s)`)

// edgeSpaceRe matches a run of whitespace at the very start or the very end
// of a string.
var edgeSpaceRe = regexp.MustCompile(`^\s+|\s+$`)

// MSymbolsRemover returns inputString with every "[М+] " and "[М] " marker
// removed and with leading and trailing whitespace trimmed.
//
// A marker is removed only when it is immediately followed by a whitespace
// character; that whitespace character is removed together with the marker.
// Text that contains no markers is returned unchanged apart from the
// whitespace trimming.
func MSymbolsRemover(inputString string) string {
	withoutMarkers := mMarkerRe.ReplaceAllString(inputString, "")
	// Trim whitespace only: the pattern must use `\s`, not a bare `s`, or
	// names that start or end with the letter "s" would be truncated.
	return edgeSpaceRe.ReplaceAllString(withoutMarkers, "")
}

77
main.go
View file

@ -1,12 +1,12 @@
package main package main
import ( import (
"context" "fmt"
"log" "regexp"
"strings"
"github.com/jackc/pgx/v5" "t0xa/pdf_to_txt/internal/parser"
"t0xa/pdf_to_txt/internal/regexper"
"t0xa/pdf_to_txt/internal/migrator"
) )
type CheckEntry struct { type CheckEntry struct {
@ -16,28 +16,30 @@ type CheckEntry struct {
func main() { func main() {
// TODO: Redo format of file passing to function // TODO: Redo format of file passing to function
// text := parser.ParseX5Check("./internal/parser/testfiles/test3.pdf") text := parser.ParseX5Check("./internal/parser/testfiles/test4.pdf")
//
// split := strings.Split(text, "\n") stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС" split := strings.Split(text, "\n")
// re := regexp.MustCompile(stringsToFilter) stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС"
// filtered := []string{} re := regexp.MustCompile(stringsToFilter)
// for _, element := range split { filtered := []string{}
// if re.MatchString(element) { for _, element := range split {
// continue if re.MatchString(element) {
// } continue
// if len(element) < 1 { }
// continue if len(element) < 1 {
// } continue
// filtered = append(filtered, element) }
// filtered = append(filtered, element)
// }
// for index, element := range filtered { }
// if element == "Цена*Кол" { for index, element := range filtered {
// fmt.Println("Товар: ", filtered[index-1]) if element == "Цена*Кол" {
// fmt.Println("Цена: ", filtered[index+1]) clearedProductName := regexper.MSymbolsRemover(filtered[index-1])
// fmt.Println("------") fmt.Print(clearedProductName + " : ")
// } fmt.Println(filtered[index+1])
// } fmt.Println("------")
}
}
// ctx := context.Background() // ctx := context.Background()
// //
@ -58,14 +60,15 @@ func main() {
// } // }
// fmt.Println(insertedProduct) // fmt.Println(insertedProduct)
ctx := context.Background() // ctx := context.Background()
conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod") // // TODO: Move to env file
if err != nil { // conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod")
panic(err) // if err != nil {
} // panic(err)
defer conn.Close(ctx) // }
// defer conn.Close(ctx)
if err := migrator.Migrate(ctx, conn); err != nil { //
log.Fatalf("Migration failed: %s", err) // if err := migrator.Migrate(ctx, conn); err != nil {
} // log.Fatalf("Migration failed: %s", err)
// }
} }