Add regexp remover of M symbols
This commit is contained in:
parent
c6e73dd360
commit
8a0223a843
5 changed files with 58 additions and 37 deletions
2
TODO.md
Normal file
2
TODO.md
Normal file
|
@ -0,0 +1,2 @@
|
|||
# List of TODOs
|
||||
- [ ] Implement version control for migrations
|
6
internal/migrations/000_create_migration_table.sql
Normal file
6
internal/migrations/000_create_migration_table.sql
Normal file
|
@ -0,0 +1,6 @@
|
|||
-- Creates the "migrations" table unless a table with that name already exists.
-- Columns: an auto-incrementing primary key, a unique non-null version string,
-- and applied_at, which defaults to NOW() when no value is supplied.
-- NOTE(review): presumably used by the migrator package to record which
-- migrations have been applied; confirm against its queries.
CREATE TABLE IF NOT EXISTS migrations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
version TEXT NOT NULL UNIQUE,
|
||||
applied_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
BIN
internal/parser/testfiles/test4.pdf
Normal file
BIN
internal/parser/testfiles/test4.pdf
Normal file
Binary file not shown.
10
internal/regexper/regexper.go
Normal file
10
internal/regexper/regexper.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
package regexper
|
||||
|
||||
import "regexp"
|
||||
|
||||
// mSymbolsPattern matches the bracketed markers "[М+]" and "[М]" when each is
// followed by one whitespace character. Compiled once at package scope so the
// pattern is not rebuilt on every call.
var mSymbolsPattern = regexp.MustCompile(`(\[М\+\]\s|\[М\]\s)`)

// edgeWhitespacePattern matches a run of whitespace anchored at the start or
// at the end of the string.
var edgeWhitespacePattern = regexp.MustCompile(`^\s+|\s+$`)

// MSymbolsRemover returns inputString with every "[М+] " / "[М] " marker
// removed and with leading and trailing whitespace trimmed.
//
// A marker is only removed when it is followed by a whitespace character;
// a marker at the very end of the string is left in place. Whitespace inside
// the string is not altered.
func MSymbolsRemover(inputString string) string {
	result := mSymbolsPattern.ReplaceAllString(inputString, "")
	// The trim pattern must use `\s` (whitespace); a bare `s` would strip
	// leading/trailing letter "s" characters from the product name instead.
	return edgeWhitespacePattern.ReplaceAllString(result, "")
}
|
77
main.go
77
main.go
|
@ -1,12 +1,12 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
|
||||
"t0xa/pdf_to_txt/internal/migrator"
|
||||
"t0xa/pdf_to_txt/internal/parser"
|
||||
"t0xa/pdf_to_txt/internal/regexper"
|
||||
)
|
||||
|
||||
type CheckEntry struct {
|
||||
|
@ -16,28 +16,30 @@ type CheckEntry struct {
|
|||
|
||||
func main() {
|
||||
// TODO: Rework how the file is passed to the function
|
||||
// text := parser.ParseX5Check("./internal/parser/testfiles/test3.pdf")
|
||||
//
|
||||
// split := strings.Split(text, "\n") stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС"
|
||||
// re := regexp.MustCompile(stringsToFilter)
|
||||
// filtered := []string{}
|
||||
// for _, element := range split {
|
||||
// if re.MatchString(element) {
|
||||
// continue
|
||||
// }
|
||||
// if len(element) < 1 {
|
||||
// continue
|
||||
// }
|
||||
// filtered = append(filtered, element)
|
||||
//
|
||||
// }
|
||||
// for index, element := range filtered {
|
||||
// if element == "Цена*Кол" {
|
||||
// fmt.Println("Товар: ", filtered[index-1])
|
||||
// fmt.Println("Цена: ", filtered[index+1])
|
||||
// fmt.Println("------")
|
||||
// }
|
||||
// }
|
||||
text := parser.ParseX5Check("./internal/parser/testfiles/test4.pdf")
|
||||
|
||||
split := strings.Split(text, "\n")
|
||||
stringsToFilter := "https://mail|Gmail - Электронный чек|Сумма|НДС"
|
||||
re := regexp.MustCompile(stringsToFilter)
|
||||
filtered := []string{}
|
||||
for _, element := range split {
|
||||
if re.MatchString(element) {
|
||||
continue
|
||||
}
|
||||
if len(element) < 1 {
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, element)
|
||||
|
||||
}
|
||||
for index, element := range filtered {
|
||||
if element == "Цена*Кол" {
|
||||
clearedProductName := regexper.MSymbolsRemover(filtered[index-1])
|
||||
fmt.Print(clearedProductName + " : ")
|
||||
fmt.Println(filtered[index+1])
|
||||
fmt.Println("------")
|
||||
}
|
||||
}
|
||||
|
||||
// ctx := context.Background()
|
||||
//
|
||||
|
@ -58,14 +60,15 @@ func main() {
|
|||
// }
|
||||
// fmt.Println(insertedProduct)
|
||||
|
||||
ctx := context.Background()
|
||||
conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer conn.Close(ctx)
|
||||
|
||||
if err := migrator.Migrate(ctx, conn); err != nil {
|
||||
log.Fatalf("Migration failed: %s", err)
|
||||
}
|
||||
// ctx := context.Background()
|
||||
// // TODO: Move to env file
|
||||
// conn, err := pgx.Connect(ctx, "postgres://ruprod_user:ruprod_password@localhost:5433/ruprod")
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// defer conn.Close(ctx)
|
||||
//
|
||||
// if err := migrator.Migrate(ctx, conn); err != nil {
|
||||
// log.Fatalf("Migration failed: %s", err)
|
||||
// }
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue