Skip to content

Commit 2ddb885

Browse files
author
solnicki
committed
initial implementation
1 parent 6047d61 commit 2ddb885

File tree

12 files changed

+408
-0
lines changed

12 files changed

+408
-0
lines changed

.github/workflows/golangci-lint.yml

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Lint workflow: runs golangci-lint on pushes to main/master and on every pull request.
name: golangci-lint
on:
  push:
    branches:
      - main
      - master
  pull_request:

# The job only needs to read the repository contents.
permissions:
  contents: read
  # Optional: allow read access to pull request. Use with `only-new-issues` option.
  # pull-requests: read

jobs:
  golangci:
    name: lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: stable
      - name: golangci-lint
        uses: golangci/golangci-lint-action@v6
        with:
          version: v1.59

.github/workflows/test.yml

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Test workflow: runs the Go test suite with the race detector on every push.
name: Go
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.22.1'
      - name: Install dependencies
        run: go get .
      - name: Test with the Go CLI
        run: go test -v -race -p 1 ./...

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
data/
2+
logs/

cmd/main.go

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package main
2+
3+
import (
	"flag"
	"fmt"
	"os"

	"github.com/logmanager-oss/logveil/internal/anonymizer"
)
9+
10+
func main() {
11+
var anonDataDir string
12+
flag.StringVar(&anonDataDir, "dataDir", "../data", "Path to directory with anonymizing data")
13+
14+
var inputFile string
15+
flag.StringVar(&inputFile, "inputFile", "../logs/forti-traffic-small.csv", "Path to input file containing logs to be anonymized")
16+
17+
var outputFile string
18+
flag.StringVar(&outputFile, "outputFile", "", "Path to output file containing anonymized logs")
19+
20+
flag.Parse()
21+
22+
err := anonymize(inputFile, outputFile, anonDataDir)
23+
if err != nil {
24+
fmt.Printf("%v", err)
25+
return
26+
}
27+
}
28+
29+
func anonymize(inputFile string, outputFile string, anonDataDir string) error {
30+
fmt.Println("Command anonymize started")
31+
32+
anonymizer := anonymizer.New()
33+
err := anonymizer.ReadCSVLogs(inputFile)
34+
if err != nil {
35+
return fmt.Errorf("reading input file %s: %v", inputFile, err)
36+
}
37+
38+
err = anonymizer.LoadAnonData(anonDataDir)
39+
if err != nil {
40+
return fmt.Errorf("loading anonymizing data from %s: %v", anonDataDir, err)
41+
}
42+
43+
anonymizer.Run()
44+
if outputFile != "" {
45+
err := anonymizer.WriteAnonymizedOutput(outputFile)
46+
if err != nil {
47+
return fmt.Errorf("writing anonymized data to output file %s: %v", outputFile, err)
48+
}
49+
} else {
50+
fmt.Println(anonymizer.Output)
51+
}
52+
53+
fmt.Println("All done. Exiting...")
54+
55+
return nil
56+
}

go.mod

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
module github.com/logmanager-oss/logveil
2+
3+
go 1.22.1
4+
5+
require github.com/stretchr/testify v1.9.0
6+
7+
require (
8+
github.com/davecgh/go-spew v1.1.1 // indirect
9+
github.com/pmezard/go-difflib v1.0.0 // indirect
10+
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7
11+
gopkg.in/yaml.v3 v3.0.1 // indirect
12+
)

go.sum

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5+
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
6+
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
7+
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 h1:wDLEX9a7YQoKdKNQt88rtydkqDxeGaBUTnIYc3iG/mA=
8+
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
9+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
10+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
11+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

internal/anonymizer/anonymizer.go

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package anonymizer
2+
3+
import (
4+
"bufio"
5+
"encoding/csv"
6+
"errors"
7+
"fmt"
8+
"io/fs"
9+
"os"
10+
"path/filepath"
11+
"strings"
12+
13+
"golang.org/x/exp/rand"
14+
)
15+
16+
// Anonimizer holds the parsed CSV logs, the per-field replacement values and
// the anonymized output produced by Run.
type Anonimizer struct {
17+
csvData []map[string]string // one map per CSV data row, keyed by field name (filled by ReadCSVLogs)
18+
anonData map[string][]string // replacement values per field name (filled by LoadAnonData)
19+
fieldNames []string // header row of the CSV file
20+
randFunc func(int) int // picks an index into a replacement list; New sets it to rand.Intn
21+
Output []string // "raw" value of every row after replacement, appended by Run
22+
}
23+
24+
func New() *Anonimizer {
25+
return &Anonimizer{anonData: make(map[string][]string), randFunc: rand.Intn}
26+
}
27+
28+
func (an *Anonimizer) ReadCSVLogs(filename string) error {
29+
fmt.Println("Reading CSV file:", filename)
30+
file, err := os.Open(filename)
31+
if err != nil {
32+
return err
33+
}
34+
defer file.Close()
35+
36+
csvReader := csv.NewReader(file)
37+
rows, err := csvReader.ReadAll()
38+
if err != nil {
39+
return err
40+
}
41+
42+
// First element of the csvReader contains field names
43+
an.fieldNames = rows[0]
44+
45+
for _, row := range rows[1:] {
46+
m := make(map[string]string)
47+
for i, val := range row {
48+
m[an.fieldNames[i]] = string(val)
49+
}
50+
an.csvData = append(an.csvData, m)
51+
}
52+
fmt.Println("Field names in the CSV file:", an.fieldNames)
53+
fmt.Println("Number of rows found in the CSV file:", len(an.csvData))
54+
55+
return nil
56+
}
57+
58+
func (an *Anonimizer) LoadAnonData(anonDataDir string) error {
59+
for i := range an.fieldNames {
60+
if an.fieldNames[i] == "raw" {
61+
continue
62+
}
63+
64+
filepath := filepath.Join(anonDataDir, an.fieldNames[i])
65+
_, err := os.Stat(filepath)
66+
if err != nil {
67+
if errors.Is(err, fs.ErrNotExist) {
68+
fmt.Printf("Anonymizing data not found for field %s. Skipping.\n", an.fieldNames[i])
69+
continue
70+
}
71+
return err
72+
}
73+
74+
data, err := an.loadAnonymizingData(filepath)
75+
if err != nil {
76+
return fmt.Errorf("loading anonymizing data file %s: %v", filepath, err)
77+
}
78+
79+
an.anonData[an.fieldNames[i]] = data
80+
fmt.Printf("Loaded anonymizing data for field: %s; values loaded: %d\n", an.fieldNames[i], len(data))
81+
}
82+
83+
return nil
84+
}
85+
86+
func (an *Anonimizer) loadAnonymizingData(filepath string) ([]string, error) {
87+
anonDataFile, err := os.OpenFile(filepath, os.O_RDONLY, os.ModePerm)
88+
if err != nil {
89+
return nil, err
90+
}
91+
defer anonDataFile.Close()
92+
93+
var anonData []string
94+
anonDataFileScanner := bufio.NewScanner(anonDataFile)
95+
for anonDataFileScanner.Scan() {
96+
anonData = append(anonData, anonDataFileScanner.Text())
97+
}
98+
99+
return anonData, nil
100+
}
101+
102+
func (an *Anonimizer) Run() {
103+
fmt.Println("Anonymizing data")
104+
105+
for _, logLine := range an.csvData {
106+
for field, value := range logLine {
107+
if field == "raw" {
108+
continue
109+
}
110+
111+
if value == "" {
112+
continue
113+
}
114+
115+
if anonValues, exists := an.anonData[field]; exists {
116+
newAnonValue := anonValues[an.randFunc(len(anonValues))]
117+
fmt.Printf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue)
118+
119+
logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1)
120+
}
121+
}
122+
123+
an.Output = append(an.Output, fmt.Sprint(logLine["raw"]))
124+
}
125+
}
126+
127+
func (an *Anonimizer) WriteAnonymizedOutput(filename string) error {
128+
fmt.Printf("Writing anonymized data to output file: %s", filename)
129+
file, err := os.Create(filename)
130+
if err != nil {
131+
return err
132+
}
133+
defer file.Close()
134+
135+
for _, line := range an.Output {
136+
_, err := file.WriteString(line + "\n")
137+
if err != nil {
138+
return fmt.Errorf("writing anonymized data to output file %s: %v", filename, err)
139+
}
140+
}
141+
fmt.Println("Writing done")
142+
143+
return nil
144+
}
145+
146+
// SetRandFunc replaces the function used to choose a replacement value.
// Run calls it with the length of a field's replacement list and uses the
// result as an index into that list, so randFunc must return a value in
// [0, n) for an argument n.
func (an *Anonimizer) SetRandFunc(randFunc func(int) int) {
147+
an.randFunc = randFunc
148+
}
+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package anonymizer
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestAnonimizer_ReadCSVLogs(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
filename string
13+
expectedFieldNames []string
14+
expectedValues []map[string]string
15+
}{
16+
{
17+
name: "Test ReadCSVLogs",
18+
filename: "../../test/test_logs/test_logs.csv",
19+
expectedFieldNames: []string{"@timestamp", "raw", "msg.src_ip", "msg.username", "msg.organization"},
20+
expectedValues: []map[string]string{{
21+
"@timestamp": "2024-06-05T14:59:27.000+00:00",
22+
"msg.organization": "TESTuser.test.com",
23+
"msg.src_ip": "89.239.31.49", "msg.username": "[email protected]",
24+
"raw": "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"89.239.31.49\", \"username\":\"[email protected]\", \"organization\":\"TESTuser.test.com\"}",
25+
}},
26+
},
27+
}
28+
for _, tt := range tests {
29+
t.Run(tt.name, func(t *testing.T) {
30+
an := &Anonimizer{}
31+
err := an.ReadCSVLogs(tt.filename)
32+
if err != nil {
33+
t.Error(err)
34+
}
35+
assert.Equal(t, tt.expectedFieldNames, an.fieldNames)
36+
assert.Equal(t, tt.expectedValues, an.csvData)
37+
})
38+
}
39+
}
40+
41+
func TestAnonimizer_LoadAnonData(t *testing.T) {
42+
tests := []struct {
43+
name string
44+
anonDataDir string
45+
fieldNames []string
46+
expectedAnonData map[string][]string
47+
}{
48+
{
49+
name: "Test LoadAnonData",
50+
fieldNames: []string{"msg.src_ip", "msg.username"},
51+
anonDataDir: "../../test/test_anon_data",
52+
expectedAnonData: map[string][]string{
53+
"msg.src_ip": {
54+
"10.10.10.1", "10.20.0.53", "192.168.100.1", "172.16.34.56", "10.121.202.234", "134.108.42.127", "41.188.232.249",
55+
},
56+
"msg.username": {
57+
"miloslav.illes", "ladislav.dosek", "katerina.janeckova", "josef.varga", "miroslav.skamrala", "jiri.filip", "patrik.topic",
58+
},
59+
},
60+
},
61+
}
62+
for _, tt := range tests {
63+
t.Run(tt.name, func(t *testing.T) {
64+
an := &Anonimizer{fieldNames: tt.fieldNames, anonData: make(map[string][]string)}
65+
err := an.LoadAnonData(tt.anonDataDir)
66+
if err != nil {
67+
t.Error(err)
68+
}
69+
assert.Equal(t, tt.expectedAnonData, an.anonData)
70+
})
71+
}
72+
}

0 commit comments

Comments
 (0)