Skip to content

Commit

Permalink
fix data structure unittest
Browse files Browse the repository at this point in the history
  • Loading branch information
limuy2022 committed Mar 16, 2024
1 parent 075ae5f commit e5ee64a
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 88 deletions.
35 changes: 35 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ stdlib = { path = "./stdlib" }
rust-i18n = "3.0"
sys-locale = "0.3"
rustyline = { version = "14.0", features = ["with-file-history"] }
suffix_array = "0.5.0"

[profile.release]
panic = "abort"
Expand Down
40 changes: 1 addition & 39 deletions src/tvm/algo/string.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap};
use std::usize;

pub fn kmp(main_string: &str, pattern: &str) -> usize {
// 首先对模式串构建next数组
Expand Down Expand Up @@ -39,47 +39,9 @@ pub fn kmp_next(pattern: &str) -> Vec<i64> {
ret
}

/// Builds the suffix array of `s`.
///
/// The result lists the starting positions of all suffixes of `s` in
/// lexicographic order. Positions are **1-based** and counted in `char`s (not
/// bytes), so `sa("ababa")` is `[5, 3, 1, 4, 2]`. An empty input yields an
/// empty vector.
///
/// Uses prefix doubling: suffixes are repeatedly re-sorted by the pair
/// (rank of the first `width` chars, rank of the following `width` chars),
/// doubling `width` until every suffix has a distinct rank.
pub fn sa(s: &str) -> Vec<usize> {
    let chars: Vec<char> = s.chars().collect();
    let n = chars.len();
    let mut order: Vec<usize> = (0..n).collect();
    if n == 0 {
        return order;
    }

    // Round zero ranks each suffix by its first character's code point.
    let mut rank: Vec<usize> = chars.iter().map(|&ch| ch as usize).collect();
    let mut next_rank = vec![0usize; n];
    let mut width = 1usize;
    loop {
        {
            // A suffix shorter than `width + 1` chars has no second half; it gets 0 so that
            // it sorts before any suffix that shares its prefix but continues further.
            let key = |i: usize| (rank[i], rank.get(i + width).map_or(0, |&r| r + 1));
            order.sort_unstable_by_key(|&i| key(i));

            // Re-rank: suffixes with equal keys share a rank, others get increasing ranks.
            next_rank[order[0]] = 0;
            for pair in order.windows(2) {
                let bump = usize::from(key(pair[0]) != key(pair[1]));
                next_rank[pair[1]] = next_rank[pair[0]] + bump;
            }
        }
        std::mem::swap(&mut rank, &mut next_rank);

        // The largest rank is n - 1 exactly when all suffixes are distinguished.
        if rank[order[n - 1]] + 1 == n {
            break;
        }
        width *= 2;
    }

    // Convert 0-based char offsets to the 1-based positions callers expect.
    order.into_iter().map(|i| i + 1).collect()
}

#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sa_1() {
    // Smoke test: only checks that `sa` runs on this input; the result is not asserted.
    let _sarray = sa("dkodkoe");
}

#[test]
fn sa_2() {
    // Expected vector: 1-based start positions of the suffixes of "ababa" in sorted order
    // ("a", "aba", "ababa", "ba", "baba").
    let expected: Vec<usize> = vec![5, 3, 1, 4, 2];
    assert_eq!(sa("ababa"), expected);
}

#[test]
fn kmp_1() {
assert_eq!(kmp("ABABABC", "ABA"), 2);
Expand Down
105 changes: 56 additions & 49 deletions src/tvm/types/data_structure/sam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@ use crate::{compiler::token::TokenType, hash_map};
use derive::{trc_class, trc_function, trc_method};
use std::collections::HashMap;
use std::fmt::Display;
use std::usize;

/// One state of the automaton; instances are stored in `Sam::_states`.
// NOTE(review): `link` and `next` are each declared twice below (an `Option<usize>`/`usize`
// form and an `i32` form). This looks like the before/after lines of a diff shown together;
// confirm which pair is current before compiling.
#[derive(Debug, Clone)]
pub struct Node {
// Link to another state, stored as an index into `_states`; `Node::new` starts it unset
// (`None` in the `Option` form, `-1` in the `i32` form).
link: Option<usize>,
// Outgoing transitions: character -> index of the target state in `_states`.
next: HashMap<char, usize>,
link: i32,
next: HashMap<char, i32>,
// Length value given to `Node::new`; `extend_internel` creates new states with an existing
// state's `len + 1`.
len: usize,
}

impl Node {
/// Creates a state with the given `len`, an empty transition map and an unset link.
// NOTE(review): `link` is initialised twice below (`None` and `-1`); these look like the
// before/after lines of a diff shown together. Confirm which one matches the field type.
pub fn new(len: usize) -> Node {
Node {
len,
link: None,
link: -1,
next: HashMap::new(),
}
}
Expand All @@ -29,6 +30,7 @@ impl Node {
/// Suffix automaton built incrementally, one character at a time, by `extend_internel`.
#[derive(Debug, Clone)]
pub struct Sam {
// All states; `Sam::new` creates index 0 as the initial state with length 0.
pub _states: Vec<Node>,
// Index into `_states` of the state most recently added by `extend_internel`
// (0 for a fresh automaton).
last: i32,
}

impl Sam {
Expand All @@ -39,67 +41,54 @@ impl Sam {
/// Creates an automaton that contains only the initial state (length 0), with `last`
/// pointing at that state.
pub fn new() -> Sam {
    let root = Node::new(0);
    Sam {
        _states: vec![root],
        last: 0,
    }
}

/// Extends the automaton `s` with the character `c`.
///
/// A new state is pushed onto `s._states`; in the split branch a clone of an existing
/// state is pushed as well, right after the new state.
// NOTE(review): this body shows two implementations interleaved line by line: one driven by
// `last`/`u`/`id` with `Option<usize>` links, and one driven by `p`/`newobj`/`cur` with `i32`
// links where `-1` means "no link". It looks like the removed and added lines of a diff without
// their +/- markers; confirm which set of lines is current before relying on the code.
pub fn extend_internel(s: &mut Sam, c: char) {
let id: usize = s._states.len();
// Last node of the suffix automaton
let mut last = id - 1;
let mut p = s.last;
// New node: it is created because appending the final character always produces a new
// equivalence class (the whole string), whose length is the previous node's length plus one
let mut u = Node::new(s._states[last].len + 1);

loop {
if let std::collections::hash_map::Entry::Vacant(e) = s._states[last].next.entry(c) {
e.insert(id);
} else {
break;
}
match s._states[last].link {
None => {
break;
}
Some(ind) => {
last = ind;
}
}
let mut newobj = Node::new(s._states[p as usize].len + 1);
// Index the new state will have once it is pushed onto `_states`
let cur = s._states.len() as i32;
// Keep jumping up the suffix links until a node that already has a transition on c is found;
// every node further up must then already have a transition on c too, so we can stop there
while p != -1 && !s._states[p as usize].next.contains_key(&c) {
s._states[p as usize].next.insert(c, cur);
p = s._states[p as usize].link;
}
if s._states[last].link.is_none() {
u.link = Some(0);
s._states.push(u);
if p == -1 {
// Walked all the way up past the root node; simply create the new node
newobj.link = 0;
s._states.push(newobj);
} else {
let q = s._states[last].next[&c];
if s._states[q].len == s._states[last].len + 1 {
u.link = Some(q);
s._states.push(u);
let q = s._states[p as usize].next[&c];
if s._states[q as usize].len == s._states[p as usize].len + 1 {
// The lengths line up exactly, so the new node can link straight to q
newobj.link = q;
s._states.push(newobj);
} else {
let mut clone = Node::new(s._states[last].len + 1);
clone.next.clone_from(&s._states[q].next);
clone.link = s._states[q].link;
let cloneid = id + 1;
loop {
if let Some(tmp) = s._states[last].next.get_mut(&c) {
if *tmp != q {
break;
}
*tmp = cloneid;
} else {
// The lengths do not line up; split the node
let mut clone = Node::new(s._states[p as usize].len + 1);
clone.next.clone_from(&s._states[q as usize].next);
clone.link = s._states[q as usize].link;
// The clone is pushed right after the new node, hence index cur + 1
let cloneid = cur + 1;
// Redirect transitions on c that still point at q to the clone, walking up the links
while let Some(tmp) = s._states[p as usize].next.get_mut(&c) {
if *tmp != q {
break;
}
match s._states[last].link {
None => {
break;
}
Some(ind) => {
last = ind;
}
*tmp = cloneid;
p = s._states[p as usize].link;
if p == -1 {
break;
}
}
s._states[q].link = Some(cloneid);
u.link = Some(cloneid);
s._states.push(u);
s._states[q as usize].link = cloneid;
newobj.link = cloneid;
s._states.push(newobj);
s._states.push(clone);
}
}
// Remember the newly added state as the starting point for the next extension
s.last = cur;
}
}

Expand All @@ -126,6 +115,24 @@ impl Display for Sam {
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sam_easy() {
    // Feed "abab" one character at a time; the automaton should end up with 5 states,
    // counting the initial state created by `Sam::new`.
    let mut automaton = Sam::new();
    "abab"
        .chars()
        .for_each(|ch| Sam::extend_internel(&mut automaton, ch));
    assert_eq!(automaton._states.len(), 5);
}

#[test]
fn test_sam_easy2() {
    // Feed "aabaa" one character at a time; the automaton should end up with 6 states,
    // counting the initial state created by `Sam::new`.
    let mut automaton = Sam::new();
    "aabaa"
        .chars()
        .for_each(|ch| Sam::extend_internel(&mut automaton, ch));
    assert_eq!(automaton._states.len(), 6);
}

#[test]
fn test_sam() {
let mut sam = Sam::new();
Expand Down

0 comments on commit e5ee64a

Please sign in to comment.