完成计算rank数组的方法

This commit is contained in:
邹晓航
2014-12-30 11:46:53 +08:00
parent 2a6f77c0b0
commit 6a10707740

View File

@@ -7,10 +7,12 @@
namespace TinySTL{
class suffix_array{
public:
using array_type = std::vector < int > ;
private:
//typedef std::unique_ptr<std::vector<int>> vecPtr;
private:
std::vector<int> _array;
array_type _suffix_array;
array_type _height_array;
array_type _rank_array;
public:
template<class InputIterator>
//arr - the source sequence (remainder of the original comment was lost to mis-encoding)
@@ -18,9 +20,35 @@ namespace TinySTL{
//max_len - max_len<65><6E><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>arr<72><72><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>ȡֵ<C8A1><D6B5>Χ<EFBFBD><CEA7><EFBFBD>ǻ<EFBFBD><C7BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD><EFBFBD>ԭ<EFBFBD><D4AD><EFBFBD>ж<EFBFBD><D0B6><EFBFBD><EFBFBD><EFBFBD>ĸ<EFBFBD><C4B8><EFBFBD><EFBFBD>ֱ<EFBFBD><D6B1>ȡ128<32><38><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ԭ<EFBFBD><D4AD><EFBFBD>б<EFBFBD><D0B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ļ<EFBFBD><C4BB><EFBFBD><EFBFBD><EFBFBD>m<EFBFBD><6D><EFBFBD><EFBFBD>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>1<EFBFBD><31>ֵ<EFBFBD><D6B5>
suffix_array(InputIterator arr, size_t len, size_t max_len = 128){
calSuffix(arr, len, max_len);
calRank();
}
// Returns a copy of the suffix array member (_suffix_array), which
// calSuffix fills with suffix start positions.
array_type suffixArray(){
    const array_type &sorted_suffixes = _suffix_array;
    return sorted_suffixes;
}
// Returns a copy of the height array member (_height_array).
//
// The original body contained only a todo and fell off the end of a
// non-void function, which is undefined behaviour. Returning the member
// makes the call well-defined.
// TODO: the constructor visible in this file calls only calSuffix and
// calRank, so unless calHeight is invoked elsewhere the returned array
// is empty.
array_type heightArray(){
    return _height_array;
}
// Returns a copy of the rank array member (_rank_array), which calRank
// fills so that _rank_array[_suffix_array[i]] == i.
array_type rankArray(){
    const array_type &ranks = _rank_array;
    return ranks;
}
private:
// Tells whether the two-key pairs starting at positions a and b are equal:
// the first keys arr[a] / arr[b] and the second keys arr[a + l] / arr[b + l].
// The second keys are only inspected when the first keys match.
// NOTE(review): no bounds checking is done here; the caller must make sure
// a + l and b + l are readable positions of arr.
template<class RandomAccessIt>
bool cmp(RandomAccessIt arr, size_t a, size_t b, size_t l){
    if (!(arr[a] == arr[b])){
        return false;
    }
    return arr[a + l] == arr[b + l];
}
// Builds the rank array as the inverse permutation of the suffix array:
// after the call, _rank_array[_suffix_array[i]] == i for every index i.
// Must be called after _suffix_array has been filled.
void calRank(){
    const size_t count = _suffix_array.size();
    _rank_array.resize(count);
    // Use a size_t counter: the original `auto i = 0` deduced int and was
    // compared against the unsigned size(), a signed/unsigned mismatch.
    for (size_t pos = 0; pos != count; ++pos){
        _rank_array[_suffix_array[pos]] = static_cast<int>(pos);
    }
}
template<class InputIterator>
void calSuffix(InputIterator arr, size_t len, size_t max_len){
//Implemented with the prefix-doubling algorithm (the start of the original comment was lost to mis-encoding)
//Time complexity of the algorithm = O(nlg(n))
_array.resize(len);
_suffix_array.resize(len);
int wa[1024], wb[1024], wv[1024], ws[1024];
int i, j, p, *x = wa, *y = wb, *t;
@@ -30,7 +58,7 @@ namespace TinySTL{
for (i = 0; i < len; i++) ws[x[i] = arr[i]]++;
for (i = 1; i < max_len; i++) ws[i] += ws[i - 1];
//i֮<69><D6AE><EFBFBD>Դ<EFBFBD>len-1<><31>ʼѭ<CABC><D1AD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD>˱<EFBFBD>֤<EFBFBD>ڵ<EFBFBD><DAB5>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȵ<EFBFBD><C8B5>ַ<EFBFBD><D6B7><EFBFBD>ʱ<EFBFBD><CAB1>Ĭ<EFBFBD>Ͽ<EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD>СһЩ<D2BB><D0A9>
for (i = len - 1; i >= 0; i--) _array[--ws[x[i]]] = i;
for (i = len - 1; i >= 0; i--) _suffix_array[--ws[x[i]]] = i;
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ѭ<EFBFBD><D1AD><EFBFBD><EFBFBD>p<EFBFBD><70><EFBFBD><EFBFBD>rankֵ<6B><D6B5><EFBFBD>õ<EFBFBD><C3B5>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>p<EFBFBD>ﵽlen<65><6E><EFBFBD><EFBFBD>ô<EFBFBD><C3B4><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>Ĵ<EFBFBD>С<EFBFBD><D0A1>ϵ<EFBFBD><CFB5><EFBFBD>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ˡ<EFBFBD>
//j<><6A><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD>ϲ<EFBFBD><CFB2><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>ij<EFBFBD><C4B3>ȣ<EFBFBD>ÿ<EFBFBD>ν<EFBFBD><CEBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊj<CEAA><6A><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>ϲ<EFBFBD><CFB2><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ2*j<><6A><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ȼ<EFBFBD><C8BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>ĩβ<C4A9><CEB2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵӦ<D6B5><D3A6><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ۣ<EFBFBD><DBA3><EFBFBD>˼<EFBFBD><CBBC><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD>ġ<EFBFBD>
@@ -39,7 +67,7 @@ namespace TinySTL{
{
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD>ʵ<EFBFBD><CAB5><EFBFBD>˶Եڶ<D4B5><DAB6>ؼ<EFBFBD><D8BC>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD>
for (p = 0, i = len - j; i < len; i++) y[p++] = i;
for (i = 0; i < len; i++) if (_array[i] >= j) y[p++] = _array[i] - j;
for (i = 0; i < len; i++) if (_suffix_array[i] >= j) y[p++] = _suffix_array[i] - j;
//<2F>ڶ<EFBFBD><DAB6>ؼ<EFBFBD><D8BC>ֻ<EFBFBD><D6BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ɺ<EFBFBD><C9BA><EFBFBD>y[]<5D><><EFBFBD><EFBFBD><EFBFBD>ŵ<EFBFBD><C5B5>ǰ<EFBFBD><C7B0>ڶ<EFBFBD><DAB6>ؼ<EFBFBD><D8BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>±<EFBFBD>
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><E0B5B1><EFBFBD><EFBFBD>ȡ<EFBFBD><C8A1>ÿ<EFBFBD><C3BF><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>ĵ<EFBFBD>һ<EFBFBD>ؼ<EFBFBD><D8BC>֣<EFBFBD>ǰ<EFBFBD><C7B0>˵<EFBFBD><CBB5><EFBFBD><EFBFBD>x[]<5D>DZ<EFBFBD><C7B1><EFBFBD>rankֵ<6B>ģ<EFBFBD>Ҳ<EFBFBD><D2B2><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD>ĵ<EFBFBD>һ<EFBFBD>ؼ<EFBFBD><D8BC>֣<EFBFBD><D6A3><EFBFBD><EFBFBD>ŵ<EFBFBD>wv[]<5D><><EFBFBD><EFBFBD><EFBFBD>Ƿ<EFBFBD><C7B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʹ<EFBFBD><CAB9>
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD>һ<EFBFBD>ؼ<EFBFBD><D8BC>ֽ<EFBFBD><D6BD>еĻ<D0B5><C4BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
@@ -47,21 +75,22 @@ namespace TinySTL{
for (i = 0; i < max_len; i++) ws[i] = 0;
for (i = 0; i < len; i++) ws[wv[i]]++;
for (i = 1; i < max_len; i++) ws[i] += ws[i - 1];
for (i = len - 1; i >= 0; i--) _array[--ws[wv[i]]] = y[i];
for (i = len - 1; i >= 0; i--) _suffix_array[--ws[wv[i]]] = y[i];
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>о<EFBFBD><D0BE>Ǽ<EFBFBD><C7BC><EFBFBD><EFBFBD>ϲ<EFBFBD>֮<EFBFBD><D6AE><EFBFBD><EFBFBD>rankֵ<6B>ˣ<EFBFBD><CBA3><EFBFBD><EFBFBD>ϲ<EFBFBD>֮<EFBFBD><D6AE><EFBFBD><EFBFBD>rankֵӦ<D6B5>ô<EFBFBD><C3B4><EFBFBD>x[]<5D><><EFBFBD><EFBFBD><E6A3AC><EFBFBD><EFBFBD><EFBFBD>Ǽ<EFBFBD><C7BC><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD>ֱ<EFBFBD><D6B1><EFBFBD><EFBFBD>õ<EFBFBD><C3B5><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD>rankֵ<6B><D6B5>Ҳ<EFBFBD><D2B2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>x[]<5D><><EFBFBD><EFBFBD><EFBFBD>ŵĶ<C5B5><C4B6><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҽ<EFBFBD>Ҫ<EFBFBD><D2AA>x[]<5D><><EFBFBD><EFBFBD><EFBFBD>ã<EFBFBD><C3A3><EFBFBD>Ҫ<EFBFBD><D2AA>x[]<5D><><EFBFBD><EFBFBD><EFBFBD>ţ<EFBFBD><C5A3><EFBFBD>ô<EFBFBD>
//<2F><>Ȼ<EFBFBD><C8BB><EFBFBD>Ȱ<EFBFBD>x[]<5D>Ķ<EFBFBD><C4B6><EFBFBD><EFBFBD>ŵ<EFBFBD><C5B5><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>棬ʡ<E6A3AC><CAA1><EFBFBD><EFBFBD><EFBFBD>ˡ<EFBFBD><CBA1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ý<EFBFBD><C3BD><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>ķ<EFBFBD>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD>Чʵ<D0A7><CAB5><EFBFBD>˽<EFBFBD>x[]<5D>Ķ<EFBFBD><C4B6><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ơ<EFBFBD><C6A1><EFBFBD><EFBFBD><EFBFBD>y[]<5D>С<EFBFBD>
for (t = x, x = y, y = t, p = 1, x[_array[0]] = 0, i = 1; i < len; i++)
x[_array[i]] = cmp(y, _array[i - 1], _array[i], j) ? p - 1 : p++;
for (t = x, x = y, y = t, p = 1, x[_suffix_array[0]] = 0, i = 1; i < len; i++)
x[_suffix_array[i]] = cmp(y, _suffix_array[i - 1], _suffix_array[i], j) ? p - 1 : p++;
}
return;
}
const std::vector<int> suffixArray()const{
return _array;
}
private:
bool cmp(int *arr, int a, int b, int l){
return arr[a] == arr[b] && arr[a + l] == arr[b + l];
// Computes the height (LCP) array: for every rank r > 0,
// _height_array[r] is the length of the longest common prefix of the
// suffixes starting at _suffix_array[r - 1] and _suffix_array[r];
// _height_array[0] is 0.
//
// arr - the sequence the suffix array was built from (indexable by position)
// len - number of elements in arr
//
// Precondition: _suffix_array already holds the len suffix start positions
// of arr in sorted order.
//
// Fixes over the previous version: it referenced an undeclared `n`, never
// sized _height_array, filled ranks with a 1-based loop that read
// _suffix_array[len], read _suffix_array[-1] for the suffix of rank 0, and
// compared characters without any bound on the positions.
template<class InputIterator>
void calHeight(InputIterator arr, size_t len)
{
    _height_array.assign(len, 0);
    _rank_array.resize(len);
    for (size_t pos = 0; pos != len; ++pos)
        _rank_array[_suffix_array[pos]] = static_cast<int>(pos);

    size_t matched = 0;   // LCP length carried over from the previous suffix
    for (size_t start = 0; start != len; ++start){
        const size_t rank = static_cast<size_t>(_rank_array[start]);
        if (rank == 0){
            // The smallest suffix has no predecessor in sorted order.
            matched = 0;
            continue;
        }
        const size_t prev = static_cast<size_t>(_suffix_array[rank - 1]);
        // Dropping the first character shortens the common prefix by at
        // most one, so the scan resumes from matched - 1 instead of 0.
        if (matched > 0) --matched;
        while (start + matched < len && prev + matched < len &&
               arr[start + matched] == arr[prev + matched])
            ++matched;
        _height_array[rank] = static_cast<int>(matched);
    }
}
};
}