Standardized code and add comments

This commit is contained in:
linyufeng
2022-07-14 16:16:51 +08:00
parent 50a852828a
commit f300f7eaca
9 changed files with 238 additions and 512 deletions

View File

@@ -10,8 +10,6 @@ ip2region xdb maker
main.py [command] [command options]
Command:
gen generate the binary db file
search binary xdb search test
bench binary xdb bench test
```
# `xdb` 数据生成
@@ -35,56 +33,6 @@ options:
```
# `xdb` 数据查询
# `xdb` 数据查询 和 bench 测试
通过 `python main.py search` 命令来测试查询输入的 ip
```
➜ python git:(v2.0_xdb) ✗ python main.py search
main.py search [command options]
options:
--db string ip2region binary xdb file path
```
例如,使用自带的 xdb 文件来运行查询测试:
```
➜ python git:(v2.0_xdb) ✗ python main.py search --db=./ip2region.xdb
ip2region xdb search test program, commands:
loadIndex : load the vector index for search speedup.
clearIndex: clear the vector index.
quit : exit the test program
ip2region>> 117.148.181.111
[region:中国|0|浙江省|嘉兴市|移动, took:0s]
ip2region>> 120.196.20.28
[region:中国|0|广东省|茂名市|移动, took:0s]
ip2region>> 81.33.22.150
[region:西班牙|0|马德里|马德里|西班牙电信, took:0s]
ip2region>>
```
# bench 测试
如果你自主生成了 `xdb` 文件,请运行如下的 `python main.py bench` 命令来确保生成的 `xdb` 文件的正确性:
```
➜ python git:(v2.0_xdb) ✗ python main.py bench
main.py bench [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--ignore-error bool keep going if bench failed
```
例如:使用 data/ip.merge.txt 源文件来 bench 测试 data/ip2region.xdb 这个 xdb 文件:
```
➜ python git:(v2.0_xdb) ✗ python main.py bench --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt
# 会看到一堆输出,看到类似如下的数据表示 bench 测试通过了,否则就会报错
...
try to bench segment: `{}` 224.0.0.0|255.255.255.255|0|0|0|内网IP|内网IP
|-try to bench ip '224.0.0.0' ... --[Ok]
|-try to bench ip '231.255.255.255' ... --[Ok]
|-try to bench ip '239.255.255.255' ... --[Ok]
|-try to bench ip '247.255.255.255' ... --[Ok]
|-try to bench ip '255.255.255.255' ... --[Ok]
Bench finished, [count: 3417955, failed: 0, took: 88.061s]
```
*请注意:bench 测试使用的 `src` 文件必须与生成 ip2region.xdb 时使用的源文件相同*
如果运行过程中出现错误会立即停止运行;也可以指定 `--ignore-error=true` 参数来忽略错误,并在最后查看 failed 的统计结果。
基于xdb格式的查询功能和测试见 [ip2region binding](https://github.com/lionsoul2014/ip2region/tree/master/binding)

View File

@@ -1,34 +1,35 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#
import logging
import sys
import time
from datetime import datetime
import xdb.maker as mk
import xdb.index as idx
import xdb.searcher as sc
import xdb.util as util
# format log
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(name)s-%(lineno)s-%(levelname)s - %(message)s')
# Format log
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s-%(name)s-%(lineno)s-%(levelname)s - %(message)s",
)
log = logging.getLogger(__name__)
def print_help():
print("ip2region xdb maker")
print("ip2region xdb python maker")
print("{} [command] [command options]".format(sys.argv[0]))
print("Command: ")
print(" gen generate the binary db file")
print(" search binary xdb search test")
print(" bench binary xdb bench test")
def gen_db():
src_file, dst_file = "", ""
index_policy = idx.VectorIndexPolicy
# check input argv
index_policy = idx.Vector_Index_Policy
# Check input parameters
for i in range(2, len(sys.argv)):
r = sys.argv[i]
if len(r) < 5:
@@ -40,11 +41,11 @@ def gen_db():
print("missing = for args pair '{}'".format(r))
return
if r[2:s_idx] == "src":
src_file = r[s_idx+1:]
src_file = r[s_idx + 1:]
elif r[2:s_idx] == "dst":
dst_file = r[s_idx+1:]
dst_file = r[s_idx + 1:]
elif r[2:s_idx] == "index":
index_policy = idx.index_policy_from_string(r[s_idx+1:])
index_policy = idx.index_policy_from_string(r[s_idx + 1:])
else:
print("undefined option `{}`".format(r))
return
@@ -56,142 +57,17 @@ def gen_db():
return
start_time = time.time()
# make the binary file
# Make the binary file
maker = mk.new_maker(index_policy, src_file, dst_file)
maker.init()
maker.start()
maker.end()
logging.info("Done, elapsed: {:.0f}m{:.0f}s".format((time.time() - start_time) / 60, (time.time() - start_time) % 60))
def test_search():
db_file = ""
for i in range(2, len(sys.argv)):
r = sys.argv[i]
if len(r) < 5:
continue
if not r.startswith("--"):
continue
e_idx = r.index("=")
if e_idx < 0:
print("missing = for args pair '{}'".format(r))
return
if r[2:e_idx] == "db":
db_file = r[e_idx+1:]
else:
print("undefined option `{}`".format(r))
return
if db_file == "":
print("{} search [command options]".format(sys.argv[0]))
print("options:")
print(" --db string ip2region binary xdb file path")
return
cb = sc.XdbSearcher.loadContentFromFile(dbfile=db_file)
searcher = sc.XdbSearcher(contentBuff=cb)
print("ip2region xdb search test program, commands:\nloadIndex : load the vector index for search "
"speedup.\nclearIndex: clear the vector index.\nquit : exit the test program")
while True:
print("ip2region>> ", end="")
line = input()
# command interception and execution
if line == "loadIndex":
searcher.loadVectorIndexFromFile(dbfile=db_file)
print("vector index cached")
continue
elif line == "clearIndex":
# FIXME need to add 'clearVectorIndex' method in searcher
print("vector index cleared")
continue
elif line == "quit":
break
ip = util.checkip(line)
if ip == -1:
print("invalid ip address `{}`".format(line))
continue
s_tm = datetime.now()
region = searcher.search(ip)
# TODO calculate io count in `searcher.search` method
print("\x1b[0;32m[region:{}, took:{:.0f}s]\x1b[0m".format(
region, (datetime.now().microsecond - s_tm.microsecond) / 1000)
logging.info(
"Done, elapsed: {:.0f}m{:.0f}s".format(
(time.time() - start_time) / 60, (time.time() - start_time) % 60
)
def test_bench():
    """
    Bench-test an xdb file against the source ip text file it was built from.

    Command line options (read from sys.argv[2:], each as `--name=value`):
      --db            ip2region binary xdb file path (required)
      --src           source ip text file path (required)
      --ignore-error  true/1 to keep going after a mismatch, false/0 to stop

    For every `start_ip|end_ip|region` line of the source file, five ips of
    the segment (start, first quarter, middle, third quarter, end) are looked
    up and the returned region is compared with the region of the line.
    Prints the usage and returns if a required option is missing.
    """
    db_file, src_file = "", ""
    ignore_error = False
    for r in sys.argv[2:]:
        if len(r) < 5:
            continue
        if not r.startswith("--"):
            continue
        # str.find returns -1 when "=" is missing; str.index would raise
        # ValueError and the check below could never be reached.
        s_idx = r.find("=")
        if s_idx < 0:
            print("missing = for args pair '{}'".format(r))
            return
        key, value = r[2:s_idx], r[s_idx + 1:]
        if key == "db":
            db_file = value
        elif key == "src":
            src_file = value
        elif key == "ignore-error":
            if value in ("true", "1"):
                ignore_error = True
            elif value in ("false", "0"):
                ignore_error = False
            else:
                print("invalid value for ignore-error option, could be false/0 or true/1")
                return
        else:
            print("undefined option `{}`".format(r))
            return

    if db_file == "" or src_file == "":
        print("{} bench [command options]".format(sys.argv[0]))
        print("options:")
        print(" --db string ip2region binary xdb file path")
        print(" --src string source ip text file path")
        print(" --ignore-error bool keep going if bench failed")
        return

    # Search against the whole xdb content cached in memory.
    cb = sc.XdbSearcher.loadContentFromFile(dbfile=db_file)
    searcher = sc.XdbSearcher(contentBuff=cb)
    cnt, err_cnt, s_tm = 0, 0, time.time()
    with open(src_file, 'r', encoding="utf-8") as f:
        lines = f.read().splitlines()
    for line in lines:
        # The region part may itself contain "|", so split at most twice.
        ps = line.split("|", maxsplit=2)
        if len(ps) != 3:
            print("invalid ip segment line `{}`".format(line))
            return
        sip = util.checkip(ps[0])
        if sip == -1:
            print("invalid ip address `{}`".format(line))
            return
        eip = util.checkip(ps[1])
        if eip == -1:
            print("invalid ip address `{}`".format(line))
            return
        # The original passed `line` as a second print argument, so the
        # placeholder was printed literally; format it into the message.
        print("try to bench segment: `{}`".format(line))
        mip = util.mid_ip(sip, eip)
        for ip in [sip, util.mid_ip(sip, mip), mip, util.mid_ip(mip, eip), eip]:
            print("|-try to bench ip '{}' ...".format(util.long2ip(ip)), end="")
            region = searcher.search(ip)
            # Check the region info
            cnt += 1
            if region != ps[2]:
                err_cnt += 1
                print(" --[Failed] ({} != {})".format(region, ps[2]))
                if not ignore_error:
                    return
            else:
                print(" --[Ok]")
    print("Bench finished, [count: {}, failed: {}, took: {:.3f}s]".format(cnt, err_cnt, time.time() - s_tm))
)
def main():
@@ -202,13 +78,9 @@ def main():
cmd = sys.argv[1].lower()
if cmd == "gen":
gen_db()
elif cmd == "search":
test_search()
elif cmd == "bench":
test_bench()
else:
print_help()
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -1,2 +1,6 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#

View File

@@ -1,21 +1,24 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#
import struct
VectorIndexPolicy = 1
BTreeIndexPolicy = 2
SegmentIndexBlockSize = 14
Vector_Index_Policy = 1
BTree_Index_Policy = 2
def index_policy_from_string(s: str) -> int:
sl = s.lower()
if sl == "vector":
return VectorIndexPolicy
return Vector_Index_Policy
elif sl == "btree":
return BTreeIndexPolicy
return BTree_Index_Policy
else:
print("invalid policy `{}`, used default vector index".format(s))
return VectorIndexPolicy
return Vector_Index_Policy
class VectorIndexBlock:
@@ -26,11 +29,14 @@ class VectorIndexBlock:
self.first_ptr = fp
self.last_ptr = lp
def __str__(self):
return "FirstPtr: {}, LastPrt: {}".format(self.first_ptr, self.last_ptr)
def encode(self) -> bytes:
return struct.pack("<II", self.first_ptr, self.last_ptr)
def string(self) -> str:
return "FirstPtr: {}, LastPrt: {}".format(self.first_ptr, self.last_ptr)
Segment_Index_Block_Size = 14
class SegmentIndexBlock:
@@ -45,8 +51,12 @@ class SegmentIndexBlock:
self.data_len = dl
self.data_ptr = dp
def encode(self) -> bytes:
return struct.pack("<IIHI", self.start_ip, self.end_ip, self.data_len, self.data_ptr)
def __str__(self):
    """
    Return a readable form of the block: `{sip: .., eip: .., len: .., ptr: ..}`.
    """
    # Literal braces must be doubled in a str.format template; with single
    # braces `{sip: ...}` is parsed as a replacement field named `sip` and
    # the call raises KeyError.
    return "{{sip: {}, eip: {}, len: {}, ptr: {}}}".format(
        self.start_ip, self.end_ip, self.data_len, self.data_ptr
    )
def string(self) -> str:
    """
    Return a readable form of the block: `{sip: .., eip: .., len: .., ptr: ..}`.
    """
    # Literal braces are doubled so str.format does not treat `{sip: ...}`
    # as a replacement field (which raises KeyError).
    return "{{sip: {}, eip: {}, len: {}, ptr: {}}}".format(
        self.start_ip, self.end_ip, self.data_len, self.data_ptr
    )
def encode(self) -> bytes:
return struct.pack(
"<IIHI", self.start_ip, self.end_ip, self.data_len, self.data_ptr
)

View File

@@ -1,5 +1,8 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#
# ----
# ip2region database v2.0 structure
@@ -36,40 +39,39 @@
#
# data entry structure:
# +--------------------+-----------------------+
# | 2bytes (for desc) | dynamic length |
# | 2bytes (for desc) | dynamic length |
# +--------------------+-----------------------+
# data length whatever in bytes
#
# index entry structure
# +------------+-----------+---------------+------------+
# | 4bytes | 4bytes | 2bytes | 4 bytes |
# | 4bytes | 4bytes | 2bytes | 4 bytes |
# +------------+-----------+---------------+------------+
# start ip end ip data length data ptr
import os
import struct
import sys
sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))))
import logging
import struct
import time
import segment as seg
import index as idx
import util
import sys
import xdb.segment as seg
import xdb.index as idx
import xdb.util as util
VersionNo = 2
HeaderInfoLength = 256
VectorIndexRows = 256
VectorIndexCols = 256
VectorIndexSize = 8
VectorIndexLength = VectorIndexRows * VectorIndexCols * VectorIndexSize
Version_No = 2
Header_Info_Length = 256
Vector_Index_Rows = 256
Vector_Index_Cols = 256
Vector_Index_Size = 8
Vector_Index_Length = Vector_Index_Rows * Vector_Index_Cols * Vector_Index_Size
class Maker:
src_handle = None
dst_handle = None
index_policy = 0
segments = []
region_pool = {}
index_policy = idx.Vector_Index_Policy
segments = None
region_pool = None
vector_index = None
def __init__(self, sh, dh, ip, sg, rp, vi):
@@ -83,30 +85,32 @@ class Maker:
def init(self):
"""
Init the `xdb` binary file.
1. init the file header
2. load all the segments
1. Init the file header
2. Load all the segments
"""
self.init_db_header()
self.load_segments()
def init_db_header(self):
"""Init and write the file header to the destination xdb file."""
"""
Init and write the file header to the destination xdb file.
"""
logging.info("try to init the db header ... ")
self.src_handle.seek(0, 0)
header = bytearray([0]*256)
# make and write the header space
# 1. version number
header[0:2] = VersionNo.to_bytes(2, byteorder="little")
# 2. index policy code
# Make and write the header space
header = bytearray([0] * 256)
# 1. Version number
header[0:2] = Version_No.to_bytes(2, byteorder="little")
# 2. Index policy code
header[2:4] = int(self.index_policy).to_bytes(2, byteorder="little")
# 3. generate unix timestamp
# 3. Generate unix timestamp
header[4:8] = int(time.time()).to_bytes(4, byteorder="little")
# 4. index block start ptr
# 4. Index block start ptr
header[8:12] = int(0).to_bytes(4, byteorder="little")
# 5. index block end ptr
# 5. Index block end ptr
header[12:16] = int(0).to_bytes(4, byteorder="little")
# write header buffer to file
# Write header buffer to file
self.dst_handle.write(header)
def load_segments(self) -> list:
@@ -125,122 +129,158 @@ class Maker:
if len(ps) != 3:
logging.error("invalid ip segment line `{}`".format(line))
return []
sip = util.checkip(ps[0])
sip = util.check_ip(ps[0])
if sip == -1:
logging.error("invalid ip address `{}`".format(line))
logging.error(
"invalid ip address `{}` in line `{}`".format(ps[0], line)
)
return []
eip = util.checkip(ps[1])
eip = util.check_ip(ps[1])
if eip == -1:
logging.error("invalid ip address `{}`".format(line))
logging.error(
"invalid ip address `{}` in line `{}`".format(ps[1], line)
)
return []
if sip > eip:
logging.error("start ip({}) should not be greater than end ip({})".format(ps[0], ps[1]))
logging.error(
"start ip({}) should not be greater than end ip({})".format(
ps[0], ps[1]
)
)
return []
if len(ps[2]) < 1:
logging.error("empty region info in segment line `{}`".format(line))
return []
segment = seg.Segment(sip=sip, eip=eip, reg=ps[2])
# check the continuity of data segment
segment = seg.Segment(sip=sip, eip=eip, reg=ps[2])
# Check the continuity of data segment
if last is not None:
if last.end_ip + 1 != segment.start_ip:
logging.error("discontinuous data segment: last.eip+1({})!=seg.sip({}, {})".format(sip, eip, ps[0]))
logging.error(
"discontinuous data segment: last.eip+1({})!=seg.sip({}, {})".format(
sip, eip, ps[0]
)
)
return []
self.segments.append(segment)
last = segment
logging.info("all segments loaded, length: {}, elapsed: {}".format(len(self.segments), time.time() - s_tm))
logging.info(
"all segments loaded, length: {}, elapsed: {}".format(
len(self.segments), time.time() - s_tm
)
)
def set_vector_index(self, ip, ptr):
"""
Init and refresh the vector index based on the IP pre-two bytes.
"""
row, col = (ip >> 24) & 0xFF, (ip >> 16) & 0xFF
vi_block = self.vector_index[row][col]
if vi_block.first_ptr == 0:
vi_block.first_ptr = ptr
vi_block.last_ptr = ptr + idx.SegmentIndexBlockSize
vi_block.last_ptr = ptr + idx.Segment_Index_Block_Size
else:
vi_block.last_ptr = ptr + idx.SegmentIndexBlockSize
vi_block.last_ptr = ptr + idx.Segment_Index_Block_Size
self.vector_index[row][col] = vi_block
def start(self):
"""Start to make the 'xdb' binary file."""
"""
Start to make the 'xdb' binary file.
"""
if len(self.segments) < 1:
logging.error("empty segment list")
return
# 1. write all the region/data to the binary file
self.dst_handle.seek(HeaderInfoLength+VectorIndexLength, 0)
# 1. Write all the region/data to the binary file
self.dst_handle.seek(Header_Info_Length + Vector_Index_Length, 0)
logging.info("try to write the data block ... ")
for s in self.segments:
logging.info("try to write region '{}'...".format(s.region))
if s.region in self.region_pool:
logging.info(" --[Cached] with ptr={}".format(self.region_pool[s.region]))
logging.info(
" --[Cached] with ptr={}".format(self.region_pool[s.region])
)
continue
region = bytes(s.region, encoding="utf-8")
if len(region) > 0xFFFF:
logging.error("too long region info `{}`: should be less than {} bytes".format(s.region, 0xFFFF))
logging.error(
"too long region info `{}`: should be less than {} bytes".format(
s.region, 0xFFFF
)
)
return
# get the first ptr of the next region
# Get the first ptr of the next region
pos = self.dst_handle.seek(0, 1)
logging.info("{} {} {}".format(pos, region, s.region))
self.dst_handle.write(region)
self.region_pool[s.region] = pos
logging.info(" --[Added] with ptr={}".format(pos))
# 2. write the index block and cache the super index block
# 2. Write the index block and cache the super index block
logging.info("try to write the segment index block ... ")
counter, start_index_ptr, end_index_ptr = 0, -1, -1
for sg in self.segments:
data_ptr = -1
if sg.region in self.region_pool:
data_ptr = self.region_pool[sg.region]
else:
if sg.region not in self.region_pool:
logging.error("missing ptr cache for region `{}`".format(sg.region))
return
data_len = len(bytes(sg.region, encoding="utf-8"))
if data_len < 1:
logging.error("empty region info for segment '{}'".format(sg.region))
return
seg_list = sg.split()
logging.info("try to index segment({} split) {} ...".format(len(seg_list), sg.string()))
logging.info(
"try to index segment({} split) {} ...".format(len(seg_list), sg)
)
for s in seg_list:
pos = self.dst_handle.seek(0, 1)
s_index = idx.SegmentIndexBlock(
sip=s.start_ip, eip=s.end_ip, dl=data_len, dp=data_ptr
sip=s.start_ip,
eip=s.end_ip,
dl=data_len,
dp=self.region_pool[sg.region],
)
self.dst_handle.write(s_index.encode())
logging.info("|-segment index: {}, ptr: {}, segment: {}".format(counter, pos, s.string()))
logging.info(
"|-segment index: {}, ptr: {}, segment: {}".format(counter, pos, s)
)
self.set_vector_index(s.start_ip, pos)
counter += 1
# check and record the start index ptr
# Check and record the start index ptr
if start_index_ptr == -1:
start_index_ptr = pos
end_index_ptr = pos
# synchronized the vector index block
# 3. Synchronized the vector index block
logging.info("try to write the vector index block ... ")
self.dst_handle.seek(HeaderInfoLength, 0)
self.dst_handle.seek(Header_Info_Length, 0)
for i in range(0, len(self.vector_index)):
for j in range(0, len(self.vector_index[i])):
vi = self.vector_index[i][j]
self.dst_handle.write(vi.encode())
# synchronized the segment index info
# 4. Synchronized the segment index info
logging.info("try to write the segment index ptr ... ")
buff = struct.pack("<II", start_index_ptr, end_index_ptr)
self.dst_handle.seek(8, 0)
self.dst_handle.write(buff)
logging.info("write done, dataBlocks: {}, indexBlocks: ({}, {}), indexPtr: ({}, {})".format(
len(self.region_pool), len(self.segments), counter, start_index_ptr, end_index_ptr
))
logging.info(
"write done, dataBlocks: {}, indexBlocks: ({}, {}), indexPtr: ({}, {})".format(
len(self.region_pool),
len(self.segments),
counter,
start_index_ptr,
end_index_ptr,
)
)
def end(self):
"""End of make the 'xdb' binary file."""
"""
End of make the 'xdb' binary file.
"""
try:
self.src_handle.close()
self.dst_handle.close()
@@ -250,20 +290,27 @@ class Maker:
def new_maker(policy: int, srcfile: str, dstfile: str) -> Maker:
"""Create a xdb Maker to make the xdb binary file
"""
Create a xdb Maker to make the xdb binary file
:param policy: index algorithm code 1:vector, 2:b-tree
:param srcfile: source ip text file path
:param dstfile: destination binary xdb file path
:return: the 'xdb' Maker
"""
try:
sh = open(srcfile, mode='r', encoding='utf-8')
dh = open(dstfile, mode='wb')
sh = open(srcfile, mode="r", encoding="utf-8")
dh = open(dstfile, mode="wb")
return Maker(
sh=sh, dh=dh, ip=policy, sg=[], rp={},
vi=[[idx.VectorIndexBlock() for _ in range(VectorIndexRows)] for _ in range(VectorIndexCols)],
sh=sh,
dh=dh,
ip=policy,
sg=[],
rp={},
vi=[
[idx.VectorIndexBlock() for _ in range(Vector_Index_Rows)]
for _ in range(Vector_Index_Cols)
],
)
except IOError as e:
logging.error(e)
sys.exit()

View File

@@ -1,161 +0,0 @@
import socket
import struct
import io
import sys
# Layout constants of the xdb file, all sizes in bytes.
HeaderInfoLength = 256
VectorIndexRows = 256
VectorIndexCols = 256
VectorIndexSize = 8
SegmentIndexSize = 14


class XdbSearcher(object):
    """
    Searcher for the ip2region `xdb` binary format.

    The data source is picked at construction time:
      * `contentBuff` given: every read is served from that in-memory copy
        of the whole xdb file;
      * otherwise `dbfile` is opened and read on demand, optionally with a
        pre-loaded `vectorIndex` buffer to skip the vector index read.
    """

    __f = None

    # the minimal memory allocation.
    vectorIndex = None
    # The whole xdb file content kept in memory.
    contentBuff = None

    @staticmethod
    def loadVectorIndexFromFile(dbfile):
        """
        Read the vector index area of `dbfile`.

        :return: the vector index bytes, or None if the file cannot be read
            (the error is printed).
        """
        try:
            with io.open(dbfile, "rb") as f:
                f.seek(HeaderInfoLength)
                # Each vector index entry is VectorIndexSize (8) bytes; the
                # original sized the read with SegmentIndexSize (14).
                return f.read(VectorIndexRows * VectorIndexCols * VectorIndexSize)
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    @staticmethod
    def loadContentFromFile(dbfile):
        """
        Read the whole content of `dbfile`.

        :return: the file bytes, or None if the file cannot be read
            (the error is printed).
        """
        try:
            with io.open(dbfile, "rb") as f:
                return f.read()
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
        self.initDatabase(dbfile, vectorIndex, contentBuff)

    def search(self, ip):
        """
        Look up the region of `ip`.

        :param ip: an int, a dotted ipv4 string or a string of digits
        :return: the region string, "" when no segment matches
        """
        if isinstance(ip, str):
            return self.searchByIPStr(ip)
        return self.searchByIPLong(ip)

    def searchByIPStr(self, ip):
        """
        Look up the region of an ip given as a dotted or all-digit string.
        """
        # An all-digit string is the integer form of the ip; it has to be
        # converted, the bit shifts of the lookup do not accept a str.
        ip = int(ip) if ip.isdigit() else self.ip2long(ip)
        return self.searchByIPLong(ip)

    def searchByIPLong(self, ip):
        """
        Look up the region of an ip given as an integer.

        :return: the region string, "" when no segment matches
        """
        # Locate the segment index block based on the vector index; the
        # entry is addressed by the first two bytes of the ip.
        il0 = (ip >> 24) & 0xFF
        il1 = (ip >> 16) & 0xFF
        idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize
        if self.vectorIndex is not None:
            sPtr = self.getLong(self.vectorIndex, idx)
            ePtr = self.getLong(self.vectorIndex, idx + 4)
        elif self.contentBuff is not None:
            sPtr = self.getLong(self.contentBuff, HeaderInfoLength + idx)
            ePtr = self.getLong(self.contentBuff, HeaderInfoLength + idx + 4)
        else:
            self.__f.seek(HeaderInfoLength + idx)
            buffer_ptr = self.__f.read(8)
            sPtr = self.getLong(buffer_ptr, 0)
            ePtr = self.getLong(buffer_ptr, 4)

        # Binary search the segment index block to get the region info.
        # Entry layout: start ip (4) | end ip (4) | data length (2) | data ptr (4)
        dataLen = dataPtr = -1
        low = 0
        high = int((ePtr - sPtr) / SegmentIndexSize)
        while low <= high:
            mid = (low + high) >> 1
            pos = sPtr + mid * SegmentIndexSize
            # Read the segment index
            buffer_sip = self.readBuffer(pos, SegmentIndexSize)
            sip = self.getLong(buffer_sip, 0)
            if ip < sip:
                high = mid - 1
                continue
            eip = self.getLong(buffer_sip, 4)
            if ip > eip:
                low = mid + 1
                continue
            dataLen = self.getInt2(buffer_sip, 8)
            dataPtr = self.getLong(buffer_sip, 10)
            break

        # Empty match interception
        if dataPtr < 0:
            return ""

        return self.readBuffer(dataPtr, dataLen).decode("utf-8")

    def readBuffer(self, offset, length):
        """
        Read `length` bytes at `offset` from the in-memory content if there
        is one, else from the file handle. Returns None if neither is set.
        """
        # Check the in-memory buffer first
        if self.contentBuff is not None:
            return self.contentBuff[offset:offset + length]

        # Read from the file handle
        if self.__f is not None:
            self.__f.seek(offset)
            return self.__f.read(length)
        return None

    def initDatabase(self, dbfile, vi, cb):
        """
        Initialize the database for search.

        :param dbfile: xdb file path, opened only when `cb` is None
        :param vi: vector index buffer, used only when `cb` is None
        :param cb: whole xdb content buffer; takes precedence over the others
        """
        try:
            if cb is not None:
                self.__f = None
                self.vectorIndex = None
                self.contentBuff = cb
            else:
                self.__f = io.open(dbfile, "rb")
                self.vectorIndex = vi
        except IOError as e:
            print("[Error]: %s" % e)
            sys.exit()

    def ip2long(self, ip):
        """
        Convert a dotted ipv4 string to its integer value.
        """
        _ip = socket.inet_aton(ip)
        return struct.unpack("!L", _ip)[0]

    def isip(self, ip):
        """
        Tell whether `ip` is four dot separated decimal parts, each of at
        most three digits and not greater than 255.
        """
        parts = ip.split(".")
        if len(parts) != 4:
            return False
        for part in parts:
            if not part.isdigit():
                return False
            if len(part) > 3:
                return False
            if int(part) > 255:
                return False
        return True

    def getLong(self, b, offset):
        """
        Decode the 4 bytes at `offset` as a little-endian unsigned int.
        Returns 0 when fewer than 4 bytes are available.
        """
        chunk = b[offset:offset + 4]
        if len(chunk) == 4:
            # Explicit little-endian: bare 'I' uses the host byte order.
            return struct.unpack("<I", chunk)[0]
        return 0

    def getInt2(self, b, offset):
        """
        Decode the 2 bytes at `offset` as a little-endian unsigned int.
        """
        # The high byte has to be shifted into place; masking it with
        # 0xFF00 always gave 0 and truncated every length above 255.
        return (b[offset] & 0xFF) | ((b[offset + 1] & 0xFF) << 8)

    def close(self):
        """
        Close the file handle (if any) and drop the cached buffers.
        """
        if self.__f is not None:
            self.__f.close()
        self.vectorIndex = None
        self.contentBuff = None

View File

@@ -1,6 +1,10 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
import util
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#
import xdb.util as util
class Segment:
@@ -12,9 +16,30 @@ class Segment:
self.start_ip, self.end_ip = sip, eip
self.region = reg
def __str__(self):
return "{}|{}|{}".format(
util.long2ip(self.start_ip), util.long2ip(self.end_ip), self.region
)
def split(self) -> list:
"""Split the segment based on the pre-two bytes."""
# 1, split the segment with the first byte
"""
Split the segment based on the pre-two bytes.
:return: the list of segments after the split
"""
# Example:
# split the segment "116.31.76.0|117.21.79.49|region"
#
# Return the list with segments:
# 116.31.76.0 | 116.31.255.255 | region
# 116.32.0.0 | 116.32.255.255 | region
# ... | ... | region
# 116.255.0.0 | 116.255.255.255 | region
# 117.0.0.0 | 117.0.255.255 | region
# 117.1.0.0 | 117.1.255.255 | region
# ... | ... | region
# 117.21.0.0 | 117.21.79.49 | region
# 1. Split the segment with the first byte
t_list_1 = []
s_byte_1, e_byte_1 = (self.start_ip >> 24) & 0xFF, (self.end_ip >> 24) & 0xFF
n_sip = self.start_ip
@@ -25,11 +50,10 @@ class Segment:
n_sip = (i + 1) << 24
else:
eip = self.end_ip
# append the new segment (maybe)
# Append the new segment (maybe)
t_list_1.append(Segment(sip, eip))
# 2, split the segments with the second byte
# 2. Split the segments with the second byte
t_list_2 = []
for s in t_list_1:
base = s.start_ip & 0xFF000000
@@ -42,28 +66,5 @@ class Segment:
n_sip = 0
else:
eip = self.end_ip
t_list_2.append(Segment(sip, eip, self.region))
return t_list_2
def string(self) -> str:
return util.long2ip(self.start_ip) + "|" + util.long2ip(self.end_ip) + "|" + self.region
def segment_from(seg: str) -> Segment:
    """
    Build a Segment from a `start_ip|end_ip|region` text line.

    :param seg: the segment line
    :return: the parsed Segment; a default-constructed Segment when the
        line does not have three parts or util.checkip reports -1 for
        one of the ips
    """
    segment = Segment()
    # The region itself may contain "|" (e.g. "中国|0|浙江省|嘉兴市|移动"),
    # so split at most twice; a limit of 3 yields four parts for such
    # lines and rejects them all.
    ps = seg.split("|", 2)
    if len(ps) != 3:
        return segment
    sip = util.checkip(ps[0])
    if sip == -1:
        return segment
    eip = util.checkip(ps[1])
    if eip == -1:
        return segment
    segment.start_ip, segment.end_ip = sip, eip
    segment.region = ps[2]
    return segment

View File

@@ -1,42 +1,47 @@
# Created by leolin49 on 2022/7/7.
# Copyright (C) 2022 leolin49. All rights reserved.
shift_index = (24, 16, 8, 0)
# Util function
# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Author: leolin49 <leolin49@foxmail.com>
#
_SHIFT_INDEX = (24, 16, 8, 0)
def checkip(ip: str) -> int:
"""Convert ip string to integer."""
def check_ip(ip: str) -> int:
"""
Convert ip string to integer.
Return -1 if ip is not the correct ipv4 address.
"""
if not is_ipv4(ip):
return -1
ps = ip.split(".")
if len(ps) != 4:
return 0
val = 0
for i in range(len(ps)):
d = int(ps[i])
if d < 0 or d > 255:
return 0
val |= d << shift_index[i]
val |= d << _SHIFT_INDEX[i]
return val
def long2ip(num: int) -> str:
"""Convert integer to ip string."""
return "{}.{}.{}.{}".format((num >> 24) & 0xFF, (num >> 16) & 0xFF, (num >> 8) & 0xFF, num & 0xFF)
def mid_ip(sip: int, eip: int):
"""Get the middle ip between sip and eip."""
return (sip + eip) >> 1
"""
Convert integer to ip string.
Return empty string if the num greater than UINT32_MAX or less than 0.
"""
if num < 0 or num > 0xFFFFFFFF:
return ""
return "{}.{}.{}.{}".format(
(num >> 24) & 0xFF, (num >> 16) & 0xFF, (num >> 8) & 0xFF, num & 0xFF
)
def is_ipv4(ip: str) -> bool:
"""Determine whether it is an ipv4 address."""
p = ip.split(".")
if len(p) != 4:
"""
Determine whether it is an ipv4 address.
"""
ps = ip.split(".")
if len(ps) != 4:
return False
for pp in p:
if not pp.isdigit() or len(pp) > 3 or int(pp) > 255:
for p in ps:
if not p.isdigit() or len(p) > 3 or (int(p) < 0 or int(p) > 255):
return False
return True