Add the java implementation for ip2region.db maker

This commit is contained in:
lionsoul
2019-05-02 19:51:29 +08:00
parent 2a9d53c2c4
commit bcb34f5191
13 changed files with 1642 additions and 0 deletions

79
maker/java/build.xml Normal file
View File

@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Ant build script for the ip2region Java db maker.

    Targets:
      usage (default) : print the available targets
      build           : compile the sources into ${classes}
      dist            : build + pack ip2region-${version}.jar and dbMaker-${version}.jar
      all             : dist + javadoc jar, then remove the temporary directories
-->
<project name="ip2region" default="usage" basedir=".">
    <!--Initialize-->
    <target name="init">
        <echo message="------------------------------------------------------------"/>
        <echo message="----------BUILDING IP2REGION PACKAGE------------------------"/>
        <echo message=""/>
        <property name="bottom" value="ip2region 2015-2019"/>
        <property name="jars" value="${basedir}"/>
        <property name="sources" value="${basedir}/src"/>
        <property name="classes" value="${basedir}/classes"/>
        <property name="version" value="1.2.2"/>
        <property name="api" value="${basedir}/doc"/>
        <mkdir dir="${classes}"/>
        <mkdir dir="${api}"/>
    </target>
    <!--
        NOTE(review): "lib.dir" is not defined anywhere in this file and no
        target below references this path; kept untouched for compatibility.
    -->
    <path id="classpath">
        <fileset dir="${lib.dir}" includes="**/*.jar"/>
    </path>
    <!--Build-->
    <target name="build" depends="init">
        <echo message="-------------Compiling Application--------------"/>
        <!--the sources contain non-ASCII string literals, so pin the source encoding-->
        <javac srcdir="${sources}" destdir="${classes}" encoding="UTF-8"></javac>
    </target>
    <!--Archive-->
    <target name="dist" depends="build">
        <!--everything, runnable as the interactive search shell-->
        <jar jarfile="${jars}/ip2region-${version}.jar" basedir="${classes}">
            <!--class filter-->
            <include name="**/*"/>
            <!--manifest information setting-->
            <manifest>
                <attribute name="Main-Class" value="org.lionsoul.ip2region.test.TestSearcher"/>
                <attribute name="Class-Path" value=""/>
            </manifest>
        </jar>
        <!--only the core package, runnable as the db maker-->
        <jar jarfile="${jars}/dbMaker-${version}.jar" basedir="${classes}" includes="org/lionsoul/ip2region/*">
            <!--manifest information setting-->
            <manifest>
                <attribute name="Main-Class" value="org.lionsoul.ip2region.DbMaker"/>
                <attribute name="Class-Path" value=""/>
            </manifest>
        </jar>
    </target>
    <!--Java document-->
    <target name="all" depends="dist">
        <echo message="------------Making Java Document------------------"/>
        <javadoc packagenames="org.*"
                 sourcepath="${sources}"
                 destdir="${api}"
                 bottom="${bottom}"
                 encoding="UTF-8"
                 charset="UTF-8"
                 author="false">
            <classpath>
                <pathelement location="${classes}"/>
            </classpath>
        </javadoc>
        <jar jarfile="${jars}/ip2region-${version}-javadoc.jar" basedir="${api}" includes="**/*"></jar>
        <!--do the data clean up-->
        <delete dir="${classes}"/>
        <delete dir="${api}"/>
    </target>
    <target name="usage">
        <echo message="*** ip2region ANT Build Script ***"/>
        <echo message="Usage: "/>
        <echo message=" ant [target]"/>
        <echo message=""/>
        <echo message=" target : "/>
        <echo message=" build : Build Application"/>
        <echo message=" dist : Build Application + Archive (JAR)"/>
        <echo message=" all : Build Application + Archive + JavaDoc"/>
    </target>
</project>

Binary file not shown.

View File

@@ -0,0 +1,86 @@
package org.lionsoul.ip2region;
/**
 * Plain holder for one region record: the city id, the region string and
 * the position of the record inside the db file.
 *
 * All setters return {@code this} so calls can be chained.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class DataBlock
{
    /** city id of the record */
    private int city_id;

    /** region address string */
    private String region;

    /** pointer of the region record in the db file */
    private int dataPtr;

    /**
     * Create a record with an explicit data pointer.
     *
     * @param city_id city id
     * @param region  region string
     * @param dataPtr data ptr
     */
    public DataBlock( int city_id, String region, int dataPtr )
    {
        this.dataPtr = dataPtr;
        this.region = region;
        this.city_id = city_id;
    }

    /**
     * Create a record whose data pointer is 0.
     *
     * @param city_id city id
     * @param region  region string
     */
    public DataBlock(int city_id, String region)
    {
        this(city_id, region, 0);
    }

    /** @return the city id */
    public int getCityId()
    {
        return this.city_id;
    }

    /**
     * @param city_id the new city id
     * @return this instance
     */
    public DataBlock setCityId(int city_id)
    {
        this.city_id = city_id;
        return this;
    }

    /** @return the region string */
    public String getRegion()
    {
        return this.region;
    }

    /**
     * @param region the new region string
     * @return this instance
     */
    public DataBlock setRegion(String region)
    {
        this.region = region;
        return this;
    }

    /** @return the data pointer */
    public int getDataPtr()
    {
        return this.dataPtr;
    }

    /**
     * @param dataPtr the new data pointer
     * @return this instance
     */
    public DataBlock setDataPtr(int dataPtr)
    {
        this.dataPtr = dataPtr;
        return this;
    }

    /**
     * @return the three fields joined with '|' in the order
     *         city id, region, data pointer
     */
    @Override
    public String toString()
    {
        return city_id + "|" + region + "|" + dataPtr;
    }
}

View File

@@ -0,0 +1,63 @@
package org.lionsoul.ip2region;
/**
 * Settings shared by the db maker and the db searcher: the size of the
 * header area and the size of one index block group.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class DbConfig
{
    /** total size in bytes of the header data block */
    private int totalHeaderSize;

    /**
     * max index data block size in bytes,
     * you should always choose the fastest read block size
     */
    private int indexBlockSize;

    /**
     * Create a configuration with the given header size and an index
     * block size of 8192 bytes.
     *
     * @param totalHeaderSize header size in bytes, must be a multiple of 8
     * @throws DbMakerConfigException if totalHeaderSize is not a multiple of 8
     */
    public DbConfig( int totalHeaderSize ) throws DbMakerConfigException
    {
        final boolean aligned = (totalHeaderSize % 8) == 0;
        if ( ! aligned ) {
            throw new DbMakerConfigException("totalHeaderSize must be times of 8");
        }

        this.indexBlockSize = 8192;    //4 * 2048
        this.totalHeaderSize = totalHeaderSize;
    }

    /**
     * Create a configuration with a header size of 8 * 2048 bytes.
     *
     * @throws DbMakerConfigException declared by the delegated constructor
     */
    public DbConfig() throws DbMakerConfigException
    {
        this(8 * 2048);
    }

    /** @return the header size in bytes */
    public int getTotalHeaderSize()
    {
        return this.totalHeaderSize;
    }

    /**
     * Replace the header size; the value is stored as given.
     *
     * @param totalHeaderSize the new header size in bytes
     * @return this instance
     */
    public DbConfig setTotalHeaderSize(int totalHeaderSize)
    {
        this.totalHeaderSize = totalHeaderSize;
        return this;
    }

    /** @return the index block size in bytes */
    public int getIndexBlockSize()
    {
        return this.indexBlockSize;
    }

    /**
     * Replace the index block size; the value is stored as given.
     *
     * @param dataBlockSize the new index block size in bytes
     * @return this instance
     */
    public DbConfig setIndexBlockSize(int dataBlockSize)
    {
        this.indexBlockSize = dataBlockSize;
        return this;
    }
}

View File

@@ -0,0 +1,408 @@
package org.lionsoul.ip2region;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
/**
 * fast ip db maker
 *
 * db struct (all integers are written little-endian through Util.writeIntLong):
 * 1. header part
 * 1): super part:
 * +------------+-----------+
 * | 4 bytes    | 4 bytes   |
 * +------------+-----------+
 *  start index ptr, end index ptr
 *
 * 2): b-tree index part (zero filled up to DbConfig.getTotalHeaderSize())
 * +------------+-----------+-----------+-----------+
 * | 4bytes     | 4bytes    | 4bytes    | 4bytes    | ...
 * +------------+-----------+-----------+-----------+
 *  start ip      index ptr   start ip    index ptr
 *
 * 2. data part (one record per distinct region string):
 * +------------+-----------------------+
 * | 4bytes     | dynamic length        |
 * +------------+-----------------------+
 *  city id       region string of the source line, UTF-8 encoded
 *
 * 3. index part: (ip range)
 * +------------+-----------+---------------+
 * | 4bytes     | 4bytes    | 4bytes        |
 * +------------+-----------+---------------+
 *  start ip      end ip      3 byte data ptr & 1 byte data length
 *
 * 4. trailer:
 *  4 bytes unix timestamp followed by the copyright string
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class DbMaker
{
    /**
     * size in bytes of one serialized header entry (start ip + index ptr)
     */
    private static final int HEADER_ENTRY_SIZE = 8;

    /**
     * db config
     */
    private DbConfig dbConfig;

    /**
     * ip source file path
     */
    private File ipSrcFile;

    /**
     * buffers, rebuilt for every make() call
     */
    private LinkedList<IndexBlock> indexPool;
    private LinkedList<HeaderBlock> headerPool;

    /**
     * global region Id mapping data
     */
    private File globalRegionFile = null;
    private HashMap<String, Integer> globalRegionMap = null;

    /**
     * region and data ptr mapping data,
     * used to store every distinct region string only once
     */
    private HashMap<String, DataBlock> regionPtrPool = null;

    /**
     * construct method
     *
     * @param config           db configuration
     * @param ipSrcFile        source ip file path
     * @param globalRegionFile global_region.csv file path, may be null
     * @throws DbMakerConfigException
     * @throws IOException if the source ip file does not exist
     */
    public DbMaker(
        DbConfig config,
        String ipSrcFile,
        String globalRegionFile ) throws DbMakerConfigException, IOException
    {
        this.dbConfig = config;
        this.ipSrcFile = new File(ipSrcFile);
        this.globalRegionMap = new HashMap<String, Integer>();
        this.regionPtrPool = new HashMap<String, DataBlock>();
        if ( globalRegionFile != null ) {
            this.globalRegionFile = new File(globalRegionFile);
        }

        if ( this.ipSrcFile.exists() == false ) {
            throw new IOException("Error: Invalid file path " + ipSrcFile);
        }
    }

    /**
     * initialize the db file and the internal buffers
     *
     * @param raf
     * @throws IOException
     */
    private void initDbFile( RandomAccessFile raf ) throws IOException
    {
        //"rw" mode does not truncate: drop whatever an older and
        //maybe larger db file left behind before writing the new one
        raf.setLength(0L);

        //1. zero fill the header part
        raf.seek(0L);
        raf.write(new byte[8]);                                 //super block
        raf.write(new byte[dbConfig.getTotalHeaderSize()]);     //header block

        headerPool = new LinkedList<HeaderBlock>();
        indexPool = new LinkedList<IndexBlock>();

        //pointers cached by a previous make() call are meaningless for the new file
        regionPtrPool.clear();
    }

    /**
     * load the "region name -> region id" mapping from the global region file,
     * does nothing when no global region file was given.
     * Only lines with exactly 5 comma separated columns are used:
     * column 2 is the key and column 0 the integer id.
     *
     * @throws IOException
     */
    private void loadGlobalRegion() throws IOException
    {
        if ( globalRegionFile == null ) return;

        System.out.println("+-Try to load the global region data ...");
        BufferedReader greader = new BufferedReader(new FileReader(globalRegionFile));
        try {
            String gline = null;
            while ( (gline = greader.readLine()) != null ) {
                String[] p = gline.split(",");
                if ( p.length != 5 ) continue;

                //push the mapping
                globalRegionMap.put(p[2], Integer.valueOf(p[0]));
            }
        } finally {
            greader.close();
        }
        System.out.println("|--[Ok]");
    }

    /**
     * parse every "start ip|end ip|region" line of the source file and
     * append its data block; blank lines, lines starting with '#' and
     * lines with less than two '|' are skipped.
     *
     * @param reader source file reader
     * @param raf    db file positioned at the end of the data part
     * @throws IOException
     */
    private void writeDataBlocks( BufferedReader reader, RandomAccessFile raf ) throws IOException
    {
        String line = null;
        while ( ( line = reader.readLine() ) != null ) {
            line = line.trim();
            if ( line.length() == 0 ) continue;
            if ( line.charAt(0) == '#' ) continue;

            //1. get the start ip
            int sIdx = 0, eIdx = 0;
            if ( (eIdx = line.indexOf('|', sIdx + 1)) == -1 ) continue;
            String startIp = line.substring(sIdx, eIdx);

            //2. get the end ip
            sIdx = eIdx + 1;
            if ( (eIdx = line.indexOf('|', sIdx + 1)) == -1 ) continue;
            String endIp = line.substring(sIdx, eIdx);

            //3. get the region
            sIdx = eIdx + 1;
            String region = line.substring(sIdx);

            System.out.println("+-Try to process item " + line);
            addDataBlock(raf, startIp, endIp, region);
            System.out.println("|--[Ok]");
        }
    }

    /**
     * write all the buffered index blocks at the current file position and
     * collect the header entries: one for the first index block, one every
     * (indexBlockSize / index block length) - 1 blocks, and a closing one
     * for the last index block.
     *
     * @param raf
     * @throws IOException
     */
    private void writeIndexBlocks( RandomAccessFile raf ) throws IOException
    {
        IndexBlock indexBlock = indexPool.getFirst();
        headerPool.add(new HeaderBlock(indexBlock.getStartIp(), (int)raf.getFilePointer()));

        int blockLength = IndexBlock.getIndexBlockLength();
        int counter = 0, shotCounter = (dbConfig.getIndexBlockSize()/blockLength) - 1;
        Iterator<IndexBlock> indexIt = indexPool.iterator();
        while ( indexIt.hasNext() ) {
            indexBlock = indexIt.next();
            if ( ++counter >= shotCounter ) {
                headerPool.add(new HeaderBlock(
                    indexBlock.getStartIp(),
                    (int)raf.getFilePointer()
                ));
                counter = 0;
            }

            //write the buffer
            raf.write(indexBlock.getBytes());
        }

        //record the end block
        if ( counter > 0 ) {
            indexBlock = indexPool.getLast();
            headerPool.add(new HeaderBlock(
                indexBlock.getStartIp(),
                ((int)raf.getFilePointer()) - blockLength
            ));
        }
    }

    /**
     * make the Db file
     *
     * @param dbFile target output file path, an existing file is overwritten
     * @throws IOException on any read/write failure, when the source file
     *         holds no usable record or when the header entries do not fit
     *         into the configured header size
     */
    public void make(String dbFile) throws IOException
    {
        //check and load the global region
        loadGlobalRegion();

        BufferedReader reader = null;
        RandomAccessFile raf = null;
        try {
            reader = new BufferedReader(new FileReader(this.ipSrcFile));
            raf = new RandomAccessFile(dbFile, "rw");

            //init the db file
            initDbFile(raf);
            System.out.println("+-Db file initialized.");

            //analysis main loop
            System.out.println("+-Try to write the data blocks ... ");
            writeDataBlocks(reader, raf);
            System.out.println("|--Data block flushed!");
            System.out.println("|--Data file pointer: "+raf.getFilePointer()+"\n");

            //write the index bytes
            System.out.println("+-Try to write index blocks ... ");
            if ( indexPool.isEmpty() ) {
                throw new IOException("Error: no valid ip record found in " + ipSrcFile);
            }

            int blockLength = IndexBlock.getIndexBlockLength();
            long indexStartPtr = raf.getFilePointer();
            writeIndexBlocks(raf);
            long indexEndPtr = raf.getFilePointer();
            System.out.println("|--[Ok]");

            //the header entries are written right after the super block,
            //more of them than reserved would overwrite the data part
            long headerBytes = (long) HEADER_ENTRY_SIZE * headerPool.size();
            if ( headerBytes > dbConfig.getTotalHeaderSize() ) {
                throw new IOException("Error: " + headerPool.size()
                    + " header blocks need " + headerBytes
                    + " bytes but totalHeaderSize is " + dbConfig.getTotalHeaderSize());
            }

            //write the super blocks
            System.out.println("+-Try to write the super blocks ... ");
            raf.seek(0L);    //reset the file pointer
            byte[] superBuffer = new byte[8];
            Util.writeIntLong(superBuffer, 0, indexStartPtr);
            Util.writeIntLong(superBuffer, 4, indexEndPtr - blockLength);
            raf.write(superBuffer);
            System.out.println("|--[Ok]");

            //write the header blocks
            System.out.println("+-Try to write the header blocks ... ");
            Iterator<HeaderBlock> headerIt = headerPool.iterator();
            while ( headerIt.hasNext() ) {
                HeaderBlock headerBlock = headerIt.next();
                raf.write(headerBlock.getBytes());
            }

            //write the copyright and the release timestamp info
            System.out.println("+-Try to write the copyright and release date info ... ");
            raf.seek(raf.length());
            Calendar cal = Calendar.getInstance();
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd");
            String copyright = "Created by lionsoul at " + dateFormat.format(cal.getTime());

            //RandomAccessFile.write(int) stores a single byte only,
            //so serialize the unix timestamp as 4 bytes explicitly
            byte[] timestamp = new byte[4];
            Util.writeIntLong(timestamp, 0, cal.getTime().getTime() / 1000);
            raf.write(timestamp);
            raf.write(copyright.getBytes("UTF-8"));
            System.out.println("|--[Ok]");
        } finally {
            //release both handlers even if the build failed half way
            try {
                if ( reader != null ) reader.close();
            } finally {
                if ( raf != null ) raf.close();
            }
        }
    }

    /**
     * internal method to add a new data block record.
     * The region data is written only the first time a region string is
     * seen, later ranges with the same region reuse the stored pointer.
     *
     * @param raf
     * @param startIp
     * @param endIp
     * @param region data
     * @throws IOException if the data block could not be written
     */
    private void addDataBlock(
        RandomAccessFile raf,
        String startIp, String endIp, String region) throws IOException
    {
        byte[] data = region.getBytes("UTF-8");
        int dataPtr = 0;

        //check region ptr pool first
        if ( regionPtrPool.containsKey(region) ) {
            DataBlock dataBlock = regionPtrPool.get(region);
            dataPtr = dataBlock.getDataPtr();
            System.out.println("dataPtr: " + dataPtr + ", region: " + region);
        } else {
            byte[] city = new byte[4];
            int city_id = getCityId(region);
            Util.writeIntLong(city, 0, city_id);
            dataPtr = (int)raf.getFilePointer();
            raf.write(city);
            raf.write(data);
            regionPtrPool.put(region, new DataBlock(city_id, region, dataPtr));
        }

        //add the data index blocks
        IndexBlock ib = new IndexBlock(
            Util.ip2long(startIp),
            Util.ip2long(endIp),
            dataPtr,
            data.length + 4    //4 bytes for the city id
        );
        indexPool.add(ib);
    }

    /**
     * get the city id base on the global region data.
     * The region must hold exactly 5 '|' separated fields; fields 3 down
     * to 0 are scanned and the first one that is not "0" is looked up.
     *
     * @param region
     * @return int the mapped id, or 0 if the region has another field
     *         count, all scanned fields are "0" or the key is unknown
     */
    public int getCityId( String region )
    {
        String[] p = region.split("\\|");
        if ( p.length != 5 ) return 0;

        String key = null;
        Integer intv = null;
        for ( int i = 3; i >= 0; i-- ) {
            if ( p[i].equals("0") ) continue;
            if ( i == 3
                    && p[i].indexOf("省直辖县级") > -1 ) {
                //this kind of field 3 is looked up together with field 2
                key = p[2]+p[3];
            } else {
                key = p[i];
            }

            intv = globalRegionMap.get(key);
            if ( intv == null ) return 0;
            return intv.intValue();
        }

        return 0;
    }

    public DbConfig getDbConfig()
    {
        return dbConfig;
    }

    public DbMaker setDbConfig(DbConfig dbConfig)
    {
        this.dbConfig = dbConfig;
        return this;
    }

    public File getIpSrcFile()
    {
        return ipSrcFile;
    }

    public DbMaker setIpSrcFile(File ipSrcFile)
    {
        this.ipSrcFile = ipSrcFile;
        return this;
    }

    /**
     * make this directly a runnable application
     * interface to make the database file.
     *
     * Options: -src [source text file path], -region [global region file path]
     * and the optional -dst [destination directory] (default ./data/).
     * An option given without a value is ignored.
     */
    public static void main(String args[])
    {
        String dstDir = "./data/";
        String[] path = new String[]{null, null};
        for ( int i = 0; i < args.length; i++ ) {
            //an option as the very last argument has no value to consume
            boolean hasValue = (i + 1) < args.length;
            if ( ! hasValue ) break;

            if ( args[i].equals("-src") ) {
                path[0] = args[++i];
            } else if ( args[i].equals("-region") ) {
                path[1] = args[++i];
            } else if ( args[i].equals("-dst") ) {
                dstDir = args[++i];
            }
        }

        if ( path[0] == null || path[1] == null ) {
            System.out.println("Usage: java -jar dbMaker.jar "
                    + "-src [source text file path] "
                    + "-region [global region file path]");
            System.out.println("eg: java -jar dbMaker.jar "
                    + "-src ./data/ip.merge.txt -region ./data/origin/global_region.csv");
            System.exit(0);
        }

        //check and stdlize the destination directory
        if ( ! dstDir.endsWith("/") ) {
            dstDir = dstDir + "/";
        }

        try {
            DbConfig config = new DbConfig();
            DbMaker dbMaker = new DbMaker(config, path[0], path[1]);
            dbMaker.make(dstDir + "ip2region.db");
        } catch (DbMakerConfigException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,26 @@
package org.lionsoul.ip2region;
/**
* configuration exception
*
* @author chenxin<chenxin619315@gmail.com>
*/
public class DbMakerConfigException extends Exception
{
private static final long serialVersionUID = 4495714680349884838L;
public DbMakerConfigException( String info )
{
super(info);
}
public DbMakerConfigException( Throwable res )
{
super(res);
}
public DbMakerConfigException( String info, Throwable res )
{
super(info, res);
}
}

View File

@@ -0,0 +1,405 @@
package org.lionsoul.ip2region;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * ip db searcher class (Not thread safe)
 *
 * Offers three lookups over a db file written by DbMaker:
 * a b-tree search driven by the header blocks, a binary search over the
 * index blocks on disk and a binary search over the whole file loaded
 * into memory.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class DbSearcher
{
    public static final int BTREE_ALGORITHM = 1;
    public static final int BINARY_ALGORITHM = 2;
    public static final int MEMORY_ALGORITYM = 3;

    /**
     * correctly spelled alias of {@link #MEMORY_ALGORITYM},
     * the misspelled name is kept for existing callers
     */
    public static final int MEMORY_ALGORITHM = MEMORY_ALGORITYM;

    /**
     * db config
     */
    private DbConfig dbConfig = null;

    /**
     * db file access handler,
     * null when the searcher was created from a binary string
     */
    private RandomAccessFile raf = null;

    /**
     * header blocks buffer
     */
    private long[] HeaderSip = null;
    private int[] HeaderPtr = null;
    private int headerLength;

    /**
     * super blocks info
     */
    private long firstIndexPtr = 0;
    private long lastIndexPtr = 0;
    private int totalIndexBlocks = 0;

    /**
     * for memory mode
     * the original db binary string
     */
    private byte[] dbBinStr = null;

    /**
     * construct class
     *
     * @param dbConfig
     * @param dbFile path of the db file, opened read-only
     * @throws FileNotFoundException
     */
    public DbSearcher( DbConfig dbConfig, String dbFile ) throws FileNotFoundException
    {
        this.dbConfig = dbConfig;
        raf = new RandomAccessFile(dbFile, "r");
    }

    /**
     * construct method with self-define std ip2region binary string support
     * Thanks to the issue from Wendal at https://gitee.com/lionsoul/ip2region/issues/IILFL
     *
     * No file handler is opened by this constructor, so only the
     * memorySearch methods are usable on such an instance.
     *
     * @param dbConfig
     * @param dbBinStr the whole content of a db file
     */
    public DbSearcher(DbConfig dbConfig, byte[] dbBinStr)
    {
        this.dbConfig = dbConfig;
        this.dbBinStr = dbBinStr;

        firstIndexPtr = Util.getIntLong(dbBinStr, 0);
        lastIndexPtr = Util.getIntLong(dbBinStr, 4);
        totalIndexBlocks = (int)((lastIndexPtr - firstIndexPtr)/IndexBlock.getIndexBlockLength()) + 1;
    }

    /**
     * read the data block addressed by the last 4 bytes of an index block
     * from the db file.
     *
     * @param dataptr packed value: the high byte is the data length,
     *                the low 3 bytes are the data pointer
     * @return DataBlock
     * @throws IOException
     */
    private DataBlock readDataBlock( long dataptr ) throws IOException
    {
        int dataLen = (int)((dataptr >> 24) & 0xFF);
        int dataPtr = (int)((dataptr & 0x00FFFFFF));

        raf.seek(dataPtr);
        byte[] data = new byte[dataLen];
        raf.readFully(data, 0, data.length);

        //the first 4 bytes are the city id, the rest is the region string
        int city_id = (int)Util.getIntLong(data, 0);
        String region = new String(data, 4, data.length - 4, "UTF-8");

        return new DataBlock(city_id, region, dataPtr);
    }

    /**
     * get the region with a int ip address with memory binary search algorithm.
     * The whole db file is loaded into memory on the first call when the
     * searcher was created from a file.
     *
     * @param ip
     * @return DataBlock or null if no index block matched
     * @throws IOException
     */
    public DataBlock memorySearch(long ip) throws IOException
    {
        int blen = IndexBlock.getIndexBlockLength();
        if ( dbBinStr == null ) {
            dbBinStr = new byte[(int)raf.length()];
            raf.seek(0L);
            raf.readFully(dbBinStr, 0, dbBinStr.length);

            //initialize the global vars
            firstIndexPtr = Util.getIntLong(dbBinStr, 0);
            lastIndexPtr = Util.getIntLong(dbBinStr, 4);
            totalIndexBlocks = (int)((lastIndexPtr - firstIndexPtr)/blen) + 1;
        }

        //search the index blocks to define the data,
        //valid block numbers are 0 .. totalIndexBlocks - 1
        int l = 0, h = totalIndexBlocks - 1;
        long sip, eip, dataptr = 0;
        while ( l <= h ) {
            int m = (l + h) >> 1;
            int p = (int)(firstIndexPtr + m * blen);
            sip = Util.getIntLong(dbBinStr, p);
            if ( ip < sip ) {
                h = m - 1;
            } else {
                eip = Util.getIntLong(dbBinStr, p + 4);
                if ( ip > eip ) {
                    l = m + 1;
                } else {
                    dataptr = Util.getIntLong(dbBinStr, p + 8);
                    break;
                }
            }
        }

        //not matched
        if ( dataptr == 0 ) return null;

        //get the data
        int dataLen = (int)((dataptr >> 24) & 0xFF);
        int dataPtr = (int)((dataptr & 0x00FFFFFF));
        int city_id = (int)Util.getIntLong(dbBinStr, dataPtr);
        String region = new String(dbBinStr, dataPtr + 4, dataLen - 4, "UTF-8");

        return new DataBlock(city_id, region, dataPtr);
    }

    /**
     * get the region through the ip address with memory binary search algorithm
     *
     * @param ip
     * @return DataBlock
     * @throws IOException
     */
    public DataBlock memorySearch( String ip ) throws IOException
    {
        return memorySearch(Util.ip2long(ip));
    }

    /**
     * get by index ptr
     *
     * @param ptr position of an index block in the db file
     * @return DataBlock the data block the index block points to
     * @throws IOException
     */
    public DataBlock getByIndexPtr( long ptr ) throws IOException
    {
        raf.seek(ptr);
        byte[] buffer = new byte[12];
        raf.readFully(buffer, 0, buffer.length);

        //bytes 0-3 are the start ip and 4-7 the end ip, not needed here
        long extra = Util.getIntLong(buffer, 8);

        return readDataBlock(extra);
    }

    /**
     * get the region with a int ip address with b-tree algorithm.
     * The header blocks are loaded from the db file on the first call.
     *
     * @param ip
     * @return DataBlock or null if nothing matched
     * @throws IOException
     */
    public DataBlock btreeSearch( long ip ) throws IOException
    {
        //check and load the header
        if ( HeaderSip == null ) {
            raf.seek(8L);    //pass the super block
            byte[] b = new byte[dbConfig.getTotalHeaderSize()];
            raf.readFully(b, 0, b.length);

            //fill the header
            int len = b.length >> 3, idx = 0;    //b.length / 8
            HeaderSip = new long[len];
            HeaderPtr = new int [len];
            long startIp, dataPtr;
            for ( int i = 0; i < b.length; i += 8 ) {
                startIp = Util.getIntLong(b, i);
                dataPtr = Util.getIntLong(b, i + 4);
                //a zero pointer marks the unused, zero filled rest of the header
                if ( dataPtr == 0 ) break;

                HeaderSip[idx] = startIp;
                HeaderPtr[idx] = (int)dataPtr;
                idx++;
            }

            headerLength = idx;
        }

        //1. define the index block with the binary search
        if ( ip == HeaderSip[0] ) {
            return getByIndexPtr(HeaderPtr[0]);
        } else if ( ip == HeaderSip[headerLength-1] ) {
            return getByIndexPtr(HeaderPtr[headerLength-1]);
        }

        //[sptr, eptr] will delimit the run of index blocks that may hold the ip
        int l = 0, h = headerLength, sptr = 0, eptr = 0;
        while ( l <= h ) {
            int m = (l + h) >> 1;

            //perfect matched, just return it
            if ( ip == HeaderSip[m] ) {
                if ( m > 0 ) {
                    sptr = HeaderPtr[m-1];
                    eptr = HeaderPtr[m ];
                } else {
                    sptr = HeaderPtr[m ];
                    eptr = HeaderPtr[m+1];
                }

                break;
            }

            //less then the middle value
            if ( ip < HeaderSip[m] ) {
                if ( m == 0 ) {
                    sptr = HeaderPtr[m ];
                    eptr = HeaderPtr[m+1];
                    break;
                } else if ( ip > HeaderSip[m-1] ) {
                    sptr = HeaderPtr[m-1];
                    eptr = HeaderPtr[m ];
                    break;
                }
                h = m - 1;
            } else {
                if ( m == headerLength - 1 ) {
                    sptr = HeaderPtr[m-1];
                    eptr = HeaderPtr[m ];
                    break;
                } else if ( ip <= HeaderSip[m+1] ) {
                    sptr = HeaderPtr[m ];
                    eptr = HeaderPtr[m+1];
                    break;
                }
                l = m + 1;
            }
        }

        //match nothing just stop it
        if ( sptr == 0 ) return null;

        //2. search the index blocks to define the data
        int blockLen = eptr - sptr, blen = IndexBlock.getIndexBlockLength();
        byte[] iBuffer = new byte[blockLen + blen];    //include the right border block
        raf.seek(sptr);
        raf.readFully(iBuffer, 0, iBuffer.length);

        //the buffer holds blockLen / blen + 1 blocks, so h is a valid block number
        l = 0; h = blockLen / blen;
        long sip, eip, dataptr = 0;
        while ( l <= h ) {
            int m = (l + h) >> 1;
            int p = m * blen;
            sip = Util.getIntLong(iBuffer, p);
            if ( ip < sip ) {
                h = m - 1;
            } else {
                eip = Util.getIntLong(iBuffer, p + 4);
                if ( ip > eip ) {
                    l = m + 1;
                } else {
                    dataptr = Util.getIntLong(iBuffer, p + 8);
                    break;
                }
            }
        }

        //not matched
        if ( dataptr == 0 ) return null;

        //3. get the data
        return readDataBlock(dataptr);
    }

    /**
     * get the region through the ip address with b-tree search algorithm
     *
     * @param ip
     * @return DataBlock
     * @throws IOException
     */
    public DataBlock btreeSearch( String ip ) throws IOException
    {
        return btreeSearch(Util.ip2long(ip));
    }

    /**
     * get the region with a int ip address with binary search algorithm.
     * The super block is read from the db file on the first call.
     *
     * @param ip
     * @return DataBlock or null if no index block matched
     * @throws IOException
     */
    public DataBlock binarySearch( long ip ) throws IOException
    {
        int blen = IndexBlock.getIndexBlockLength();
        if ( totalIndexBlocks == 0 ) {
            raf.seek(0L);
            byte[] superBytes = new byte[8];
            raf.readFully(superBytes, 0, superBytes.length);

            //initialize the global vars
            firstIndexPtr = Util.getIntLong(superBytes, 0);
            lastIndexPtr = Util.getIntLong(superBytes, 4);
            totalIndexBlocks = (int)((lastIndexPtr - firstIndexPtr)/blen) + 1;
        }

        //search the index blocks to define the data,
        //valid block numbers are 0 .. totalIndexBlocks - 1
        int l = 0, h = totalIndexBlocks - 1;
        byte[] buffer = new byte[blen];
        long sip, eip, dataptr = 0;
        while ( l <= h ) {
            int m = (l + h) >> 1;
            raf.seek(firstIndexPtr + m * blen);    //set the file pointer
            raf.readFully(buffer, 0, buffer.length);
            sip = Util.getIntLong(buffer, 0);
            if ( ip < sip ) {
                h = m - 1;
            } else {
                eip = Util.getIntLong(buffer, 4);
                if ( ip > eip ) {
                    l = m + 1;
                } else {
                    dataptr = Util.getIntLong(buffer, 8);
                    break;
                }
            }
        }

        //not matched
        if ( dataptr == 0 ) return null;

        //get the data
        return readDataBlock(dataptr);
    }

    /**
     * get the region through the ip address with binary search algorithm
     *
     * @param ip
     * @return DataBlock
     * @throws IOException
     */
    public DataBlock binarySearch( String ip ) throws IOException
    {
        return binarySearch(Util.ip2long(ip));
    }

    /**
     * get the db config
     *
     * @return DbConfig
     */
    public DbConfig getDbConfig()
    {
        return dbConfig;
    }

    /**
     * close the db
     *
     * @throws IOException
     */
    public void close() throws IOException
    {
        HeaderSip = null;    //let gc do its work
        HeaderPtr = null;
        dbBinStr = null;

        //no file handler exists when created from a binary string
        if ( raf != null ) {
            raf.close();
        }
    }
}

View File

@@ -0,0 +1,68 @@
package org.lionsoul.ip2region;
/**
 * One entry of the db header: the start ip of an index block together
 * with the pointer of that index block.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class HeaderBlock
{
    /** start ip address of the index block */
    private long indexStartIp;

    /** pointer of the index block */
    private int indexPtr;

    /**
     * @param indexStartIp start ip address of the index block
     * @param indexPtr     pointer of the index block
     */
    public HeaderBlock( long indexStartIp, int indexPtr )
    {
        this.indexPtr = indexPtr;
        this.indexStartIp = indexStartIp;
    }

    /** @return the start ip address of the index block */
    public long getIndexStartIp()
    {
        return this.indexStartIp;
    }

    /**
     * @param indexStartIp the new start ip address
     * @return this instance
     */
    public HeaderBlock setIndexStartIp(long indexStartIp)
    {
        this.indexStartIp = indexStartIp;
        return this;
    }

    /** @return the pointer of the index block */
    public int getIndexPtr()
    {
        return this.indexPtr;
    }

    /**
     * @param indexPtr the new index block pointer
     * @return this instance
     */
    public HeaderBlock setIndexPtr(int indexPtr)
    {
        this.indexPtr = indexPtr;
        return this;
    }

    /**
     * Serialize this entry for the db storage.
     *
     * +------------+-----------+
     * | 4bytes     | 4bytes    |
     * +------------+-----------+
     *  start ip      index ptr
     *
     * @return byte[] the 8 bytes of the entry
     */
    public byte[] getBytes()
    {
        final byte[] packed = new byte[8];
        Util.writeIntLong(packed, 4, indexPtr);        //second half: index ptr
        Util.writeIntLong(packed, 0, indexStartIp);    //first half: start ip
        return packed;
    }
}

View File

@@ -0,0 +1,113 @@
package org.lionsoul.ip2region;
/**
 * item index class: one ip range together with the pointer and the
 * length of its data block.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class IndexBlock
{
    /** size in bytes of one serialized index block */
    private static final int LENGTH = 12;

    /** biggest data pointer that fits into the 3 bytes reserved for it */
    private static final int MAX_DATA_PTR = 0x00FFFFFF;

    /** biggest data length that fits into the 1 byte reserved for it */
    private static final int MAX_DATA_LEN = 0xFF;

    /**
     * start ip address
     */
    private long startIp;

    /**
     * end ip address
     */
    private long endIp;

    /**
     * data ptr
     */
    private int dataPtr;

    /**
     * data length
     */
    private int dataLen;

    /**
     * @param startIp start ip address of the range
     * @param endIp   end ip address of the range
     * @param dataPtr pointer of the data block
     * @param dataLen length of the data block
     */
    public IndexBlock(long startIp, long endIp, int dataPtr, int dataLen)
    {
        this.startIp = startIp;
        this.endIp = endIp;
        this.dataPtr = dataPtr;
        this.dataLen = dataLen;
    }

    public long getStartIp()
    {
        return startIp;
    }

    public IndexBlock setStartIp(long startIp)
    {
        this.startIp = startIp;
        return this;
    }

    public long getEndIp()
    {
        return endIp;
    }

    public IndexBlock setEndIp(long endIp)
    {
        this.endIp = endIp;
        return this;
    }

    public int getDataPtr()
    {
        return dataPtr;
    }

    public IndexBlock setDataPtr(int dataPtr)
    {
        this.dataPtr = dataPtr;
        return this;
    }

    public int getDataLen()
    {
        return dataLen;
    }

    public IndexBlock setDataLen(int dataLen)
    {
        this.dataLen = dataLen;
        return this;
    }

    /**
     * @return the size in bytes of one serialized index block
     */
    public static int getIndexBlockLength()
    {
        return LENGTH;
    }

    /**
     * get the bytes for storage
     *
     * +------------+-----------+-----------+
     * | 4bytes     | 4bytes    | 4bytes    |
     * +------------+-----------+-----------+
     *  start ip      end ip      data ptr + len
     *
     * The last field packs the data length into its high byte and the data
     * pointer into its low 3 bytes.
     *
     * @return byte[]
     * @throws IllegalStateException if the data pointer or the data length
     *         does not fit into its part of the packed field
     */
    public byte[] getBytes()
    {
        //a wider value would silently overwrite the neighbour bits and
        //make the searcher read the wrong data
        if ( dataPtr < 0 || dataPtr > MAX_DATA_PTR ) {
            throw new IllegalStateException(
                "data ptr " + dataPtr + " does not fit into 3 bytes");
        }
        if ( dataLen < 0 || dataLen > MAX_DATA_LEN ) {
            throw new IllegalStateException(
                "data length " + dataLen + " does not fit into 1 byte");
        }

        byte[] b = new byte[LENGTH];
        Util.writeIntLong(b, 0, startIp);    //start ip
        Util.writeIntLong(b, 4, endIp);      //end ip

        //write the data ptr and the length
        long mix = dataPtr | (((long) dataLen << 24) & 0xFF000000L);
        Util.writeIntLong(b, 8, mix);

        return b;
    }
}

View File

@@ -0,0 +1,143 @@
package org.lionsoul.ip2region;
/**
 * util class: little-endian integer (de)serialization and
 * ip address conversion helpers.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class Util
{
    /**
     * write the specified number of low bytes of v to a byte array
     * start from offset, least significant byte first
     *
     * @param b      target array
     * @param offset index of the first byte to write
     * @param v      value to write
     * @param bytes  number of bytes to write
     */
    public static void write( byte[] b, int offset, long v, int bytes)
    {
        for ( int i = 0; i < bytes; i++ ) {
            b[offset++] = (byte)((v >>> (8 * i)) & 0xFF);
        }
    }

    /**
     * write the low 4 bytes of v to a byte array,
     * least significant byte first
     *
     * @param b      target array
     * @param offset index of the first byte to write
     * @param v      value to write
     */
    public static void writeIntLong( byte[] b, int offset, long v )
    {
        b[offset++] = (byte)((v >> 0) & 0xFF);
        b[offset++] = (byte)((v >> 8) & 0xFF);
        b[offset++] = (byte)((v >> 16) & 0xFF);
        b[offset ] = (byte)((v >> 24) & 0xFF);
    }

    /**
     * get a 4 bytes unsigned int from a byte array
     * start from the specified offset, least significant byte first
     *
     * @param b      source array
     * @param offset index of the first byte to read
     * @return long the value in the range 0 .. 0xFFFFFFFF
     */
    public static long getIntLong( byte[] b, int offset )
    {
        return (
            ((b[offset++] & 0x000000FFL)) |
            ((b[offset++] << 8) & 0x0000FF00L) |
            ((b[offset++] << 16) & 0x00FF0000L) |
            ((b[offset ] << 24) & 0xFF000000L)
        );
    }

    /**
     * get a 3 bytes unsigned int from a byte array
     * start from the specified offset, least significant byte first
     *
     * @param b      source array
     * @param offset index of the first byte to read
     * @return int the value in the range 0 .. 0xFFFFFF
     */
    public static int getInt3( byte[] b, int offset )
    {
        //mask every byte first to drop the sign extension, then move it
        //to its position
        return (
            ( b[offset++] & 0xFF) |
            ((b[offset++] & 0xFF) << 8) |
            ((b[offset ] & 0xFF) << 16)
        );
    }

    /**
     * get a 2 bytes unsigned int from a byte array
     * start from the specified offset, least significant byte first
     *
     * @param b      source array
     * @param offset index of the first byte to read
     * @return int the value in the range 0 .. 0xFFFF
     */
    public static int getInt2( byte[] b, int offset )
    {
        return (
            ( b[offset++] & 0xFF) |
            ((b[offset ] & 0xFF) << 8)
        );
    }

    /**
     * get a 1 byte unsigned int from a byte array
     *
     * @param b      source array
     * @param offset index of the byte to read
     * @return int the value in the range 0 .. 0xFF
     */
    public static int getInt1( byte[] b, int offset )
    {
        return (
            (b[offset] & 0x000000FF)
        );
    }

    /**
     * string ip to long ip
     *
     * @param ip dotted ip address
     * @return long the ip as an unsigned 32 bits value,
     *         or 0 if ip does not split into 4 parts
     * @throws NumberFormatException if a part is not an integer
     */
    public static long ip2long( String ip )
    {
        String[] p = ip.split("\\.");
        if ( p.length != 4 ) return 0;

        int p1 = ((Integer.parseInt(p[0]) << 24) & 0xFF000000);
        int p2 = ((Integer.parseInt(p[1]) << 16) & 0x00FF0000);
        int p3 = ((Integer.parseInt(p[2]) << 8) & 0x0000FF00);
        int p4 = ((Integer.parseInt(p[3]) << 0) & 0x000000FF);

        //the mask turns the possibly negative int into an unsigned value
        return ((p1 | p2 | p3 | p4) & 0xFFFFFFFFL);
    }

    /**
     * int to ip string
     *
     * @param ip
     * @return string the dotted form of the low 32 bits of ip
     */
    public static String long2ip( long ip )
    {
        StringBuilder sb = new StringBuilder();

        sb
        .append((ip >> 24) & 0xFF).append('.')
        .append((ip >> 16) & 0xFF).append('.')
        .append((ip >> 8) & 0xFF).append('.')
        .append((ip >> 0) & 0xFF);

        return sb.toString();
    }

    /**
     * check the validate of the specified ip address:
     * exactly 4 dot separated parts, each made of 1 to 3 decimal digits
     * with a value not bigger than 255
     *
     * @param ip
     * @return boolean false for null or anything that is not such an
     *         address, never throws
     */
    public static boolean isIpAddress( String ip )
    {
        if ( ip == null ) return false;

        //limit -1 keeps the trailing empty parts, so "1.2.3.4." is rejected
        String[] p = ip.split("\\.", -1);
        if ( p.length != 4 ) return false;

        for ( String pp : p ) {
            int len = pp.length();
            if ( len == 0 || len > 3 ) return false;

            //digits only: signs and letters are not part of an address
            int val = 0;
            for ( int i = 0; i < len; i++ ) {
                char c = pp.charAt(i);
                if ( c < '0' || c > '9' ) return false;
                val = val * 10 + (c - '0');
            }

            if ( val > 255 ) return false;
        }

        return true;
    }
}

View File

@@ -0,0 +1,35 @@
package org.lionsoul.ip2region.test;
import java.io.IOException;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMaker;
import org.lionsoul.ip2region.DbMakerConfigException;
/**
 * Manual test driver for the db maker: builds the db file from the
 * developer's local data files.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class TestMaker
{
    /**
     * Build the db file from fixed local paths and print the stack trace
     * of any configuration or I/O failure.
     *
     * @param argv not used
     */
    public static void main(String[] argv)
    {
        final String ipSource     = "/data0/code/java/ip2region-dev/data/ip.merge.txt";
        final String globalRegion = "/data0/code/java/ip2region-dev/data/origin/global_region.csv";
        final String target       = "/data0/code/java/ip2region-dev/data/ip2region.db";

        try {
            new DbMaker(new DbConfig(), ipSource, globalRegion).make(target);
        } catch (DbMakerConfigException configError) {
            configError.printStackTrace();
        } catch (IOException ioError) {
            ioError.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,120 @@
package org.lionsoul.ip2region.test;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMakerConfigException;
import org.lionsoul.ip2region.DbSearcher;
import org.lionsoul.ip2region.Util;
/**
 * project test script: an interactive shell that reads ip addresses from
 * the standard input and prints the matched region with the time used.
 *
 * @author chenxin<chenxin619315@gmail.com>
 */
public class TestSearcher
{
    /**
     * @param argv argv[0] is the db file path, the optional argv[1] selects
     *             the algorithm: "binary", "memory" or anything else for b-tree
     */
    public static void main(String[] argv)
    {
        if ( argv.length == 0 ) {
            System.out.println("| Usage: java -jar ip2region-{version}.jar [ip2region db file]");
            return;
        }

        File file = new File(argv[0]);
        if ( file.exists() == false ) {
            System.out.println("Error: Invalid ip2region.db file");
            return;
        }

        int algorithm = DbSearcher.BTREE_ALGORITHM;
        String algoName = "B-tree";
        if ( argv.length > 1 ) {
            if ( argv[1].equalsIgnoreCase("binary")) {
                algoName = "Binary";
                algorithm = DbSearcher.BINARY_ALGORITHM;
            } else if ( argv[1].equalsIgnoreCase("memory") ) {
                algoName = "Memory";
                algorithm = DbSearcher.MEMORY_ALGORITYM;
            }
        }

        DbSearcher searcher = null;
        BufferedReader reader = null;
        try {
            System.out.println("initializing "+algoName+" ... ");
            DbConfig config = new DbConfig();
            searcher = new DbSearcher(config, argv[0]);
            reader = new BufferedReader(new InputStreamReader(System.in));

            //define the method
            Method method = null;
            switch ( algorithm )
            {
            case DbSearcher.BTREE_ALGORITHM:
                method = searcher.getClass().getMethod("btreeSearch", String.class);
                break;
            case DbSearcher.BINARY_ALGORITHM:
                method = searcher.getClass().getMethod("binarySearch", String.class);
                break;
            case DbSearcher.MEMORY_ALGORITYM:
                method = searcher.getClass().getMethod("memorySearch", String.class);
                break;
            }

            System.out.println("+----------------------------------+");
            System.out.println("| ip2region test shell |");
            System.out.println("| Author: chenxin619315@gmail.com |");
            System.out.println("| Type 'quit' to exit program |");
            System.out.println("+----------------------------------+");

            double sTime = 0, cTime = 0;
            String line = null;
            DataBlock dataBlock = null;
            while ( true ) {
                System.out.print("ip2region>> ");
                line = reader.readLine();
                //null means end of input (closed pipe, Ctrl-D): leave like 'quit'
                if ( line == null ) break;

                line = line.trim();
                if ( line.length() < 2 ) continue;
                if ( line.equalsIgnoreCase("quit") ) break;
                if ( Util.isIpAddress(line) == false ) {
                    System.out.println("Error: Invalid ip address");
                    continue;
                }

                sTime = System.nanoTime();
                dataBlock = (DataBlock) method.invoke(searcher, line);
                cTime = (System.nanoTime() - sTime) / 1000000;
                System.out.printf("%s in %.5f millseconds\n", dataBlock, cTime);
            }

            System.out.println("+--Bye");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (DbMakerConfigException e) {
            e.printStackTrace();
        } catch (NoSuchMethodException e) {
            e.printStackTrace();
        } catch (SecurityException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (InvocationTargetException e) {
            e.printStackTrace();
        } finally {
            //release the handlers on the error paths as well
            if ( reader != null ) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if ( searcher != null ) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

View File

@@ -0,0 +1,96 @@
package org.lionsoul.ip2region.test;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMakerConfigException;
import org.lionsoul.ip2region.DbSearcher;
/**
 * data check class: searches the start and the end ip of every record of
 * the source file in the generated db and logs the records whose region
 * differs from the source.
 *
 * @author koma<komazhang@foxmail.com>
 **/
public class TestUnit {

    /**
     * Append one mismatch entry to the error log and flush it.
     *
     * @param bwr          error log writer
     * @param sourceRegion region of the source file record
     * @param ipLabel      "First" or "Second"
     * @param ip           the ip that was searched
     * @param dbRegion     region returned by the db, null if nothing matched
     * @throws Exception if the log could not be written
     */
    private static void logMismatch(
            BufferedWriter bwr, String sourceRegion,
            String ipLabel, String ip, String dbRegion) throws Exception {
        bwr.write("[Source]: Region: "+sourceRegion);
        bwr.newLine();
        bwr.write("[Source]: "+ipLabel+" Ip: "+ip);
        bwr.newLine();
        bwr.write("[DB]: Region: "+dbRegion);
        bwr.newLine();
        bwr.flush();
    }

    /**
     * @param args not used, all the paths are relative to ./data/
     */
    public static void main(String[] args) {
        DbSearcher _searcher = null;
        BufferedReader bfr = null;
        BufferedWriter bwr = null;
        try {
            _searcher = new DbSearcher(new DbConfig(), "./data/ip2region.db");
            bfr = new BufferedReader(new FileReader("./data/ip.merge.txt"));
            bwr = new BufferedWriter(new FileWriter("./data/error_log.txt", true));

            int errCount = 0;
            int lineCount = 0;
            String str = null;
            while ( (str = bfr.readLine()) != null ) {
                //skip what the db maker skips: blank lines, comments and
                //lines that are not "start ip|end ip|region"
                str = str.trim();
                if ( str.length() == 0 || str.charAt(0) == '#' ) continue;
                int first_idx = str.indexOf('|');
                if ( first_idx < 0 ) continue;
                int second_idx = str.indexOf('|', first_idx + 1);
                if ( second_idx < 0 ) continue;

                String first_ip = str.substring(0, first_idx);
                String second_ip = str.substring(first_idx + 1, second_idx);
                String source_region = str.substring(second_idx + 1);

                //search from DbSearcher
                System.out.println("+---Start, start to search");
                System.out.println("+---[Info]: Source region = "+source_region);

                System.out.println("+---[Info]: Step1, search for first IP: "+first_ip);
                DataBlock fdata = _searcher.binarySearch(first_ip);
                //a null block means the ip was not found at all
                String fregion = (fdata == null) ? null : fdata.getRegion();
                if ( ! source_region.equalsIgnoreCase( fregion ) ) {
                    System.out.println("[Error]: Search first IP failed, DB region = "+fregion);
                    logMismatch(bwr, source_region, "First", first_ip, fregion);
                    errCount++;
                }

                System.out.println("+---[Info]: Step2, search for second IP: "+second_ip);
                DataBlock sdata = _searcher.btreeSearch(second_ip);
                String sregion = (sdata == null) ? null : sdata.getRegion();
                if ( ! source_region.equalsIgnoreCase( sregion ) ) {
                    System.out.println("[Error]: Search second IP failed, DB region = "+sregion);
                    logMismatch(bwr, source_region, "Second", second_ip, sregion);
                    errCount++;
                }

                lineCount++;
            }

            //divide as float, and do not divide at all without any record
            float failRatio = (lineCount == 0) ? 0f : ((float) errCount / lineCount) * 100;
            System.out.println("+---Done, search complished");
            System.out.println("+---Statistics, Error count = "+errCount
                    +", Total line = "+lineCount
                    +", Fail ratio = "+failRatio+"%");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (DbMakerConfigException e) {
            e.printStackTrace();
        } catch ( Exception e ) {
            e.printStackTrace();
        } finally {
            //release every handler, also when the check stopped on an error
            try {
                if ( bwr != null ) bwr.close();
            } catch ( Exception e ) {
                e.printStackTrace();
            }
            try {
                if ( bfr != null ) bfr.close();
            } catch ( Exception e ) {
                e.printStackTrace();
            }
            try {
                if ( _searcher != null ) _searcher.close();
            } catch ( Exception e ) {
                e.printStackTrace();
            }
        }
    }
}