满天星
Fork me on GitHub

大数据日志分析学习笔记01--IP地址查找

IP地址查找

myhope365.com

大数据日志分析中,经常会根据访问的来源IP地址
来判定该访客的所属省,市,区甚至更精准的位置信息。从而对该访问打上相应的位置标签
    现在各大搜索引擎或专门IP服务公司,均提供类似的服务
需求分析:
    通过命令行来演示项目效果即可,查找用时要求在毫秒(ms)级内完成
    IP地址库思考:很多公司在提供相应的服务,说明这方面的服务是有相应的公开数据的,只是细节更新上各有差异
    IP地址库开放位置:
        https://pan.baidu.com/s/1Iu0FrjYIP1QtoL63_w48Ug
        899x  (失效)
    通过相应的地址库搜索或是查找算法,实现给定IP地址,找到其对应位置信息的功能
思路和考点:
    思路:
        解析提供的地址库字符串,转换为结构化数据形式
        基于结构化数据构建合适的数据结构,加快给定IP地址的查找速度
        封装成相应的工具类API,开放其相应方法,即给定IP地址可以在ms内计算得到其位置信息
    考点:
        面向对象程序设计
        工具类封装与使用写法
        文件IO
        字符串处理
        二分查找
        ip地址的不同形式的使用

1.需求说明
2.需求分析
3.方案设计
    技术组成:javase编程,IO,二分查找实现
    步骤拆解:
        1)文件读取,逐行形成IP地址和位置信息的对象  (同事1-0.4h)
        2)对象组织成有序数据集合结构                (同事1-0.4h)
        3)实现二分查找                            (同事1-0.5h)
        4)给定IP地址,利用第2,3步,获取其位置        (同事1-0.4h)
        5)封装工具类                                (同事1-0.2h)
4.开发细节
5.BUG修复,调优
6.上线

com.tl.job002.pojos
    IPAndLocationPojo();
com.tl.job002.manager
    DataLoadManager();
    DataSearchManager();
com.tl.job002.utils
    IOUtil();
    IPAndLongConverUtil();


public class IPAndLocationPojo implements Comparable<IPAndLocationPojo>{
    private String startIp;
    private String endIp;
    private String location;
    private long startIPLong;
    private long endIpLong;

    @Override
    public int compareTo(IPAndLocationPojo o){
        return this.endIpLong - o.startIpLong > 0 ? 1 : 0;
    }

    public IPAndLocationPojo(xxx){
        super();
        xxx

        this.startIPLong = IPAndLongConverUtil.ipToLong(startIp);
        this.endIPLong = IPAndLongConverUtil.ipToLong(endIp);
    }

    setter and getter
}

public class DataLoadManager{
    public static List<IPAndLoacationPojo> getPojoList(List<String> ipAddressList){
        for(String line:ipAddressList){
            line = line.trim();
            if(line.length()==0){
                continue;
            }
            String columnArray = line.split("\t");
            if(columnArray.length!=3){
                continue;
            }
            IPAndLocationPojo pojo = new IPAndLocationPojo(columnArray[0],columnArray[1],columnArray[2]);
            pojoList.add(pojo);
        }
        return pojoList;
    }

    public static List<IPAndLoacationPojo> getPojoList(String ipPath){
        //把文本文件加载到内存的字符串集合中
        List<String> lineList = IOUtil.getLineList(ipPath,"UTF-8");
        //把String集合解析成对应的集合对象
        List<IPAndLocationPojo> pojoList = new ArrayList<IPAndLocationPojo>();
        return pojoList;
    }

    psvm(){
        String ipAddressSource = "/usr/bin/xxx.txt";
        List<IPAndLocationPojo> pojoList = getPojoList(ipAddressSource);
        sout(pojoList.size());
    }
}

/**
 * File reading helper.
 */
public class IOUtil{
    /**
     * Reads a text file completely and returns its lines.
     *
     * @param txtFilePath path of the text file to read
     * @param encoding    charset name used to decode the file, e.g. {@code "UTF-8"}
     * @return every line of the file in order, without line terminators
     * @throws Exception if the file cannot be opened or read, or the charset
     *                   name is not supported
     */
    public static List<String> getLineList(String txtFilePath,String encoding) throws Exception{
        List<String> lineList = new ArrayList<String>();
        // try-with-resources closes the stream even when reading fails; the
        // stream is declared separately so it is also closed if the reader
        // cannot be created (unsupported charset).
        try(FileInputStream fis = new FileInputStream(txtFilePath);
            BufferedReader br = new BufferedReader(new InputStreamReader(fis,encoding))){
            String line = null;
            while((line=br.readLine())!=null){
                lineList.add(line);
            }
        }
        return lineList;
    }
}

public class DataSearchManager{
    private IPAndLocationPojo[] sortedPojoArray = null;
    public DatatSearchManager(String idAddressLib){
        List<IPAndLocationPojo> pojoList = DataLoadManager.getPojoList(idAddressLib);
        pojoArray = new IPAndLocationPojo[0];
        pojoArray = pojoList.toArray();
    }
    //简单测试二分查找
    public static int getIndexByBinarySearch(int[] sortedArray,int startPos,int endPos,int aid){
        if(startPos<0 || endPos>sortedArray.length || startPos > endPos){
            return false;
        }
        int middle = (startPos + endPos)/2;
        if(aid>sortedArray[middle]){
            startPos = middle + 1;
            return getIndexByBinarySearch(sortedArray,startPos,endPos,aid);
        }else if(aid<sortedArray[middle]){
            endPos = middle -1;
            return getIndexByBinarySearch(sortedArray,startPos,endPos,aid);
        }
        return middle;

    }
    //对象二分查找
    public int getIndexByBinarySearch(int[] sortedArray,int startPos,int endPos,long ipLong){
        if(startPos<0 || endPos>sortedArray.length || startPos > endPos){
            return false;
        }
        int middle = (startPos + endPos)/2;
        if(ipLong>sortedArray[middle].getEndIpLong()){
            startPos = middle + 1;
            return getIndexByBinarySearch(sortedArray,startPos,endPos,ipLong);
        }else if(aid<sortedArray[middle].getStartIPLong()){
            endPos = middle -1;
            return getIndexByBinarySearch(sortedArray,startPos,endPos,ipLong);
        }
        return middle;

    }

    //封装:
    public String getLocationByIPString(String ip){

        int startPos = 0;
        int endPos = pojoArray.length - 1;
        String ip = "1.27.248.0";
        Long aidToLong  = IPAndLongConvertUtil.ipToLong(ip);
        int pos = getIndexByBinarySearch(pojoArray,startPos,endPos,aid);
        if(pos > -1){
            return pojoArray[pos].getLocation();
        }
        return null;
    }


    psvm(){
        String ipAddressSource = "";
        String ip = "1.27.233.255";
        DataSearchManager dsm = new DataSearchManager(ipAddressSource);
        long startTS = System.currentTimeMillis();
        String location = dsm.getLocationByIPString(ip);
        long endTS = System.currentTimeMillis();
        sout(endTS-startTS);

    }
    psvm(){
        int[] sortedArray={1,3,5,7,11,20,30,44,55};
        int startPos = 0;
        int endPos = sortedArray.length - 1;
        int aid  = 1;
        int index = getIndexByBinarySearch(sortedArray,startPos,endPos,aid);
        sout(index);
    }

}


maven打包:
    如果项目不是maven,则右键项目->configure->convert to Maven project 
    <sourceDirectory>src</sourceDirectory> <!-- 意思是打的src下的包-->
    plugins:
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <!-- 意思是用这个插件来打,target是1.7的版本 -->
            <version>2.3.2</version>
            <configuration>
                <source>1.7</source>
                <target>1.7</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>    
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>
                        jar-with-dependencies
                    </descriptorRef>
                </descriptorRefs>
                <!-- 这里指定打包的主类 -->
                <archive>
                    <manifest>
                    <!-- 主函数入口 -->
                    <mainClass>com.tl.job001.controler.SystemController</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <!-- 意思是用assembly方式来打包 -->
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>

    然后项目->右键->Run As->Maven install
    一个jar包,一个allinone的包
    java -cp ?
-------------本文结束期待您的评论-------------