集合是一种高级的数据结构,它的特点是在集合中不能有相同的元素。
我在之前的文章已经详细的讲过链表了,只是贴出代码不再赘述了。
/*** @Author: Cui* @Date: 2020/12/21* @Description:*/
public class LinkedList {private class Node{public E e;public Node next;public Node(E e, Node next){this.e &#61; e;this.next &#61; next;}public Node(E e){this(e, null);}public Node(){this(null, null);}&#64;Overridepublic String toString(){return e.toString();}}private Node dummyHead;private int size;public LinkedList(){dummyHead &#61; new Node();size &#61; 0;}// 获取链表中的元素个数public int getSize(){return size;}// 返回链表是否为空public boolean isEmpty(){return size &#61;&#61; 0;}// 在链表的index(0-based)位置添加新的元素e// 在链表中不是一个常用的操作&#xff0c;练习用&#xff1a;&#xff09;public void add(int index, E e){if(index <0 || index > size)throw new IllegalArgumentException("Add failed. Illegal index.");Node prev &#61; dummyHead;for(int i &#61; 0 ; i &#61; size)throw new IllegalArgumentException("Get failed. Illegal index.");Node cur &#61; dummyHead.next;for(int i &#61; 0 ; i &#61; size)throw new IllegalArgumentException("Set failed. Illegal index.");Node cur &#61; dummyHead.next;for(int i &#61; 0 ; i &#61; size)throw new IllegalArgumentException("Remove failed. Index is illegal.");Node prev &#61; dummyHead;for(int i &#61; 0 ; i ");cur &#61; cur.next;}res.append("NULL");return res.toString();}
}
实现集合
import java.util.ArrayList;/*** &#64;Author: Cui* &#64;Date: 2020/12/21* &#64;Description:*/
public class LinkedListSet implements Set{private LinkedList list;public LinkedListSet(){list &#61; new LinkedList<>();}&#64;Overridepublic void add(E e) {if(!list.contains(e)){list.addFirst(e);}}&#64;Overridepublic void remove(E e) {list.removeElement(e);}&#64;Overridepublic boolean contains(E e) {return list.contains(e);}&#64;Overridepublic int getSize() {return list.getSize();}&#64;Overridepublic boolean isEmpty() {return list.isEmpty();}
}
测试需要用到的文件读取代码
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Locale;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.IOException;
public class FileOperation {// 读取文件名称为filename中的内容&#xff0c;并将其中包含的所有词语放进words中public static boolean readFile(String filename, ArrayList words){if (filename &#61;&#61; null || words &#61;&#61; null){System.out.println("filename is null or words is null");return false;}// 文件读取Scanner scanner;try {File file &#61; new File(filename);if(file.exists()){FileInputStream fis &#61; new FileInputStream(file);scanner &#61; new Scanner(new BufferedInputStream(fis), "UTF-8");scanner.useLocale(Locale.ENGLISH);}elsereturn false;}catch(IOException ioe){System.out.println("Cannot open " &#43; filename);return false;}// 简单分词// 这个分词方式相对简陋, 没有考虑很多文本处理中的特殊问题// 在这里只做demo展示用if (scanner.hasNextLine()) {String contents &#61; scanner.useDelimiter("\\A").next();int start &#61; firstCharacterIndex(contents, 0);for (int i &#61; start &#43; 1; i <&#61; contents.length(); )if (i &#61;&#61; contents.length() || !Character.isLetter(contents.charAt(i))) {String word &#61; contents.substring(start, i).toLowerCase();words.add(word);start &#61; firstCharacterIndex(contents, i);i &#61; start &#43; 1;} elsei&#43;&#43;;}return true;}// 寻找字符串s中&#xff0c;从start的位置开始的第一个字母字符的位置private static int firstCharacterIndex(String s, int start){for( int i &#61; start ; i }
读者测试可以在项目的根目录下放置一个文件用来读取。
我在这里放置的是傲慢与偏见和双城记的英文版
主函数
public static void main(String[] args) {System.out.println("Pride and Prejudice");ArrayList words1 &#61; new ArrayList<>();if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {System.out.println("Total words: " &#43; words1.size());LinkedListSet set1 &#61; new LinkedListSet<>();for (String word : words1)set1.add(word);System.out.println("Total different words: " &#43; set1.getSize());}System.out.println();System.out.println("A Tale of Two Cities");ArrayList words2 &#61; new ArrayList<>();if(FileOperation.readFile("a-tale-of-two-cities.txt", words2)){System.out.println("Total words: " &#43; words2.size());LinkedListSet set2 &#61; new LinkedListSet<>();for(String word: words2)set2.add(word);System.out.println("Total different words: " &#43; set2.getSize());}}
可以很明显感觉到运行时间很慢
使用树来实现&#xff0c;先贴出树实现的代码
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;/*** &#64;Author: Cui* &#64;Date: 2020/12/14* &#64;Description:*/
public class BST> {private class Node{public E e;public Node right,left;public Node(E e){this.e&#61;e;left&#61;null;right&#61;null;}}private Node root;private int size;public BST(){root&#61;null;size&#61;0;}public int size(){return size;}public boolean isEmpty(){return size&#61;&#61;0;}public void add(E e){if(root &#61;&#61; null){root &#61; new Node(e);size&#43;&#43;;}elseadd(root,e);}private Node add(Node node,E e){if(node&#61;&#61;null){size&#43;&#43;;return new Node(e);}if(e.compareTo(node.e) <0)node.left&#61;add(node.left,e);else if(e.compareTo(node.e) > 0)node.right&#61;add(node.right,e);return node;}public boolean contains(E e){return contains(root,e);}private boolean contains(Node node, E e) {if(node &#61;&#61; null)return false;if(e.compareTo(node.e) &#61;&#61; 0){return true;}else if(e.compareTo(node.e) <0){return contains(node.left,e);}else {return contains(node.right,e);}}//递归型前序遍历public void preOrder(){preOrder(root);}private void preOrder(Node node) {if(node &#61;&#61; null){return;}System.out.println(node.e);preOrder(node.left);preOrder(node.right);}//非递归型前序遍历public void preOrderNR(){Stack stack &#61; new Stack<>();stack.push(root);while (!stack.isEmpty()){Node cur &#61; stack.pop();System.out.println(cur.e);if(cur.right!&#61;null)stack.push(cur.right);if(cur.left!&#61;null)stack.push(cur.left);}}//二分搜索树的层序遍历&#xff0c;即广度遍历public void levelOrder(){Queue q &#61; new LinkedList<>();q.add(root);while (!q.isEmpty()){Node cur &#61; q.remove();System.out.println(cur.e);if(cur.left!&#61;null){q.add(cur.left);}if(cur.right!&#61;null){q.add(cur.right);}}}//寻找二分搜索树中的最小元素public E minimum(){if(size&#61;&#61;0){throw new IllegalArgumentException("BST is empty");}return minimum(root).e;}//返回以node为根的二分搜索树的最小值所在的结点private Node minimum(Node node) {if(node.left &#61;&#61; null){return node;}return minimum(node.left);}//寻找二分搜索树中的最大元素public E maximum(){if(size&#61;&#61;0){throw new IllegalArgumentException("BST is empty");}return maximum(root).e;}//返回以node为根的二分搜索树的最小值所在的结点private Node maximum(Node node) {if(node.right &#61;&#61; null){return node;}return minimum(node.right);}//二分搜索树中删除最小结点所在结点public E removeMin(){E ret &#61; minimum();root &#61; removeMin(root);return ret;}//删除掉以node为根的二分搜索树的最小结点//返回删除结点后的新的二分搜索树的根private Node removeMin(Node node) {if(node.left&#61;&#61;null){Node rightNode &#61; node.right;node.right &#61; null;size--;return rightNode;}node.left &#61; removeMin(node.left);return node;}//二分搜索树中删除最大结点所在结点public E removeMax(){E ret &#61; maximum();root &#61; removeMax(root);return ret;}//删除掉以node为根的二分搜索树的最大结点//返回删除结点后的新的二分搜索树的根private Node removeMax(Node node) {if(node.right&#61;&#61;null){Node rightNode &#61; node.left;node.left &#61; null;size--;return rightNode;}node.right &#61; removeMin(node.right);return node;}public void remove(E e){root &#61; remove(root,e);}private Node remove(Node node, E e) {if(node&#61;&#61;null)return null;if(e.compareTo(node.e)<0){node.left &#61; remove(node.left,e);return node;}else if(e.compareTo(node.e)>0){node.right &#61; remove(node.right,e);return node;}else {//e&#61;&#61;node.e//待删除的结点左子树为空if(node.left &#61;&#61; null){Node rightNode &#61; node.right;node.right &#61; null;size--;return rightNode;}//待删除的结点右子树为空if(node.right &#61;&#61; null){Node leftNode &#61; node.left;node.right &#61; null;size--;return leftNode;}//待删除的结点左右子树均不为空//找到比待删除结点大的最小结点&#xff0c;即删除结点右子树的最小节点//用这个结点来顶替待删除结点的位置Node successor &#61; minimum(node.right);successor.right &#61; removeMin(node.right);successor.left &#61; node.left;node.left &#61; null;node.right &#61; null;return successor;}}//从二叉搜索树中寻找一个结点&#xff0c;此结点是不小于且最接近传入参数的结点public Node ceil(E e){Node node &#61; floor(root,e);return node;}//传入一个结点和一个值&#xff0c;从以这个结点为根节点的二叉树中递归的寻找值private Node ceil(Node node, E e) {if(node &#61;&#61; null){return null;}//待寻找的结点和此结点值相等&#xff0c;即此结点为待寻找的结点最接近的值if(node.e.compareTo(e) &#61;&#61; 0){return node;}//此结点大于待寻找的结点&#xff0c;进入此结点的左孩子继续寻找if(node.e.compareTo(e) <0){return ceil(node.right,e);}//此结点的值大于待寻找的值&#xff0c;进入左孩子继续寻找Node tempNode &#61; ceil(node.left, e);//从左孩子里找到的值&#xff0c;如果不为空&#xff0c;则此值为解&#xff0c;如果为空&#xff0c;则返回此结点的值if(tempNode !&#61; null){return tempNode;}return node;}//从二叉搜索树中寻找一个结点&#xff0c;此结点是不大于且最接近传入参数的结点public Node floor(E e){Node node &#61; floor(root,e);return node;}private Node floor(Node node, E e) {if(node &#61;&#61; null){return null;}//待寻找的结点和此结点值相等&#xff0c;即此结点为待寻找的结点最接近的值if(node.e.compareTo(e) &#61;&#61; 0){return node;}//此结点大于待寻找的结点&#xff0c;进入此结点的左孩子继续寻找if(node.e.compareTo(e) > 0){return floor(node.left,e);}//此结点的值小于待寻找的值&#xff0c;进入右孩子继续寻找Node tempNode &#61; floor(node.right, e);//从右孩子里找到的值&#xff0c;如果不为空&#xff0c;则此值为解&#xff0c;如果为空&#xff0c;则返回此结点的值if(tempNode !&#61; null){return tempNode;}return node;}&#64;Overridepublic String toString() {StringBuilder res &#61; new StringBuilder();generateBSTString(root,0,res);return res.toString();}//生成以node为根节点&#xff0c;深度为depth的描述二叉树的字符串private void generateBSTString(Node node, int depth, StringBuilder res) {if(node &#61;&#61; null){res.append(generateDepthString(depth)&#43;"null\n");return;}res.append(generateDepthString(depth)&#43;node.e&#43;"\n");generateBSTString(node.left,depth&#43;1,res);generateBSTString(node.right,depth&#43;1,res);}private String generateDepthString(int depth) {StringBuilder res &#61; new StringBuilder();for(int i&#61;0;i}
实现的集合
/*** &#64;Author: Cui* &#64;Date: 2020/12/21* &#64;Description:*/
public class BSTSet> implements Set{private BST bst;public BSTSet(){bst &#61; new BST<>();}&#64;Overridepublic void add(E e) {bst.add(e);}&#64;Overridepublic void remove(E e) {bst.remove(e);}&#64;Overridepublic boolean contains(E e) {return bst.contains(e);}&#64;Overridepublic int getSize() {return bst.size();}&#64;Overridepublic boolean isEmpty() {return bst.isEmpty();}
}
继续使用刚刚那个测试的例子进行性能比较
public class Main {public static double testSet(Set set,String filename){long startTime &#61; System.nanoTime();ArrayList words &#61; new ArrayList<>();if(FileOperation.readFile(filename, words)) {System.out.println("Total words: " &#43; words.size());for (String word : words)set.add(word);System.out.println("Total different words: " &#43; set.getSize());}long endTime &#61; System.nanoTime();return (endTime-startTime)/1000000000.0;}public static void main(String[] args) {String filename &#61; "pride-and-prejudice.txt";BSTSet bstSet &#61; new BSTSet<>();double time &#61; testSet(bstSet,filename);System.out.println("BST Set:"&#43;time&#43;"s");System.out.println();LinkedListSet linkedListSet &#61; new LinkedListSet<>();double time1 &#61; testSet(linkedListSet,filename);System.out.println("LinkedList Set:"&#43;time1&#43;"s");}
}
可以看出使用树实现的集合进行统计傲慢与偏见时需要0.1秒&#xff0c;而链表需要2.2秒&#xff0c;22倍。。。
可以看出二分搜索树的性能在实现集合的这个场景下是比链表要高效很多的。