@article{JIA2020102908,
  title = {Scalable Hash From Triplet Loss Feature Aggregation For Video De-duplication},
  journal = {Journal of Visual Communication and Image Representation},
  volume = {72},
  pages = {102908},
  year = {2020},
  issn = {1047-3203},
  doi = {https://doi.org/10.1016/j.jvcir.2020.102908},
  url = {https://www.sciencedirect.com/science/article/pii/S1047320320301462},
  author = {Wei Jia and Li Li and Zhu Li and Shuai Zhao and Shan Liu},
  keywords = {Binary hash, Binary tree, Fisher vector, Triplet loss, Video de-duplication},
  abstract = {The producing, sharing, and consuming life cycle of video content creates a massive number of duplicated video segments due to variable bit rate representations and fragmentation in playback. The storage and communication inefficiency caused by these duplicates motivates researchers in both academia and industry to develop computationally efficient video de-duplication solutions for storage and CDN providers. Moreover, increasing demands for high resolution and quality aggravate the burden on cluster storage and limited bandwidth resources. Hence, video de-duplication in storage and transmission is becoming an important feature for video cloud storage and Content Delivery Network (CDN) service providers. Despite its necessity, optimizing multimedia data de-duplication is a challenging task because we should match as many duplicated videos as possible without removing videos by mistake. Current video de-duplication schemes mostly rely on URL-based solutions, which cannot handle content such as video, where the same piece of content may carry entirely different URL identifications and fragmentations, and where different quality representations further complicate the problem. In this paper, we propose a novel content-based video segment identification scheme that is invariant to the underlying codec and operational bit rates. It computes robust features from a triplet loss deep learning network that captures the invariance of the same content under different coding tools and strategies, while a scalable hashing solution is developed based on Fisher Vector aggregation of the convolutional features from the triplet loss network. Our simulation results demonstrate a significant improvement in large-scale video repository de-duplication compared with state-of-the-art methods.}
}