From a819050e30bb6906858165796655c0c726843a3f Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan"
Date: Sun, 7 Mar 2021 05:57:02 +0100
Subject: Collect all relevant references from the latest publications

---
 perfrefs.bib | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 perfrefs.bib

(limited to 'perfrefs.bib')

diff --git a/perfrefs.bib b/perfrefs.bib
new file mode 100644
index 0000000..9ece185
--- /dev/null
+++ b/perfrefs.bib
@@ -0,0 +1,99 @@
+@article{mei2017microbench,
+  author    = {Mei, Xinxin and Chu, Xiaowen},
+  title     = {Dissecting {GPU} Memory Hierarchy Through Microbenchmarking},
+  journal   = {IEEE Transactions on Parallel and Distributed Systems},
+  volume    = {28},
+  number    = {1},
+  pages     = {72--86},
+  publisher = {IEEE Press},
+  year      = {2017},
+  issn      = {1045-9219},
+  doi       = {10.1109/TPDS.2016.2549523},
+}
+
+@inproceedings{zhang2017performance,
+  author    = {Zhang, Xiuxia and Tan, Guangming and Xue, Shuangbai and Li, Jiajia and Zhou, Keren and Chen, Mingyu},
+  title     = {Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning},
+  booktitle = {Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
+  series    = {PPoPP '17},
+  year      = {2017},
+  isbn      = {978-1-4503-4493-7},
+  pages     = {31--43},
+  doi       = {10.1145/3018743.3018755},
+  publisher = {ACM},
+}
+
+@article{lim2017autotuning,
+  author  = {Lim, Robert V. and Norris, Boyana and Malony, Allen D.},
+  title   = {Autotuning {GPU} Kernels via Static and Predictive Analysis},
+  journal = {CoRR},
+  volume  = {abs/1701.08547},
+  year    = {2017},
+  url     = {http://arxiv.org/abs/1701.08547},
+}
+
+@phdthesis{volkov2016thesis,
+  author = {Volkov, Vasily},
+  title  = {Understanding Latency Hiding on {GPUs}},
+  school = {EECS Department, University of California, Berkeley},
+  number = {UCB/EECS-2016-143},
+  year   = {2016},
+  url    = {http://www2.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-143.html},
+}
+
+% mei2014 is a paper in conference proceedings: typed as inproceedings because standard styles do not print booktitle for inbook entries.
+@inproceedings{mei2014,
+  author    = {Mei, Xinxin and Zhao, Kaiyong and Liu, Chengjian and Chu, Xiaowen},
+  title     = {Benchmarking the Memory Hierarchy of Modern {GPUs}},
+  booktitle = {Network and Parallel Computing: 11th IFIP WG 10.3 International Conference},
+  year      = {2014},
+  publisher = {Springer Berlin Heidelberg},
+  pages     = {144--156},
+  isbn      = {978-3-662-44917-2},
+  doi       = {10.1007/978-3-662-44917-2_13},
+}
+
+@article{zhang2014performance,
+  author  = {Zhang, Ying and Peng, Lu and Li, Bin and Peir, Jih-Kwon and Chen, Jianmin},
+  title   = {Performance and Power Comparisons Between {Nvidia} and {ATI} {GPUs}},
+  journal = {International Journal of Computer Science \& Information Technology},
+  volume  = {6},
+  number  = {6},
+  year    = {2014},
+}
+
+@inproceedings{zhang2011ati,
+  author    = {Zhang, Ying and Hu, Yue and Li, Bin and Peng, Lu},
+  title     = {Performance and power analysis of {ATI} {GPU}: A statistical approach},
+  booktitle = {6th IEEE International Conference on Networking, Architecture and Storage (NAS)},
+  pages     = {149--158},
+  year      = {2011},
+}
+
+@inproceedings{volkov2010occupation,
+  author    = {Volkov, Vasily},
+  title     = {Better performance at lower occupancy},
+  booktitle = {Proceedings of the GPU technology conference (GTC)},
+  volume    = {10},
+  pages     = {16},
+  year      = {2010},
+}
+
+@inproceedings{konstantinidis2016gpumembench,
+  author    = {Konstantinidis, Elias and Cotronis, Yiannis},
+  title     = {A Quantitative Performance Evaluation of Fast on-Chip Memories of {GPUs}},
+  booktitle = {24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP)},
+  pages     = {448--455},
+  year      = {2016},
+  doi       = {10.1109/PDP.2016.56},
+}
+
+@article{konstantinidis2017mixbench,
+  author  = {Konstantinidis, Elias and Cotronis, Yiannis},
+  title   = {A quantitative roofline model for {GPU} kernel performance estimation using micro-benchmarks and hardware metric profiling},
+  journal = {Journal of Parallel and Distributed Computing},
+  volume  = {107},
+  pages   = {37--56},
+  year    = {2017},
+  doi     = {10.1016/j.jpdc.2017.04.002},
+}
-- 
cgit v1.2.1