From a819050e30bb6906858165796655c0c726843a3f Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@suren.me>
Date: Sun, 7 Mar 2021 05:57:02 +0100
Subject: Collect all relevant references from the latest publications

---
 perfrefs.bib | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 perfrefs.bib

(limited to 'perfrefs.bib')

diff --git a/perfrefs.bib b/perfrefs.bib
new file mode 100644
index 0000000..9ece185
--- /dev/null
+++ b/perfrefs.bib
@@ -0,0 +1,99 @@
+@article{mei2017microbench,
+  author    = {Mei, Xinxin and Chu, Xiaowen},
+  title     = {Dissecting {GPU} Memory Hierarchy Through Microbenchmarking},
+  journal   = {IEEE Transactions on Parallel and Distributed Systems},
+  volume    = {28},
+  number    = {1},
+  pages     = {72--86},
+  publisher = {IEEE Press},
+  year      = {2017},
+  issn      = {1045-9219},
+  doi       = {10.1109/TPDS.2016.2549523},
+}
+
+@inproceedings{zhang2017performance,
+  author    = {Zhang, Xiuxia and Tan, Guangming and Xue, Shuangbai and Li, Jiajia and Zhou, Keren and Chen, Mingyu},
+  title     = {Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning},
+  booktitle = {Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
+  series    = {PPoPP '17},
+  year      = {2017},
+  isbn      = {978-1-4503-4493-7},
+  pages     = {31--43},
+  doi       = {10.1145/3018743.3018755},
+  publisher = {ACM},
+}
+
+@article{lim2017autotuning,
+  author    = {Lim, Robert V. and Norris, Boyana and Malony, Allen D.},
+  title     = {Autotuning {GPU} Kernels via Static and Predictive Analysis},
+  year      = {2017},
+  journal   = {CoRR},
+  volume    = {abs/1701.08547},
+  url       = {http://arxiv.org/abs/1701.08547},
+}
+
+@phdthesis{volkov2016thesis,
+  author    = {Volkov, Vasily},
+  title     = {Understanding Latency Hiding on {GPUs}},
+  school    = {EECS Department, University of California, Berkeley},
+  number    = {UCB/EECS-2016-143},
+  year      = {2016},
+  url       = {http://www2.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-143.html},
+}
+
+@inproceedings{mei2014,
+  author    = {Mei, Xinxin and Zhao, Kaiyong and Liu, Chengjian and Chu, Xiaowen},
+  title     = {Benchmarking the Memory Hierarchy of Modern {GPUs}},
+  booktitle = {Network and Parallel Computing: 11th {IFIP} {WG} 10.3 International Conference},
+  year      = {2014},
+  publisher = {Springer Berlin Heidelberg},
+  pages     = {144--156},
+  isbn      = {978-3-662-44917-2},
+  doi       = {10.1007/978-3-662-44917-2_13},
+}
+
+
+@article{zhang2014performance,
+  author    = {Zhang, Ying and Peng, Lu and Li, Bin and Peir, Jih-Kwon and Chen, Jianmin},
+  title     = {Performance and Power Comparisons Between {Nvidia} and {ATI} {GPUs}},
+  journal   = {International Journal of Computer Science \& Information Technology},
+  volume    = {6},
+  number    = {6},
+  year      = {2014},
+}
+
+@inproceedings{zhang2011ati,
+  author    = {Zhang, Ying and Hu, Yue and Li, Bin and Peng, Lu},
+  title     = {Performance and Power Analysis of {ATI} {GPU}: A Statistical Approach},
+  booktitle = {6th {IEEE} International Conference on Networking, Architecture and Storage ({NAS})},
+  pages     = {149--158},
+  year      = {2011},
+}
+
+@inproceedings{volkov2010occupation,
+  author    = {Volkov, Vasily},
+  title     = {Better Performance at Lower Occupancy},
+  booktitle = {Proceedings of the {GPU} Technology Conference ({GTC})},
+  volume    = {10},
+  pages     = {16},
+  year      = {2010},
+}
+
+@inproceedings{konstantinidis2016gpumembench,
+  author    = {Konstantinidis, Elias and Cotronis, Yiannis},
+  title     = {A Quantitative Performance Evaluation of Fast On-Chip Memories of {GPUs}},
+  booktitle = {24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing ({PDP})},
+  pages     = {448--455},
+  year      = {2016},
+  doi       = {10.1109/PDP.2016.56},
+}
+
+@article{konstantinidis2017mixbench,
+  author    = {Konstantinidis, Elias and Cotronis, Yiannis},
+  title     = {A Quantitative Roofline Model for {GPU} Kernel Performance Estimation Using Micro-Benchmarks and Hardware Metric Profiling},
+  journal   = {Journal of Parallel and Distributed Computing},
+  volume    = {107},
+  pages     = {37--56},
+  year      = {2017},
+  doi       = {10.1016/j.jpdc.2017.04.002},
+}
-- 
cgit v1.2.1