% Paper in the edited volume "Parallel Computing: On the Road to Exascale"
% (Advances in Parallel Computing, vol. 27), pp. 591--599.
% NOTE(review): the note field below holds repository housekeeping text and will be
% printed by most bibliography styles; consider moving it to an ignored field.
@inproceedings{55cf4435439344b692139979c1aef4ed,
  author    = {Jackson, William and Dubaniowski, Mateusz Iwo},
  title     = {{LU} Factorisation on {Xeon} and {Xeon Phi} Processors},
  editor    = {Joubert, Gerhard R. and Leather, Hugh and Parsons, Mark and Peters, Frans and Sawyer, Mark},
  booktitle = {Parallel Computing: On the Road to Exascale},
  series    = {Advances in Parallel Computing},
  volume    = {27},
  edition   = {2016},
  pages     = {591--599},
  publisher = {IOS Press Ebooks},
  year      = {2016},
  month     = mar,
  day       = {31},
  isbn      = {978-1-61499-620-0},
  doi       = {10.3233/978-1-61499-621-7-591},
  language  = {English},
  abstract  = {This paper outlines the parallelisation and vectorisation methods we have used to port a LU decomposition library to the Xeon Phi co-processor. We ported a LU factorisation algorithm, which utilizes the Gaussian elimination method to perform the decomposition, using Intel LEO directives, OpenMP 4.0 directives, Intel's Cilk array notation, and vectorisation directives. We compare the performance achieved with these different methods, investigate the cost of data transfer on the overall time to solution, and analyse the impact of these optimization and parallelisation techniques on code running on the host processors as well. The results show that performance can be improved on the Xeon Phi by optimising the memory operations, and that Cilk array notation can benefit this benchmark on standard processors but do not have the same impact on the Xeon Phi co-processor. We have also demonstrated cases where the Xeon Phi will compute our implementations faster than we can run them on a node of a HPC system, and that our implementations are not as efficient as the LU factorisation implemented in the mkl library.},
  note      = {No acceptance date available and only a publication date of 2016. In order to remove from our reports for action used workaround of using 31.03.16 date for both. If this should be considered for REF2021 contact Open Access team.},
}