Skip to content

Translational Data Science in Health

I investigate a wide range of applications of data science to health, ranging from analysis of medical imaging to diagnostics. My research with the Alfred Hospital led to a revision to Medical Emergency Team protocols that saves $500,000 per annum while improving clinical outcomes.

Publications

Cell graph neural networks enable the precise prediction of patient survival in gastric cancer.
Wang, Y., Wang, Y. G., Hu, C., Li, M., Fan, Y., Otter, N., Sam, I., Gou, H., Hu, Y., Kwok, T., Zalcberg, J., Boussioutas, A., Daly, R. J., Montúfar, G., Liò, P., Xu, D., Webb, G. I., & Song, J.
npj Precision Oncology, 6(1), Art. no. 45, 2022.
[Bibtex] [Abstract]  → Access on publisher site

@Article{Wang2022,
  author        = {Wang, Yanan and Wang, Yu Guang and Hu, Changyuan and Li, Ming and Fan, Yanan and Otter, Nina and Sam, Ikuan and Gou, Hongquan and Hu, Yiqun and Kwok, Terry and Zalcberg, John and Boussioutas, Alex and Daly, Roger J. and Montúfar, Guido and Liò, Pietro and Xu, Dakang and Webb, Geoffrey I. and Song, Jiangning},
  title         = {Cell graph neural networks enable the precise prediction of patient survival in gastric cancer},
  journal       = {npj Precision Oncology},
  year          = {2022},
  volume        = {6},
  number        = {1},
  articlenumber = {45},
  issn          = {2397-768X},
  doi           = {10.1038/s41698-022-00285-5},
  url           = {https://rdcu.be/cQeFD},
  keywords      = {health},
  related       = {health},
  abstract      = {Gastric cancer is one of the deadliest cancers worldwide. An accurate prognosis is essential for effective clinical assessment and treatment. Spatial patterns in the tumor microenvironment (TME) are conceptually indicative of the staging and progression of gastric cancer patients. Using spatial patterns of the TME by integrating and transforming the multiplexed immunohistochemistry (mIHC) images as Cell-Graphs, we propose a graph neural network-based approach, termed Cell−Graph Signature or CGSignature, powered by artificial intelligence, for the digital staging of TME and precise prediction of patient survival in gastric cancer. In this study, patient survival prediction is formulated as either a binary (short-term and long-term) or ternary (short-term, medium-term, and long-term) classification task. Extensive benchmarking experiments demonstrate that the CGSignature achieves outstanding model performance, with Area Under the Receiver Operating Characteristic curve of 0.960 +/- 0.01, and 0.771+/-0.024 to 0.904+/-0.012 for the binary- and ternary-classification, respectively. Moreover, Kaplan-Meier survival analysis indicates that the 'digital grade' cancer staging produced by CGSignature provides a remarkable capability in discriminating both binary and ternary classes with statistical significance (P value < 0.0001), significantly outperforming the AJCC 8th edition Tumor Node Metastasis staging system. Using Cell-Graphs extracted from mIHC images, CGSignature improves the assessment of the link between the TME spatial patterns and patient prognosis. Our study suggests the feasibility and benefits of such an artificial intelligence-powered digital staging system in diagnostic pathology and precision oncology.},
}
ABSTRACT Gastric cancer is one of the deadliest cancers worldwide. An accurate prognosis is essential for effective clinical assessment and treatment. Spatial patterns in the tumor microenvironment (TME) are conceptually indicative of the staging and progression of gastric cancer patients. Using spatial patterns of the TME by integrating and transforming the multiplexed immunohistochemistry (mIHC) images as Cell-Graphs, we propose a graph neural network-based approach, termed Cell−Graph Signature or CGSignature, powered by artificial intelligence, for the digital staging of TME and precise prediction of patient survival in gastric cancer. In this study, patient survival prediction is formulated as either a binary (short-term and long-term) or ternary (short-term, medium-term, and long-term) classification task. Extensive benchmarking experiments demonstrate that the CGSignature achieves outstanding model performance, with Area Under the Receiver Operating Characteristic curve of 0.960 +/- 0.01, and 0.771+/-0.024 to 0.904+/-0.012 for the binary- and ternary-classification, respectively. Moreover, Kaplan-Meier survival analysis indicates that the 'digital grade' cancer staging produced by CGSignature provides a remarkable capability in discriminating both binary and ternary classes with statistical significance (P value < 0.0001), significantly outperforming the AJCC 8th edition Tumor Node Metastasis staging system. Using Cell-Graphs extracted from mIHC images, CGSignature improves the assessment of the link between the TME spatial patterns and patient prognosis. Our study suggests the feasibility and benefits of such an artificial intelligence-powered digital staging system in diagnostic pathology and precision oncology.

HEAL: an automated deep learning framework for cancer histopathology image analysis.
Wang, Y., Coudray, N., Zhao, Y., Li, F., Hu, C., Zhang, Y., Imoto, S., Tsirigos, A., Webb, G. I., Daly, R. J., & Song, J.
Bioinformatics, 37(22), 4291-4295, 2021.
[Bibtex] [Abstract]  → Access on publisher site

@Article{Wang2021,
  author    = {Wang, Yanan and Coudray, Nicolas and Zhao, Yun and Li, Fuyi and Hu, Changyuan and Zhang, Yao-Zhong and Imoto, Seiya and Tsirigos, Aristotelis and Webb, Geoffrey I and Daly, Roger J and Song, Jiangning},
  title     = {{HEAL}: an automated deep learning framework for cancer histopathology image analysis},
  journal   = {Bioinformatics},
  year      = {2021},
  volume    = {37},
  number    = {22},
  pages     = {4291--4295},
  publisher = {Oxford University Press ({OUP})},
  doi       = {10.1093/bioinformatics/btab380},
  keywords  = {health},
  related   = {health},
  abstract  = {Digital pathology supports analysis of histopathological images using deep learning methods at a large-scale. However, applications of deep learning in this area have been limited by the complexities of configuration of the computational environment and of hyperparameter optimization, which hinder deployment and reduce reproducibility. Here, we propose HEAL, a deep learning-based automated framework for easy, flexible, and multi-faceted histopathological image analysis. We demonstrate its utility and functionality by performing two case studies on lung cancer and one on colon cancer. Leveraging the capability of Docker, HEAL represents an ideal end-to-end tool to conduct complex histopathological analysis and enables deep learning in a broad range of applications for cancer image analysis. Supplementary data are available at Bioinformatics online.},
}
ABSTRACT Digital pathology supports analysis of histopathological images using deep learning methods at a large-scale. However, applications of deep learning in this area have been limited by the complexities of configuration of the computational environment and of hyperparameter optimization, which hinder deployment and reduce reproducibility. Here, we propose HEAL, a deep learning-based automated framework for easy, flexible, and multi-faceted histopathological image analysis. We demonstrate its utility and functionality by performing two case studies on lung cancer and one on colon cancer. Leveraging the capability of Docker, HEAL represents an ideal end-to-end tool to conduct complex histopathological analysis and enables deep learning in a broad range of applications for cancer image analysis. Supplementary data are available at Bioinformatics online.

OCTID: a one-class learning-based Python package for tumor image detection.
Wang, Y., Yang, L., Webb, G. I., Ge, Z., & Song, J.
Bioinformatics, 37(21), 3986–3988, 2021.
[Bibtex] [Abstract]  → Access on publisher site

@Article{10.1093/bioinformatics/btab416,
  author   = {Wang, Yanan and Yang, Litao and Webb, Geoffrey I and Ge, Zongyuan and Song, Jiangning},
  title    = {{OCTID}: a one-class learning-based {Python} package for tumor image detection},
  journal  = {Bioinformatics},
  year     = {2021},
  volume   = {37},
  number   = {21},
  pages    = {3986--3988},
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btab416},
  keywords = {health},
  related  = {health},
  abstract = {Tumor tile selection is a necessary prerequisite in patch-based cancer whole slide image analysis, which is labor-intensive and requires expertise. Whole slides are annotated as tumor or tumor free, but tiles within a tumor slide are not. As all tiles within a tumor free slide are tumor free, these can be used to capture tumor-free patterns using the one-class learning strategy. We present a Python package, termed OCTID, which combines a pretrained convolutional neural network (CNN) model, Uniform Manifold Approximation and Projection (UMAP) and one-class support vector machine to achieve accurate tumor tile classification using a training set of tumor free tiles. Benchmarking experiments on four H&E image datasets achieved remarkable performance in terms of F1-score (0.90 +/- 0.06), Matthews correlation coefficient (0.93 +/- 0.05) and accuracy (0.94 +/- 0.03). Detailed information can be found in the Supplementary File. Supplementary data are available at Bioinformatics online.},
}
ABSTRACT Tumor tile selection is a necessary prerequisite in patch-based cancer whole slide image analysis, which is labor-intensive and requires expertise. Whole slides are annotated as tumor or tumor free, but tiles within a tumor slide are not. As all tiles within a tumor free slide are tumor free, these can be used to capture tumor-free patterns using the one-class learning strategy. We present a Python package, termed OCTID, which combines a pretrained convolutional neural network (CNN) model, Uniform Manifold Approximation and Projection (UMAP) and one-class support vector machine to achieve accurate tumor tile classification using a training set of tumor free tiles. Benchmarking experiments on four H&E image datasets achieved remarkable performance in terms of F1-score (0.90 +/- 0.06), Matthews correlation coefficient (0.93 +/- 0.05) and accuracy (0.94 +/- 0.03). Detailed information can be found in the Supplementary File. Supplementary data are available at Bioinformatics online.

Designing a more efficient, effective and safe Medical Emergency Team (MET) service using data analysis.
Bergmeir, C., Bilgrami, I., Bain, C., Webb, G. I., Orosz, J., & Pilcher, D.
PLoS ONE, 12(12), Art. no. e0188688, 2017.
[Bibtex]  → Access on publisher site

@Article{BergmeirEtAl2017,
  author        = {Bergmeir, Christoph and Bilgrami, Irma and Bain, Christopher and Webb, Geoffrey I and Orosz, Judit and Pilcher, David},
  title         = {Designing a more efficient, effective and safe {Medical Emergency Team} ({MET}) service using data analysis},
  journal       = {PLoS ONE},
  year          = {2017},
  volume        = {12},
  number        = {12},
  articlenumber = {e0188688},
  doi           = {10.1371/journal.pone.0188688},
  keywords      = {health},
  related       = {health},
}
ABSTRACT 

Identifying markers of pathology in SAXS data of malignant tissues of the brain.
Siu, K. K. W., Butler, S. M., Beveridge, T., Gillam, J. E., Hall, C. J., Kaye, A. H., Lewis, R. A., Mannan, K., McLoughlin, G., Pearson, S., Round, A. R., Schultke, E., Webb, G. I., & Wilkinson, S. J.
Nuclear Instruments and Methods in Physics Research A, 548, 140-146, 2005.
[Bibtex] [Abstract]  → Download PDF  → Access on publisher site

@Article{SiuEtAl05,
  author    = {Siu, K. K. W. and Butler, S. M. and Beveridge, T. and Gillam, J. E. and Hall, C. J. and Kaye, A. H. and Lewis, R. A. and Mannan, K. and McLoughlin, G. and Pearson, S. and Round, A. R. and Schultke, E. and Webb, G. I. and Wilkinson, S. J.},
  title     = {Identifying markers of pathology in {SAXS} data of malignant tissues of the brain},
  journal   = {Nuclear Instruments and Methods in Physics Research A},
  year      = {2005},
  volume    = {548},
  pages     = {140--146},
  publisher = {Elsevier},
  doi       = {10.1016/j.nima.2005.03.081},
  keywords  = {health},
  related   = {health},
  abstract  = {Conventional neuropathological analysis for brain malignancies is heavily reliant on the observation of morphological abnormalities, observed in thin, stained sections of tissue. Small Angle X-ray Scattering (SAXS) data provide an alternative means of distinguishing pathology by examining the ultra-structural (nanometer length scales) characteristics of tissue. To evaluate the diagnostic potential of SAXS for brain tumors, data was collected from normal, malignant and benign tissues of the human brain at station 2.1 of the Daresbury Laboratory Synchrotron Radiation Source and subjected to data mining and multivariate statistical analysis. The results suggest SAXS data may be an effective classifier of malignancy.},
}
ABSTRACT Conventional neuropathological analysis for brain malignancies is heavily reliant on the observation of morphological abnormalities, observed in thin, stained sections of tissue. Small Angle X-ray Scattering (SAXS) data provide an alternative means of distinguishing pathology by examining the ultra-structural (nanometer length scales) characteristics of tissue. To evaluate the diagnostic potential of SAXS for brain tumors, data was collected from normal, malignant and benign tissues of the human brain at station 2.1 of the Daresbury Laboratory Synchrotron Radiation Source and subjected to data mining and multivariate statistical analysis. The results suggest SAXS data may be an effective classifier of malignancy.

A Case Study in Feature Invention for Breast Cancer Diagnosis Using X-Ray Scatter Images.
Butler, S. M., Webb, G. I., & Lewis, R. A.
Lecture Notes in Artificial Intelligence Vol. 2903: Proceedings of the 16th Australian Conference on Artificial Intelligence (AI 03), Berlin/Heidelberg, pp. 677-685, 2003.
[Bibtex] [Abstract]  → Download PDF  → Access on publisher site

@InProceedings{ButlerWebbLewis03,
  author    = {Butler, S. M. and Webb, G. I. and Lewis, R. A.},
  title     = {A Case Study in Feature Invention for Breast Cancer Diagnosis Using {X-Ray} Scatter Images},
  booktitle = {Lecture Notes in Artificial Intelligence Vol. 2903: Proceedings of the 16th Australian Conference on Artificial Intelligence (AI 03)},
  editor    = {Gedeon, T. D. and Fung, L. C. C.},
  year      = {2003},
  pages     = {677--685},
  publisher = {Springer},
  address   = {Berlin/Heidelberg},
  doi       = {10.1007/978-3-540-24581-0_58},
  keywords  = {health},
  related   = {health},
  abstract  = {X-ray mammography is the current method for screening for breast cancer, and like any technique, has its limitations. Several groups have reported differences in the X-ray scattering patterns of normal and tumour tissue from the breast. This gives rise to the hope that X-ray scatter analysis techniques may lead to a more accurate and cost effective method of diagnosing breast cancer which lends itself to automation. This is a particularly challenging exercise due to the inherent complexity of the information content in X-ray scatter patterns from complex heterogenous tissue samples. We use a simple naive Bayes classifier, coupled with Equal Frequency Discretization (EFD) as our classification system. High-level features are extracted from the low-level pixel data. This paper reports some preliminary results in the ongoing development of this classification method that can distinguish between the diffraction patterns of normal and cancerous tissue, with particular emphasis on the invention of features for classification.},
}
ABSTRACT X-ray mammography is the current method for screening for breast cancer, and like any technique, has its limitations. Several groups have reported differences in the X-ray scattering patterns of normal and tumour tissue from the breast. This gives rise to the hope that X-ray scatter analysis techniques may lead to a more accurate and cost effective method of diagnosing breast cancer which lends itself to automation. This is a particularly challenging exercise due to the inherent complexity of the information content in X-ray scatter patterns from complex heterogenous tissue samples. We use a simple naive Bayes classifier, coupled with Equal Frequency Discretization (EFD) as our classification system. High-level features are extracted from the low-level pixel data. This paper reports some preliminary results in the ongoing development of this classification method that can distinguish between the diffraction patterns of normal and cancerous tissue, with particular emphasis on the invention of features for classification.

Application Of Machine Learning To A Renal Biopsy Data-Base.
Agar, J., & Webb, G. I.
Nephrology, Dialysis and Transplantation, 7, 472-478, 1992.
[Bibtex] [Abstract]  → Access on publisher site

@Article{AgarWebb92,
  author      = {Agar, J. and Webb, G. I.},
  title       = {Application Of Machine Learning To A Renal Biopsy Data-Base},
  journal     = {Nephrology, Dialysis and Transplantation},
  year        = {1992},
  volume      = {7},
  pages       = {472--478},
  publisher   = {Oxford University Press},
  address     = {Oxford UK},
  url         = {http://ndt.oxfordjournals.org/content/7/6/472.abstract},
  keywords    = {Rule Learning, health},
  related     = {health},
  audit-trail = {28/10/03 Link to abstract only at this stage available via Oxford Press.},
  abstract    = {This pilot study has applied machine learning (artificial intelligence derived qualitative analysis procedures) to yield non-invasive techniques for the assessment and interpretation of clinical and laboratory data in glomerular disease. To evaluate the appropriateness of these techniques, they were applied to subsets of a small database of 284 case histories and the resulting procedures evaluated against the remaining cases. Over such evaluations, the following average diagnostic accuracies were obtained: microscopic polyarteritis, 95.37%; minimal lesion nephrotic syndrome, 96.50%; immunoglobulin A nephropathy, 81.26%; minor changes, 93.66%; lupus nephritis, 96.27%; focal glomerulosclerosis, 92.06%; mesangial proliferative glomerulonephritis, 92.56%; and membranous nephropathy, 92.56%. Although in general the new diagnostic system is not yet as accurate as the histological evaluation of renal biopsy specimens, it shows promise of adding a further dimension to the diagnostic process. When the machine learning techniques are applied to a larger database, greater diagnostic accuracy should be obtained. It may allow accurate non-invasive diagnosis of some cases of glomerular disease without the need for renal biopsy. This may reduce both the cost and the morbidity of the investigation of glomerular disease and may be of particular value in situations where renal biopsy is considered hazardous or contraindicated.},
}
ABSTRACT This pilot study has applied machine learning (artificial intelligence derived qualitative analysis procedures) to yield non-invasive techniques for the assessment and interpretation of clinical and laboratory data in glomerular disease. To evaluate the appropriateness of these techniques, they were applied to subsets of a small database of 284 case histories and the resulting procedures evaluated against the remaining cases. Over such evaluations, the following average diagnostic accuracies were obtained: microscopic polyarteritis, 95.37%; minimal lesion nephrotic syndrome, 96.50%; immunoglobulin A nephropathy, 81.26%; minor changes, 93.66%; lupus nephritis, 96.27%; focal glomerulosclerosis, 92.06%; mesangial proliferative glomerulonephritis, 92.56%; and membranous nephropathy, 92.56%. Although in general the new diagnostic system is not yet as accurate as the histological evaluation of renal biopsy specimens, it shows promise of adding a further dimension to the diagnostic process. When the machine learning techniques are applied to a larger database, greater diagnostic accuracy should be obtained. It may allow accurate non-invasive diagnosis of some cases of glomerular disease without the need for renal biopsy. This may reduce both the cost and the morbidity of the investigation of glomerular disease and may be of particular value in situations where renal biopsy is considered hazardous or contraindicated.

The Application of Machine Learning to the Diagnosis of Glomerular Disease.
Webb, G. I., & Agar, J.
Proceedings of the IJCAI Workshop W.15: Representing Knowledge in Medical Decision Support Systems, pp. 8.1-8.8, 1991.
[Bibtex] [Abstract]  → Download PDF

@InProceedings{WebbAgar91,
  author      = {Webb, G. I. and Agar, J.},
  title       = {The Application of Machine Learning to the Diagnosis of Glomerular Disease},
  booktitle   = {Proceedings of the {IJCAI} Workshop W.15: Representing Knowledge in Medical Decision Support Systems},
  editor      = {Sarmeinto, C.},
  year        = {1991},
  pages       = {8.1--8.8},
  location    = {Sydney, Australia},
  keywords    = {Rule Learning, health},
  related     = {health},
  audit-trail = {Reconstructed paper posted May 2006},
  abstract    = {A pilot study has applied the DLG machine learning algorithm to create expert systems for the assessment and interpretation of clinical and laboratory data in glomerular disease. Despite the limited size of the data-set and major deficiencies in the information recorded therein, for one of the conditions examined in this study, microscopic polyarteritis, a consistent diagnostic accuracy of 100% was obtained. With expansion of the data base, it is possible that techniques will be derived that provide accurate non-invasive diagnosis of some cases of glomerular disease, thus obviating the need for renal biopsy. Success in this project will result in significant reductions in both the cost and the morbidity associated with the investigation of glomerular disease.},
}
ABSTRACT A pilot study has applied the DLG machine learning algorithm to create expert systems for the assessment and interpretation of clinical and laboratory data in glomerular disease. Despite the limited size of the data-set and major deficiencies in the information recorded therein, for one of the conditions examined in this study, microscopic polyarteritis, a consistent diagnostic accuracy of 100% was obtained. With expansion of the data base, it is possible that techniques will be derived that provide accurate non-invasive diagnosis of some cases of glomerular disease, thus obviating the need for renal biopsy. Success in this project will result in significant reductions in both the cost and the morbidity associated with the investigation of glomerular disease.

COVID-19 restrictions and the incidence and prevalence of prescription opioid use in Australia – a nation-wide study.
Jung, M., Lukose, D., Nielsen, S., Bell, S. J., Webb, G. I., & Ilomäki, J.
British Journal of Clinical Pharmacology, n/a(n/a).
[Bibtex] [Abstract]  → Access on publisher site

@Article{Jung,
  author      = {Jung, Monica and Lukose, Dickson and Nielsen, Suzanne and Bell, J. Simon and Webb, Geoffrey I. and Ilomäki, Jenni},
  title       = {{COVID-19} restrictions and the incidence and prevalence of prescription opioid use in {Australia} – a nation-wide study},
  journal     = {British Journal of Clinical Pharmacology},
  doi         = {10.1111/bcp.15577},
  eprint      = {https://bpspubs.onlinelibrary.wiley.com/doi/pdf/10.1111/bcp.15577},
  url         = {https://bpspubs.onlinelibrary.wiley.com/doi/abs/10.1111/bcp.15577},
  keywords    = {health, opioids, chronic pain, drug utilisation, medication safety, quality use of medicines},
  related     = {health},
  audit-trail = {Volume and number were recorded as the placeholder n/a and have been removed; add year, volume, number and pages once known.},
  abstract    = {The COVID-19 pandemic has disrupted seeking and delivery of healthcare. Different Australian jurisdictions implemented different COVID-19 restrictions. We used Australian national pharmacy dispensing data to conduct interrupted time series analyses to examine the incidence and prevalence of opioid dispensing in different jurisdictions. Following nationwide COVID-19 restrictions, the incidence dropped by -0.40 [-0.50, -0.31], -0.33 [-0.46, -0.21] and -0.21 [-0.37, -0.04] /1000 people/week and prevalence dropped by -0.85 [-1.39, -0.31], -0.54 [-1.01, -0.07] and -0.62 [-0.99, -0.25] /1000 people/week in Victoria, New South Wales and other jurisdictions, respectively. Incidence and prevalence increased by 0.29 [0.13, 0.44] and 0.72 [0.11, 1.33] /1000 people/week, respectively in Victoria post-lockdown; no significant changes were observed in other jurisdictions. No significant changes were observed in the initiation of long-term opioid use in any jurisdictions. More stringent restrictions coincided with more pronounced reductions in overall opioid initiation, but initiation of long-term opioid use did not change.},
}
ABSTRACT The COVID-19 pandemic has disrupted seeking and delivery of healthcare. Different Australian jurisdictions implemented different COVID-19 restrictions. We used Australian national pharmacy dispensing data to conduct interrupted time series analyses to examine the incidence and prevalence of opioid dispensing in different jurisdictions. Following nationwide COVID-19 restrictions, the incidence dropped by -0.40 [-0.50, -0.31], -0.33 [-0.46, -0.21] and -0.21 [-0.37, -0.04] /1000 people/week and prevalence dropped by -0.85 [-1.39, -0.31], -0.54 [-1.01, -0.07] and -0.62 [-0.99, -0.25] /1000 people/week in Victoria, New South Wales and other jurisdictions, respectively. Incidence and prevalence increased by 0.29 [0.13, 0.44] and 0.72 [0.11, 1.33] /1000 people/week, respectively in Victoria post-lockdown; no significant changes were observed in other jurisdictions. No significant changes were observed in the initiation of long-term opioid use in any jurisdictions. More stringent restrictions coincided with more pronounced reductions in overall opioid initiation, but initiation of long-term opioid use did not change.