\documentclass[10pt]{article}

\title{Dynamic Hierarchical Attention Study}
\author{Aardvark}
\date{\today}

\begin{document}

\maketitle

\begin{abstract}
This study examines Dynamic Hierarchical Attention (DHA), which combines local and global attention. DHA achieves a loss comparable to the baseline (4.980 vs.\ 4.9266) but uses more memory (46\,GB vs.\ 31\,GB).
\end{abstract}

\section{Introduction}
Dynamic Hierarchical Attention (DHA) combines local and global attention. Our experiments show:
\begin{itemize}
\item Comparable performance to baseline
\item Higher memory requirements
\item Stable training dynamics
\end{itemize}

\section{Method}
DHA computes attention weights using a learned softmax function.

Implementation details:
\begin{itemize}
\item 8 attention heads
\item 256-token window size
\item Learned gating weights
\end{itemize}

\section{Results}
\begin{tabular}{ll}
Method & Loss \\
\hline
Baseline & 4.9266 \\
DHA & 4.980 \\
\end{tabular}

\section{Conclusion}
DHA shows promise but requires further optimization, particularly of its memory usage, before it is practical to use.

\end{document}
