% !TeX program = xelatex
% !TeX encoding = utf8
% !TeX root = FuVar.tex

%% TODO: publish to CTAN
\documentclass[twocolumn, margin=normal]{tex/hsrzf}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Packages

%% TODO: publish to CTAN
\usepackage{tex/hsrstud}

%% Language configuration
\usepackage{polyglossia}
\setdefaultlanguage{english}

%% License configuration
\usepackage[
  type={CC},
  modifier={by-nc-sa},
  version={4.0},
  lang={english},
]{doclicense}

%% Math
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{mathtools}

%% Layout
\usepackage{enumitem}
\usepackage{booktabs}

%% Nice drawings
\usepackage{tikz}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Metadata

\course{Elektrotechnik}
\module{FuVar}
\semester{Spring Semester 2021}

\authoremail{naoki.pross@ost.ch}
\author{\textsl{Naoki Pross} -- \texttt{\theauthoremail}}

\title{\texttt{\themodule} Notes}
\date{\thesemester}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Macros and settings

%% Theorems
\newtheoremstyle{fuvarzf} % name of the style to be used
  {\topsep}
  {\topsep}
  {}
  {0pt}
  {\bfseries}
  {.}
  { }
  { }

\theoremstyle{fuvarzf}
\newtheorem{theorem}{Theorem}
\newtheorem{method}{Method}
\newtheorem{application}{Application}
\newtheorem{definition}{Definition}
\newtheorem{remark}{Remark}
\newtheorem{note}{Note}

\DeclareMathOperator{\tr}{\mathrm{tr}}

\setlist[description]{
  format = { \normalfont\itshape }
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Document

\begin{document}

\maketitle
% \tableofcontents

\section{Preface}

These are just my personal notes of the \themodule{} course, and definitely
not a rigorously constructed mathematical text. The good-looking \LaTeX{}
typesetting may trick you into thinking it is rigorous, but really, it is
not.

\section{Derivatives of vector-valued functions}

\begin{definition}[Partial derivative]
  A function \(f: \mathbb{R}^m\to\mathbb{R}\) of a vector variable
  \(\vec{v}\in\mathbb{R}^m\) has a partial derivative with respect to
  \(v_i\) defined as
  \[
    \partial_{v_i} f(\vec{v}) = f_{v_i}(\vec{v})
      = \lim_{h\to 0} \frac{f(\vec{v} + h\vec{e}_i) - f(\vec{v})}{h}
  \]
\end{definition}

\begin{theorem}[Schwarz's theorem, symmetry of partial derivatives]
  Under some generally satisfied conditions (continuity of the \(n\)-th
  order partial derivatives) it is possible to swap the order of
  differentiation:
  \[
    \partial_x \partial_y f(x,y) = \partial_y \partial_x f(x,y)
  \]
\end{theorem}

\begin{definition}[Linearization]
  A function \(f: \mathbb{R}^m\to\mathbb{R}\) has a linearization \(g\) at
  \(\vec{x}_0\) given by
  \[
    g(\vec{x}) = f(\vec{x}_0)
      + \sum_{i=1}^m \partial_{x_i} f(\vec{x}_0)(x_i - x_{i,0}) ,
  \]
  if all partial derivatives are defined at \(\vec{x}_0\).
\end{definition}

\begin{theorem}[Propagation of uncertainty]
  Given a measurement of \(m\) values in a vector \(\vec{x}\in\mathbb{R}^m\)
  with values given in the form \(x_i = \bar{x}_i \pm \sigma_{x_i}\), in the
  linear approximation the error of a dependent variable \(y = f(\vec{x})\)
  is computed with
  \[
    y = \bar{y} \pm \sigma_y
      \approx f(\bar{\vec{x}}) \pm \sqrt{\sum_{i=1}^m
        \left( \partial_{x_i} f(\bar{\vec{x}}) \sigma_{x_i}\right)^2}
  \]
\end{theorem}
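As a small worked example of the previous theorem (the numbers are made up
purely for illustration), take the power dissipated in a resistor,
\(P = f(U, R) = U^2/R\), with measured values
\(U = 10.0 \pm 0.1\,\mathrm{V}\) and \(R = 50 \pm 1\,\Omega\). With
\(\partial_U f = 2U/R\) and \(\partial_R f = -U^2/R^2\),
\[
  \sigma_P = \sqrt{
    \left(\frac{2\bar{U}}{\bar{R}}\,\sigma_U\right)^2
    + \left(-\frac{\bar{U}^2}{\bar{R}^2}\,\sigma_R\right)^2}
  = \sqrt{0.04^2 + 0.04^2}\,\mathrm{W} \approx 0.06\,\mathrm{W} ,
\]
so \(P \approx 2.00 \pm 0.06\,\mathrm{W}\).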
\begin{definition}[Gradient vector]
  The \emph{gradient} of a function \(f(\vec{x}), \vec{x}\in\mathbb{R}^m\)
  is a column vector\footnote{In matrix notation it is also often defined
  as a row vector to avoid having to do some transpositions in the Jacobian
  matrix and dot products in directional derivatives.} containing the
  derivatives in each direction.
  \[
    \grad f (\vec{x}) = \sum_{i=1}^m \partial_{x_i} f(\vec{x}) \vec{e}_i
    = \begin{pmatrix}
        \partial_{x_1} f(\vec{x}) \\
        \vdots \\
        \partial_{x_m} f(\vec{x}) \\
      \end{pmatrix}
  \]
\end{definition}

\begin{definition}[Directional derivative]
  A function \(f(\vec{x})\) has a directional derivative in direction
  \(\vec{r}\) (with \(|\vec{r}| = 1\)) given by
  \[
    \frac{\partial f}{\partial\vec{r}} = \nabla_\vec{r} f
      = \vec{r} \dotp \grad f
  \]
\end{definition}

\begin{theorem}
  The gradient vector always points towards \emph{the direction of steepest
  ascent}.
\end{theorem}

\begin{definition}[Jacobian Matrix]
  The \emph{Jacobian} \(\mx{J}_f\) (sometimes written as
  \(\frac{\partial(f_1,\ldots,f_m)}{\partial(x_1,\ldots,x_n)}\)) of a
  function \(\vec{f}: \mathbb{R}^n \to \mathbb{R}^m\) is a matrix
  \(\in\mathbb{R}^{m\times n}\) whose entry at the \(i\)-th row and
  \(j\)-th column is given by \((\mx{J}_f)_{i,j} = \partial_{x_j} f_i\), so
  \[
    \mx{J}_f = \begin{pmatrix}
      \partial_{x_1} f_1 & \cdots & \partial_{x_n} f_1 \\
      \vdots & \ddots & \vdots \\
      \partial_{x_1} f_m & \cdots & \partial_{x_n} f_m \\
    \end{pmatrix}
    = \begin{pmatrix}
      (\grad f_1)^t \\
      \vdots \\
      (\grad f_m)^t \\
    \end{pmatrix}
  \]
\end{definition}

\begin{remark}
  In the scalar case (\(m = 1\)) the Jacobian matrix is the transpose of
  the gradient vector.
\end{remark}

\begin{definition}[Hessian matrix]
  Given a function \(f: \mathbb{R}^m \to \mathbb{R}\), the square matrix
  whose entry at the \(i\)-th row and \(j\)-th column is the second
  derivative of \(f\) first with respect to \(x_j\) and then to \(x_i\) is
  known as the \emph{Hessian} matrix.
  \( \left(\mx{H}_f\right)_{i,j} = \partial_{x_i}\partial_{x_j} f \) or
  \[
    \mx{H}_f = \begin{pmatrix}
      \partial_{x_1}\partial_{x_1} f & \cdots & \partial_{x_1}\partial_{x_m} f \\
      \vdots & \ddots & \vdots \\
      \partial_{x_m}\partial_{x_1} f & \cdots & \partial_{x_m}\partial_{x_m} f \\
    \end{pmatrix}
  \]
  Because (almost always) the order of differentiation does not matter, it
  is a symmetric matrix.
\end{definition}
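To make the definitions above concrete, here is a small example of my own
(not from the lecture). For \(f(x,y) = x^2 y + y^3\) the gradient and the
Hessian are
\[
  \grad f = \begin{pmatrix} 2xy \\ x^2 + 3y^2 \end{pmatrix} ,
  \qquad
  \mx{H}_f = \begin{pmatrix} 2y & 2x \\ 2x & 6y \end{pmatrix} ,
\]
and the Hessian is symmetric, as expected from Schwarz's theorem.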
\section{Methods for maximization and minimization problems}

\begin{method}[Find stationary points]
  Given a function \(f: D \subseteq \mathbb{R}^m \to \mathbb{R}\), to find
  its maxima and minima we shall consider the points
  \begin{itemize}
    \item that are on the boundary of the domain \(\partial D\),
    \item where the gradient \(\grad f\) is not defined,
    \item that are stationary, i.e. where \(\grad f = \vec{0}\).
  \end{itemize}
\end{method}

\begin{method}[Determine the type of stationary point for 2 dimensions]
  Given a scalar function of two variables \(f(x,y)\) and a stationary
  point \(\vec{x}_s\) (where \(\grad f(\vec{x}_s) = \vec{0}\)), we define
  the \emph{discriminant}
  \[
    \Delta = \partial_x^2 f \, \partial_y^2 f
      - \left(\partial_x \partial_y f\right)^2
  \]
  \begin{itemize}
    \item if \(\Delta > 0\) then \(\vec{x}_s\) is an extremum: if
      \(\partial_x^2 f(\vec{x}_s) < 0\) it is a maximum, whereas if
      \(\partial_x^2 f(\vec{x}_s) > 0\) it is a minimum;
    \item if \(\Delta < 0\) then \(\vec{x}_s\) is a saddle point;
    \item if \(\Delta = 0\) we need to analyze further.
  \end{itemize}
\end{method}

\begin{remark}
  The previous method is obtained by studying the second directional
  derivative \(\nabla_\vec{r}\nabla_\vec{r} f\) at the stationary point in
  the direction of a vector
  \(\vec{r} = \vec{e}_1\cos(\alpha) + \vec{e}_2\sin(\alpha)\).
\end{remark}

\begin{method}[Determine the type of stationary point in higher dimensions]
  Given a scalar function \(f: \mathbb{R}^m \to \mathbb{R}\) and a
  stationary point \(\vec{x}_s\) (where \(\grad f(\vec{x}_s) = \vec{0}\)),
  we compute the Hessian matrix \(\mx{H}_f(\vec{x}_s)\). Then we compute
  its eigenvalues \(\lambda_1, \ldots, \lambda_m\) and
  \begin{itemize}
    \item if all \(\lambda_i > 0\), the point is a minimum;
    \item if all \(\lambda_i < 0\), the point is a maximum;
    \item if there are both positive and negative eigenvalues, it is a
      saddle point.
  \end{itemize}
  In the remaining cases, i.e. when some \(\lambda_i = 0\) and the nonzero
  eigenvalues all have the same sign, further analysis is required.
\end{method}

\begin{remark}
  Recall that the eigenvalues of a matrix are the solutions of the
  characteristic polynomial
  \(\det\left(\mx{H} - \lambda\mx{I}\right) = 0\), which yields
  \(\dim(\mx{H})\) values \(\lambda_i\) (counted with multiplicity).
  Plugging a \(\lambda_i\) back into
  \((\mx{H} - \lambda_i\mx{I})\vec{x} = \vec{0}\) gives an underdetermined
  system of equations, whose nontrivial solutions are the associated
  eigenvectors.
\end{remark}

\begin{method}[Quickly find the eigenvalues of a \(2\times 2\) matrix]
  This is a nice trick. For a \(2\times 2\) matrix \(\mx{H}\) with entries
  \(a, b, c, d\) (read row by row), let
  \[
    m = \frac{1}{2}\tr \mx{H} = \frac{a + d}{2} ,
    \quad
    p = \det\mx{H} = ad - bc ,
  \]
  then \(\lambda_{1,2} = m \pm \sqrt{m^2 - p}\).
\end{method}

\begin{method}[Search for a constrained extremum in 2 dimensions]
  Let \(n(x,y) = 0\) be a constraint in the search of the extrema of a
  function \(f: D \subseteq \mathbb{R}^2 \to \mathbb{R}\). To find the
  extrema we look for points
  \begin{itemize}
    \item on the boundary \(\vec{u} \in \partial D\) where
      \(n(\vec{u}) = 0\);
    \item \(\vec{u}\) where the gradient either does not exist or is
      \(\vec{0}\), and that satisfy \(n(\vec{u}) = 0\);
    \item that solve the system of equations
      \[
        \begin{cases}
          \partial_x f(\vec{u}) \cdot \partial_y n(\vec{u})
            = \partial_y f(\vec{u}) \cdot \partial_x n(\vec{u}) \\
          n(\vec{u}) = 0
        \end{cases}
      \]
  \end{itemize}
\end{method}

\begin{figure}
  \centering
  \includegraphics{img/lagrange-multipliers}
  \caption{
    Intuition for the method of Lagrange multipliers. Extrema of a
    constrained function are where \(\grad f\) is proportional to
    \(\grad n\).
  }
\end{figure}

\begin{method}[%
    Search for a constrained extremum in higher dimensions,
    method of Lagrange multipliers]
  We wish to find the extrema of
  \(f: D \subseteq \mathbb{R}^m \to \mathbb{R}\) under \(k < m\)
  constraints \(n_1 = 0, \ldots, n_k = 0\). To find the extrema we consider
  the following points:
  \begin{itemize}
    \item Points on the boundary \(\vec{u} \in \partial D\) that satisfy
      \(n_i(\vec{u}) = 0\) for all \(1 \leq i \leq k\);
    \item Points \(\vec{u} \in D\) where either
      \begin{itemize}
        \item any of \(\grad f, \grad n_1, \ldots, \grad n_k\) do not
          exist, or
        \item \(\grad n_1, \ldots, \grad n_k\) are linearly
          \emph{dependent},
      \end{itemize}
      and that satisfy \(0 = n_1(\vec{u}) = \ldots = n_k(\vec{u})\);
    \item Points that solve the system of \(m + k\) equations
      \[
        \begin{dcases}
          \grad f(\vec{u}) = \sum_{i = 1}^k \lambda_i \grad n_i(\vec{u})
            & (m\text{-dimensional}) \\
          n_i(\vec{u}) = 0 & \text{for } 1 \leq i \leq k
        \end{dcases}
      \]
      The \(\lambda_i\) values are known as \emph{Lagrange multipliers}.
      The same calculation can be written more compactly by defining the
      \emph{Lagrangian}, a function of the \(m + k\) variables \(\vec{u}\)
      and \(\vec{\lambda} = (\lambda_1, \ldots, \lambda_k)\),
      \[
        \mathcal{L}(\vec{u}, \vec{\lambda})
          = f(\vec{u}) - \sum_{i = 1}^k \lambda_i n_i(\vec{u}) ,
      \]
      and then solving
      \(\grad \mathcal{L}(\vec{u}, \vec{\lambda}) = \vec{0}\). This form is
      generally used in numerical computations and is not very useful by
      hand.
  \end{itemize}
\end{method}
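A tiny example of my own to see the mechanics: find the extrema of
\(f(x,y) = xy\) under the single constraint \(n(x,y) = x + y - 2 = 0\). The
system \(\grad f = \lambda \grad n\) reads
\((y, x)^t = \lambda (1, 1)^t\), so \(x = y = \lambda\), and the constraint
then gives \(\lambda = 1\). The only candidate is \((1, 1)\) with
\(f(1,1) = 1\), which is a maximum, since along the constraint
\(f = x(2 - x)\).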
\section{Integration of scalar functions of several variables}

\begin{figure}
  \centering
  \includegraphics{img/double-integral}
  \caption{
    Double integral.
    \label{fig:double-integral}
  }
\end{figure}

\begin{theorem}[Change the order of integration for double integrals]
  For a double integral over a region \(S\) (see Fig.
  \ref{fig:double-integral}) we need to compute
  \[
    \iint_S f(x,y) \,ds
      = \int\limits_{x_1}^{x_2} \int\limits_{y_1(x)}^{y_2(x)}
        f(x,y) \,dy\,dx .
  \]
  If \(y_1(x)\) and \(y_2(x)\) are bijective we can swap the order of
  integration by finding the inverse functions \(x_1(y)\) and \(x_2(y)\).
  If they are not bijective (like in Fig. \ref{fig:double-integral}), the
  region must be split into smaller parts. If the region is a rectangle it
  is always possible to change the order of integration.
\end{theorem}

\begin{theorem}[Transformation of coordinates in 2 dimensions]
  \label{thm:transform-coords}
  Let \(x(u,v)\) and \(y(u,v)\) be two ``nice'' functions that transform
  the coordinate system, i.e. a bijection from \(S'\) to \(S\) with
  continuous partial derivatives and nonzero Jacobian determinant
  \(|\mx{J}_f| = \partial_u x \,\partial_v y - \partial_v x \,\partial_u y\).
  Then
  \[
    \iint_S f(x,y) \,dx\,dy
      = \iint_{S'} f(x(u,v), y(u,v)) \, |\mx{J}_f| \,du\,dv
  \]
\end{theorem}

\begin{theorem}[Transformation of coordinates]
  The generalization of theorem \ref{thm:transform-coords} is quite simple.
  For an \(m\)-fold integral of a function
  \(f:\mathbb{R}^m\to\mathbb{R}\) over a region \(B\), we let
  \(\vec{x}(\vec{u})\) be a ``nice'' transformation of the coordinate
  system. Then as before
  \[
    \int_B f(\vec{x}) \,ds
      = \int_{B'} f(\vec{x}(\vec{u})) \, |\mx{J}_f| \,ds
  \]
\end{theorem}

\begin{table}
  \centering
  \begin{tabular}{l >{\(}l<{\)} >{\(}l<{\)}}
    \toprule
    Coordinates & \text{Volume } dv & \text{Surface } d\vec{s} \\
    \midrule
    Cartesian   & - & dx\,dy \\
    Polar       & - & r\,dr\,d\phi \\
    Curvilinear & - & |\mx{J}_f|\,du\,dv \\
    \midrule
    Cartesian   & dx\,dy\,dz       & \uvec{z}\,dx\,dy \\
    Cylindrical & r\,dr\,d\phi\,dz & \uvec{z}r\,dr\,d\phi \\
                &                  & \uvec{\phi}\,dr\,dz \\
                &                  & \uvec{r}r\,d\phi\,dz \\
    Spherical   & r^2\sin\theta\,dr\,d\theta\,d\phi
                & \uvec{r}r^2\sin\theta\,d\theta\,d\phi \\
    Curvilinear & |\mx{J}_f|\,du\,dv\,dw & - \\
    \bottomrule
  \end{tabular}
  \caption{Differential elements for integration.}
\end{table}

\section{Derivatives of curves}

\section*{License}
\doclicenseText

\begin{center}
  \doclicenseImage
\end{center}

\end{document}

% vim:ts=2 sw=2 et spell: