Dual GPU configuration on Ubuntu 16.04

Alan-Cugler picture Alan-Cugler · Apr 3, 2018 · Viewed 9.7k times · Source

Objective:

Have a multi-monitor extended display with monitors on two identical GPUs.

Hardware:

  • Geforce GTX 970 (x2)
  • DELL E207WFP Monitor (x6)
  • Z97-PRO motherboard
  • i5-4690K CPU @3.50GHz
  • 24GB of RAM

Software:

  • Ubuntu 16.04.04 LTS
  • 4.4.0-116-generic kernel
  • NVIDIA driver version 384.111

Test procedure

1) Hardware Tests

  • Tested each monitor and their ports. (success)
  • Tested each Mobo PCI port one at a time with GPU & monitor. (success)
  • Tested each GPU and all of its display ports. (success)

    NOTE: each GPU has 1 DisplayPort, 1 HDMI, and 2 DVI ports.

    • Tested each port by itself. (success)
    • Tested all ports displaying at the same time. (success)
  • Tested all cabling involved individually. (success)

2) Software Tests

  • Tested if Ubuntu could handle 4 monitors on 1 GPU in extended mode. (success)
    • Monitor 1: Playing HD Movie in full-screen.
    • Monitor 2: Playing Ark survival evolved on highest quality settings.
    • Monitor 3: Active Discord video call.
    • Monitor 4: OBS software rendering game & Discord for active stream.
  • Tested if Nvidia X Server can see both GPU when plugged in at same time. (success)

    NOTE: One GPU is disabled by default.


Attempting my objective

I have attempted a number of configurations with the xorg.conf file. The closest to success I get most trials is:

  • All screens working, shows an extended screen saver image stretched across the monitors.
  • The mouse is able to travel across all screens happily on the screen saver.
  • Unfortunately, when attempting to log in, I get an infinite login loop that reloads the login screen and prompts me to log in again.

xorg.conf file:

#xorg.file for 6-screen, 2-GPU setup

###############################################################
# Mouse Configuration options
# Pointer definition "Mouse0": legacy "mouse" driver reading /dev/psaux with
# automatic protocol detection. Referenced as the CorePointer in ServerLayout.
Section "InputDevice"
    Identifier     "Mouse0"
    Driver         "mouse"
    Option         "Protocol" "auto"
    Option         "Device" "/dev/psaux"
    # Middle-button emulation is switched off.
    Option         "Emulate3Buttons" "no"
    # Scroll wheel is mapped to buttons 4 and 5.
    Option         "ZAxisMapping" "4 5"
EndSection


# Keyboard Configuration options
# Keyboard definition "Keyboard0": "kbd" driver with no extra options.
# Referenced as the CoreKeyboard in ServerLayout.
Section "InputDevice"
    Identifier     "Keyboard0"
    Driver         "kbd"
EndSection

###############################################################
# Monitor Configuration options
# Six Monitor sections (Monitor0..Monitor5), one per physical display.
# All share the same sync ranges: HorizSync 30-83, VertRefresh 56-75.
# Monitor0-3 are referenced by Screen0, Monitor4-5 by Screen1.
Section "Monitor"
    Identifier     "Monitor0"
    VendorName     "Dell, Inc."
    ModelName      "DELL E207WFP"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

Section "Monitor"
    Identifier     "Monitor1"
    VendorName     "Dell, Inc."
    ModelName      "DELL E207WFP"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

Section "Monitor"
    Identifier     "Monitor2"
    VendorName     "Dell, Inc."
    ModelName      "DELL E207WFP"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

Section "Monitor"
    Identifier     "Monitor3"
    VendorName     "Dell, Inc."
    ModelName      "DELL E207WFP"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

# NOTE(review): Monitor4 and Monitor5 use ModelName "DELL E2009W" while
# Monitor0-3 use "DELL E207WFP" -- confirm which model name is intended.
Section "Monitor"
    Identifier     "Monitor4"
    VendorName     "Dell, Inc."
    ModelName      "DELL E2009W"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

Section "Monitor"
    Identifier     "Monitor5"
    VendorName     "Dell, Inc."
    ModelName      "DELL E2009W"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 75.0
EndSection

################################################################
# GPU Configuration options
# One Device section per graphics card, both using the "nvidia" driver.
# The two cards are told apart only by BusID (PCI:1:0:0 vs PCI:2:0:0).
Section "Device"
    Identifier     "Device0"
    Driver         "nvidia"
    VendorName     "NVIDIA Corporation"
    BoardName      "GeForce GTX 970"
    BusID          "PCI:1:0:0"
EndSection

Section "Device"
    Identifier     "Device1"
    Driver         "nvidia"
    VendorName     "NVIDIA Corporation"
    BoardName      "GeForce GTX 970"
    BusID          "PCI:2:0:0"
EndSection

################################################################
# device & monitor bindings configuration options
# Screen0: X screen bound to Device0 (first GPU).
# NOTE(review): four Monitor lines are listed here; a Screen section is
# normally given a single Monitor -- presumably only one of these takes
# effect. Verify against xorg.conf(5).
Section "Screen"
    Identifier     "Screen0"
    Device         "Device0"
    Monitor        "Monitor0"
    Monitor        "Monitor1"
    Monitor        "Monitor2"
    Monitor        "Monitor3"
    Option         "Stereo" "0"
    Option         "nvidiaXineramaInfoOrder" "DFP-0"
    # Four outputs in one row at y=0, at x offsets 0, 1680, 3360 and 5040.
    Option         "metamodes" "DVI-I-1: nvidia-auto-select +1680+0, HDMI-0: nvidia-auto-select +3360+0, DP-1: nvidia-auto-select +5040+0, DVI-D-0: nvidia-auto-select +0+0"
    # SLI, MultiGPU and Base Mosaic are all explicitly disabled.
    Option         "SLI" "Off"
    Option         "MultiGPU" "Off"
    Option         "BaseMosaic" "off"
    DefaultDepth    24
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection


# Screen1: X screen bound to Device1 (second GPU), with two Monitor lines
# (same caveat as Screen0).
Section "Screen"
    Identifier     "Screen1"
    Device         "Device1"
    Monitor        "Monitor4"
    Monitor        "Monitor5"
    Option         "Stereo" "0"
    # Two outputs at y=1050, at x offsets 1680 and 3360.
    Option         "metamodes" "DVI-I-1: nvidia-auto-select +1680+1050, DVI-D-0: nvidia-auto-select +3360+1050"
    Option         "SLI" "Off"
    Option         "MultiGPU" "Off"
    Option         "BaseMosaic" "off"
    DefaultDepth    24
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection

#############################################################
# Xinerama & Composite extensions
# Turns the Composite extension off.
# NOTE(review): presumably disabled because this layout enables the X
# server's Xinerama -- confirm that this is the reason.
Section "Extensions"
    Option         "Composite" "Disable"
EndSection

##############################################################
# Screen bindings configuration options
# Layout0 ties the two X screens and the input devices together.
Section "ServerLayout"
    Identifier     "Layout0"
    # Screen0 sits at absolute position (0, 1050); Screen1 is placed above it.
    Screen      0  "Screen0" 0 1050
    Screen      1  "Screen1" Above "Screen0"

    InputDevice    "Keyboard0" "CoreKeyboard"
    InputDevice    "Mouse0" "CorePointer"

    # Enables the X server's own Xinerama to join Screen0 and Screen1.
    Option         "Xinerama" "1"
EndSection

###########################################################
# Unused section
# Empty Files section: it contains no entries.
Section "Files"
EndSection

Clarifications

This is not an infinite login loop question. The infinite login loop is a symptom of my problem.

There are several infinite login loop questions and answers; those assume you've made a mistake and want to "fix" the problem by resetting to default. Here are several of those reset-to-default "fixes" that are not my problem.

I do not want to reset to default, and I can return my system to working condition on one graphics card after each attempt. I am asking how to further change settings/configurations so as to reach my goal (without the login loop).

Answer

Alan-Cugler picture Alan-Cugler · May 11, 2018

Alright, I answered my own question through some extensive research and testing. If you have a multi-GPU, multi-monitor system and want a unified desktop across it (meaning windows snap to monitors, and program windows can be dragged across monitors regardless of which GPU each monitor is attached to), then the short answer is SLI/Base Mosaic for Nvidia graphics cards.

However, there are multiple points of information that are lost unless you spend 60+ hours reading documentation and talking with others (which is what I did).

I am going to give multiple links and clarification points that should help with other people's frustrations and endeavors. Understand that all of the commands and advice are from an Ubuntu 16.04, Unity DE, Nvidia Geforce driver v380+ standpoint, and may not work for other systems and hardware.


Nvidia --> X server & RandR

There are several points of [mis]communication between:

  • RandR (native installed display manager, also in CLI referred to as xrandr)
  • X server (The main management/connection system between inputs and outputs, this encompasses the Xorg, xorg.conf aspects)
  • Nvidia-settings (Nvidia's display manager that sends information to X server and RandR)

When using nvidia-settings, Nvidia will configure and send misinformation to both x server and RandR and then internally handle the logic itself.

  1. Nvidia has its own xinerama, separate from the X server xinerama. If you enable xinerama in the xorg.conf file, that is the X server's own Xinerama, not Nvidia's (so don't do that if you are trying to do things through nvidia-settings).

  2. Nvidia will place all monitors under 1 screen and present this screen as a unified desktop to X server, while using its own internal xinerama logic to allow RandR to see the separate monitors properly.

    • This means making separate screen, monitor, and device sections in the xorg.conf breaks nvidia-settings' natural way of managing the desktop (i.e. it disables Nvidia's internal xinerama). This will then require X server's xinerama to be enabled in the xorg.conf file as a result (however, this tends to break one of the unified desktop expectations that I mentioned at the beginning).
    • Even when handling multiple gpus to render a desktop, it still presents as 1 screen to X server while using its internal logic to report to RandR all of the monitors available. This will require an option called base/SLI Mosaic, but I need to clarify that below as well.

End of Nvidia --> RandR & X server explanations


Nvidia --> xorg.conf

How Nvidia uses the xorg.conf file is not reflective of what you read about in X server, X org, and xorg.conf documentation. Instead you need to focus on reading Nvidia documentation. However pay attention to the URL when you go to the docs. The number before '/README/' needs to be your Nvidia driver version number, which is the '390.42' in this example URL below. A lot of google hits to nvidia-settings documentation are really old docs to old drivers. This will throw you through a loop if you get old documentation. Example:

http://us.download.nvidia.com/XFree86/Linux-x86_64/390.42/README/index.html

  1. As said before, Nvidia reports only a single screen to X server. Here I will show you examples of the Section "Screen" of a xorg.conf file to clarify.

# Example: a single X screen spanning both GPUs via Base Mosaic.
# Each MetaModes entry names the GPU and the output (GPU-<n>.DFP-<m>),
# followed by the mode and the +X+Y placement on the shared desktop.
Section "Screen"
    Identifier     "Screen0"
    Device         "Device0"
    Monitor        "Monitor0"
    DefaultDepth    24
    Option         "Stereo" "0"
    Option         "nvidiaXineramaInfoOrder" "DFP-0"
    # Keep the whole MetaModes value on ONE physical line and close the
    # quote: a quoted xorg.conf string does not continue across a line break.
    Option         "metamodes" "GPU-0.DFP-0: nvidia-auto-select +0+0, GPU-0.DFP-4: nvidia-auto-select +1680+0, GPU-1.DFP-0: nvidia-auto-select +3360+0, GPU-1.DFP-4: nvidia-auto-select +5040+0"
    Option         "MultiGPU" "Off"
    Option         "SLI" "on"
    Option         "BaseMosaic" "on"
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection
  • As you can see, it's still one screen, but the metamode can be specific about which GPU and port to use.
  • Another point is that MultiGPU is not referring to the number of graphics cards on your motherboard.
  • The nvidia-auto-select takes care of your horizontal sync, vertical refresh range, and monitor resolutions for you, but as you can see from the trailing numbers, which are for monitor placement, I am expecting it to set my monitor resolutions to 1680x1050.

A less ambiguous Section Screen is:


# Same single-screen Base Mosaic layout, but every output is qualified with
# the GPU's UUID and named by connector type (DVI-I-1, DVI-D-0) rather than
# by DFP index.
Section "Screen"
    Identifier "Screen0"
    Device "Device0"
    Monitor "Monitor0"
    DefaultDepth 24
    Option "Stereo" "0"
    Option "nvidiaXineramaInfoOrder" "DFP-0"
    # Keep the whole MetaModes value on ONE physical line: a quoted
    # xorg.conf string does not continue across a line break.
    Option "metamodes" "GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.DVI-I-1: nvidia-auto-select +0+0, GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.DVI-D-0: nvidia-auto-select +1680+0, GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.DVI-I-1: nvidia-auto-select +3360+0, GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.DVI-D-0: nvidia-auto-select +5040+0"
    Option "MultiGPU" "Off"
    Option "SLI" "on"
    Option "BaseMosaic" "on"
    SubSection "Display"
        Depth 24
    EndSubSection
EndSection
  • By adding the GPU hash you are directly saying which graphics cards to use and not leaving it to the system to interpret.
  • DVI-D-0 is equivalent to DFP-4 on my system, so there isn't any interpretation left up to the system; however, I think using the port type instead of the DFP placement removes ambiguity for the user.

End of Nvidia --> xorg.conf


SLI/Base Mosaic catch-22

So the eventual answer is that SLI Mosaic is needed when using nvidia-settings for the unified desktop I want. However, it will only allow three screens to be enabled across 2 or more GPUs if they are not from the Quadro or NVS GPU families. This is not because my precious Geforce GTX 970s can't handle it; it's because Nvidia artificially limited this in the nvidia-settings code to promote their newer GPUs. I can tell this both from this Nvidia forum and from looking at the source code in the testing repository.

In fact, originally you could have more than 3 screens across 2 non-selected GPU up until 296 driver where they forced the change.

If you do attempt anyways without the proper GPU it will result in all screens being shown in both RandR GUI as well as Nvidia-settings GUI. However, you will notice all but three monitors will be disabled, and they can only be enabled if you disable 1 of the 3 already enabled.


Useful commands

# Show the xrandr program version and the server's RandR version.
xrandr --version
# List the RandR providers exposed by the X server.
xrandr --listproviders
# Print the installed X.Org server version.
sudo Xorg -version
# Verbose PCI details for NVIDIA devices (20 lines of context after each match).
sudo lspci -vvv |grep -i -A 20 nvidia
# Driver version and per-GPU status summary.
nvidia-smi
# Per-GPU details from the driver (name, PCI BusID, attached display devices).
nvidia-xconfig --query-gpu-info
# Launch NVIDIA's settings tool.
nvidia-settings
# Dump the X server log for display :0.
cat /var/log/Xorg.0.log
# Print the current X configuration as a tree.
nvidia-xconfig -t

Fully functional xorg.conf

# xorg.conf for one unified desktop across two GeForce GTX 970 cards.
# Everything is presented to the X server as a single screen ("Screen0");
# the NVIDIA driver spans both GPUs through SLI / Base Mosaic.

# Layout: one X screen at the origin plus the core keyboard and pointer.
# The X server's own Xinerama stays off ("0") because the NVIDIA driver
# handles the multi-monitor layout itself.
Section "ServerLayout"
    Identifier "Layout0"
    Screen 0 "Screen0" 0 0
    InputDevice "Keyboard0" "CoreKeyboard"
    InputDevice "Mouse0" "CorePointer"
    Option "Xinerama" "0"
EndSection

# Core pointer: legacy "mouse" driver on /dev/psaux, protocol auto-detected,
# no middle-button emulation, wheel mapped to buttons 4 and 5.
Section "InputDevice"
    Identifier "Mouse0"
    Driver "mouse"
    Option "Protocol" "auto"
    Option "Device" "/dev/psaux"
    Option "Emulate3Buttons" "no"
    Option "ZAxisMapping" "4 5"
EndSection

# Core keyboard: "kbd" driver with default options.
Section "InputDevice"
    Identifier "Keyboard0"
    Driver "kbd"
EndSection

# Single Monitor section; the per-output modes are chosen by
# nvidia-auto-select in the MetaModes line of Screen0.
Section "Monitor"
    Identifier "Monitor0"
    VendorName "Dell, Inc."
    ModelName "DELL E207WFP"
    HorizSync 30.0 - 83.0
    VertRefresh 56.0 - 75.0
    Option "DPMS"
EndSection

# Only the first GPU gets a Device section; the second GPU is addressed
# by its UUID inside the MetaModes string.
Section "Device"
    Identifier "Device0"
    Driver "nvidia"
    VendorName "NVIDIA Corporation"
    BoardName "Geforce GTX 970"
    BusID "PCI:1:0:0"
EndSection

Section "Screen"
    Identifier "Screen0"
    Device "Device0"
    Monitor "Monitor0"
    DefaultDepth 24
    Option "Stereo" "0"
    Option "nvidiaXineramaInfoOrder" "DFP-0"
    # Four outputs in one row (x = 0, 1680, 3360, 5040), two per GPU.
    # Keep the whole MetaModes value on ONE physical line: a quoted
    # xorg.conf string does not continue across a line break.
    Option "metamodes" "GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.DVI-I-1: nvidia-auto-select +0+0, GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.DVI-D-0: nvidia-auto-select +1680+0, GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.DVI-I-1: nvidia-auto-select +3360+0, GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.DVI-D-0: nvidia-auto-select +5040+0"
    # Disabled entries for the remaining DP/HDMI outputs (second row at
    # y=1050), kept for reference; they are not part of the active MetaModes:
    #   GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.DP-1: nvidia-auto-select +1680+1050,
    #   GPU-a0bd4a65-1c2b-f765-eeb2-db96b3f4b25a.GPU-0.HDMI-0: nvidia-auto-select +0+1050,
    #   GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.HDMI-0: nvidia-auto-select +3360+1050,
    #   GPU-a85ce4bc-1e9b-6275-2aee-0d1ed8bd7ce2.GPU-1.DP-1: nvidia-auto-select +5040+1050
    Option "MultiGPU" "Off"
    Option "SLI" "on"
    Option "BaseMosaic" "on"
    SubSection "Display"
        Depth 24
    EndSubSection
EndSection