Files
Michael Bestas 6be06baf80 Merge branch 'linux-4.14.y' of github.com:openela/kernel-lts into android-msm-pixel-4.14
* 'linux-4.14.y' of github.com:openela/kernel-lts:
  LTS: Update to 4.14.348
  docs: kernel_include.py: Cope with docutils 0.21
  serial: kgdboc: Fix NMI-safety problems from keyboard reset code
  btrfs: add missing mutex_unlock in btrfs_relocate_sys_chunks()
  dm: limit the number of targets and parameter size area
  Revert "selftests: mm: fix map_hugetlb failure on 64K page size systems"
  LTS: Update to 4.14.347
  rds: Fix build regression.
  RDS: IB: Use DEFINE_PER_CPU_SHARED_ALIGNED for rds_ib_stats
  af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc().
  net: fix out-of-bounds access in ops_init
  drm/vmwgfx: Fix invalid reads in fence signaled events
  dyndbg: fix old BUG_ON in >control parser
  tipc: fix UAF in error path
  usb: gadget: f_fs: Fix a race condition when processing setup packets.
  usb: gadget: composite: fix OS descriptors w_value logic
  firewire: nosy: ensure user_length is taken into account when fetching packet contents
  af_unix: Fix garbage collector racing against connect()
  af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
  ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action()
  net/ipv6: Refactor fib6_rule_action
  net: bridge: fix corrupted ethernet header on multicast-to-unicast
  net: bridge: use DEV_STATS_INC()
  phonet: fix rtm_phonet_notify() skb allocation
  rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation
  Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout
  Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout
  tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets
  tcp: remove redundant check on tskb
  net:usb:qmi_wwan: support Rolling modules
  fs/9p: drop inodes immediately on non-.L too
  gpio: crystalcove: Use -ENOTSUPP consistently
  gpio: wcove: Use -ENOTSUPP consistently
  9p: explicitly deny setlease attempts
  fs/9p: translate O_TRUNC into OTRUNC
  fs/9p: only translate RWX permissions for plain 9P2000
  selftests: timers: Fix valid-adjtimex signed left-shift undefined behavior
  scsi: target: Fix SELinux error when systemd-modules loads the target module
  tools/power turbostat: Fix Bzy_MHz documentation typo
  tools/power turbostat: Fix added raw MSR output
  firewire: ohci: mask bus reset interrupts between ISR and bottom half
  ata: sata_gemini: Check clk_enable() result
  net: bcmgenet: Reset RBUF on first open
  ALSA: line6: Zero-initialize message buffers
  scsi: bnx2fc: Remove spin_lock_bh while releasing resources after upload
  net: mark racy access on sk->sk_rcvbuf
  wifi: mac80211: fix ieee80211_bss_*_flags kernel-doc
  scsi: lpfc: Update lpfc_ramp_down_queue_handler() logic
  tipc: fix a possible memleak in tipc_buf_append
  net: bridge: fix multicast-to-unicast with fraglist GSO
  net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341
  net: dsa: mv88e6xxx: Fix name of switch 88E6141
  net: dsa: mv88e6xxx: Add number of MACs in the ATU
  net l2tp: drop flow hash on forward
  nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment().
  bna: ensure the copied buf is NUL terminated
  pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map()
  power: rt9455: hide unused rt9455_boost_voltage_values
  pinctrl: core: delete incorrect free in pinctrl_enable()
  ethernet: Add helper for assigning packet type when dest address does not match device address
  ethernet: add a helper for assigning port addresses
  net: create netdev->dev_addr assignment helpers
  net: slightly optimize eth_type_trans
  wifi: nl80211: don't free NULL coalescing rule
  dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state"
  dmaengine: pl330: issue_pending waits until WFP state
  LTS: Update to 4.14.346
  Simplify major/minor non-dynamic logic
  net: fix unused variable warning in do_tcp_setsockopt()
  serial: core: fix kernel-doc for uart_port_unlock_irqrestore()
  HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up
  i2c: smbus: fix NULL function pointer dereference
  i2c: add param sanity check to i2c_transfer()
  idma64: Don't try to serve interrupts when device is powered off
  mtd: diskonchip: work around ubsan link failure
  stackdepot: respect __GFP_NOLOCKDEP allocation flag
  net: b44: set pause params only when interface is up
  irqchip/gic-v3-its: Prevent double free on error
  arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma
  btrfs: fix information leak in btrfs_ioctl_logical_to_ino()
  Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old()
  tracing: Increase PERF_MAX_TRACE_SIZE to handle Sentinel1 and docker together
  tracing: Show size of requested perf buffer
  Revert "crypto: api - Disallow identical driver names"
  drm/amdgpu: validate the parameters of bo mapping operations more clearly
  amdgpu: validate offset_in_bo of drm_amdgpu_gem_va
  drm/amdgpu: restrict bo mapping within gpu address limits
  serial: mxs-auart: add spinlock around changing cts state
  serial: core: Provide port lock wrappers
  i40e: Do not use WQ_MEM_RECLAIM flag for workqueue
  ipvs: Fix checksumming on GSO of SCTP packets
  bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs
  docs: segmentation-offloads.txt: add SCTP info
  net: gtp: Fix Use-After-Free in gtp_dellink
  net: usb: ax88179_178a: stop lying about skb->truesize
  NFC: trf7970a: disable all regulators on removal
  mlxsw: core: Unregister EMAD trap using FORWARD action
  vxlan: drop packets from invalid src-address
  ARC: [plat-hsdk]: Remove misplaced interrupt-cells property
  arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma
  arm64: dts: rockchip: fix alphabetical ordering RK3399 puma
  nilfs2: fix OOB in nilfs_set_de_type
  fs: sysfs: Fix reference leak in sysfs_break_active_protection()
  speakup: Avoid crash on very long word
  usb: dwc2: host: Fix dereference issue in DDMA completion flow.
  Revert "usb: cdc-wdm: close race between read and workqueue"
  USB: serial: option: add Telit FN920C04 rmnet compositions
  USB: serial: option: add Rolling RW101-GL and RW135-GL support
  USB: serial: option: support Quectel EM060K sub-models
  USB: serial: option: add Lonsung U8300/U9300 product
  USB: serial: option: add support for Fibocom FM650/FG650
  USB: serial: option: add Fibocom FM135-GL variants
  serial/pmac_zilog: Remove flawed mitigation for rx irq flood
  comedi: vmk80xx: fix incomplete endpoint checking
  drm: nv04: Fix out of bounds access
  tun: limit printing rate when illegal packet received by tun dev
  netfilter: nf_tables: Fix potential data-race in __nft_expr_type_get()
  netfilter: nf_tables: __nft_expr_type_get() selects specific family type
  Revert "tracing/trigger: Fix to return error if failed to alloc snapshot"
  kprobes: Fix possible use-after-free issue on kprobe registration
  selftests/ftrace: Limit length in subsystem-enable tests
  x86/apic: Force native_apic_mem_read() to use the MOV instruction
  selftests: timers: Fix abs() warning in posix_timers test
  vhost: Add smp_rmb() in vhost_vq_avail_empty()
  tracing: hide unused ftrace_event_id_fops
  net/mlx5: Properly link new fs rules into the tree
  ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr
  ipv4/route: avoid unused-but-set-variable warning
  geneve: fix header validation in geneve[6]_xmit_skb
  nouveau: fix function cast warning
  Bluetooth: Fix memory leak in hci_req_sync_complete()
  batman-adv: Avoid infinite loop trying to resize local TT
  LTS: Update to 4.14.345
  net: check vlan filter feature in vlan_vids_add_by_dev() and vlan_vids_del_by_dev()
  Revert "net: check vlan filter feature in vlan_vids_add_by_dev() and vlan_vids_del_by_dev()"
  netfilter: nftables: exthdr: fix 4-byte stack OOB write
  ext4: fix to check return value of freeze_bdev() in ext4_shutdown()
  Revert "ext4: fix to check return value of freeze_bdev() in ext4_shutdown()"
  VMCI: Fix possible memcpy() run-time warning in vmci_datagram_invoke_guest_handler()
  Bluetooth: btintel: Fixe build regression
  x86/mm/pat: fix VM_PAT handling in COW mappings
  virtio: reenable config if freezing device failed
  tty: n_gsm: require CAP_NET_ADMIN to attach N_GSM0710 ldisc
  fbmon: prevent division by zero in fb_videomode_from_videomode()
  fbdev: viafb: fix typo in hw_bitblt_1 and hw_bitblt_2
  usb: sl811-hcd: only defined function checkdone if QUIRK2 is defined
  tools: iio: replace seekdir() in iio_generic_buffer
  block: prevent division by zero in blk_rq_stat_sum()
  SUNRPC: increase size of rpc_wait_queue.qlen from unsigned short to unsigned int
  media: sta2x11: fix irq handler cast
  isofs: handle CDs with bad root inode but good Joliet root directory
  scsi: lpfc: Fix possible memory leak in lpfc_rcv_padisc()
  sysv: don't call sb_bread() with pointers_lock held
  Input: synaptics-rmi4 - fail probing if memory allocation for "phys" fails
  Bluetooth: btintel: Fix null ptr deref in btintel_read_version
  btrfs: send: handle path ref underflow in header iterate_inode_ref()
  btrfs: export: handle invalid inode or root reference in btrfs_get_parent()
  btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks()
  tools/power x86_energy_perf_policy: Fix file leak in get_pkg_num()
  arm64: dts: rockchip: fix rk3399 hdmi ports node
  VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host()
  wifi: ath9k: fix LNA selection in ath_ant_try_scan()
  ALSA: hda/realtek: Update Panasonic CF-SZ6 quirk to support headset with microphone
  ata: sata_mv: Fix PCI device ID table declaration compilation warning
  ata: sata_sx4: fix pdc20621_get_from_dimm() on 64-bit
  ASoC: ops: Fix wraparound for mask in snd_soc_get_volsw
  init: open /initrd.image with O_LARGEFILE
  staging: vc04_services: fix information leak in create_component()
  staging: vc04_services: changen strncpy() to strscpy_pad()
  staging: mmal-vchiq: Fix client_component for 64 bit kernel
  staging: mmal-vchiq: Allocate and free components as required
  staging: mmal-vchiq: Avoid use of bool in structures
  ipv6: Fix infinite recursion in fib6_dump_done().
  selftests: reuseaddr_conflict: add missing new line at the end of the output
  net/sched: act_skbmod: prevent kernel-infoleak
  net: stmmac: fix rx queue priority assignment
  net: stmmac: Fix issues when number of Queues >= 4
  mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
  Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped."
  netfilter: nf_tables: disallow timeout for anonymous sets
  Bluetooth: Fix TOCTOU in HCI debugfs implementation
  Bluetooth: hci_event: set the conn encrypted before conn establishes
  tcp: properly terminate timers for kernel sockets
  mptcp: add sk_stop_timer_sync helper
  nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet
  USB: core: Fix deadlock in usb_deauthorize_interface()
  scsi: lpfc: Correct size for wqe for memset()
  x86/cpu: Enable STIBP on AMD if Automatic IBRS is enabled
  scsi: qla2xxx: Fix command flush on cable pull
  usb: udc: remove warning when queue disabled ep
  usb: dwc2: host: Fix ISOC flow in DDMA mode
  usb: dwc2: host: Fix hibernation flow
  powerpc: xor_vmx: Add '-mhard-float' to CFLAGS
  efivarfs: Request at most 512 bytes for variable names
  perf/core: Fix reentry problem in perf_output_read_group()
  loop: Call loop_config_discard() only after new config is applied
  Revert "loop: Check for overflow while configuring loop"
  btrfs: allocate btrfs_ioctl_defrag_range_args on stack
  btrfs: add define for oldest generation
  printk: Update @console_may_schedule in console_trylock_spinning()
  fs/aio: Check IOCB_AIO_RW before the struct aio_kiocb conversion
  ALSA: sh: aica: reorder cleanup operations to avoid UAF bugs
  ALSA: aica: Fix a long-time build breakage
  ALSA: sh: aica: Convert timers to use timer_setup()
  usb: cdc-wdm: close race between read and workqueue
  USB: cdc-wdm: Fix use after free in service_outstanding_interrupt().
  exec: Fix NOMMU linux_binprm::exec in transfer_args_to_stack()
  wifi: mac80211: check/clear fast rx for non-4addr sta VLAN changes
  mm/migrate: set swap entry values of THP tail pages properly.
  mm/memory-failure: fix an incorrect use of tail pages
  vt: fix memory overlapping when deleting chars in the buffer
  tty: serial: fsl_lpuart: avoid idle preamble pending if CTS is enabled
  usb: port: Don't try to peer unused USB ports based on location
  usb: gadget: ncm: Fix handling of zero block length packets
  USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command
  ALSA: hda/realtek - Fix headset Mic no show at resume back for Lenovo ALC897 platform
  xfrm: Avoid clang fortify warning in copy_to_user_tmpl()
  netfilter: nf_tables: reject constant set with timeout
  netfilter: nf_tables: disallow anonymous set with timeout flag
  comedi: comedi_test: Prevent timers rescheduling during deletion
  ahci: asm1064: asm1166: don't limit reported ports
  ahci: asm1064: correct count of reported ports
  nilfs2: prevent kernel bug at submit_bh_wbc()
  nilfs2: use a more common logging style
  nilfs2: fix failure to detect DAT corruption in btree and direct mappings
  memtest: use {READ,WRITE}_ONCE in memory scanning
  drm/vc4: hdmi: do not return negative values from .get_modes()
  drm/imx/ipuv3: do not return negative values from .get_modes()
  s390/zcrypt: fix reference counting on zcrypt card objects
  soc: fsl: qbman: Use raw spinlock for cgr_lock
  soc: fsl: qbman: Add CGR update function
  soc: fsl: qbman: Add helper for sanity checking cgr ops
  soc: fsl: qbman: Always disable interrupts when taking cgr_lock
  vfio/platform: Disable virqfds on cleanup
  kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1
  speakup: Fix 8bit characters from direct synth
  ext4: fix corruption during on-line resize
  hwmon: (amc6821) add of_match table
  mmc: core: Fix switch on gp3 partition
  dm-raid: fix lockdep waring in "pers->hot_add_disk"
  Revert "Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d""
  PCI/PM: Drain runtime-idle callbacks before driver removal
  PCI: Drop pci_device_remove() test of pci_dev->driver
  fuse: don't unhash root
  mmc: tmio: avoid concurrent runs of mmc_request_done()
  PM: sleep: wakeirq: fix wake irq warning in system suspend
  USB: serial: cp210x: add pid/vid for TDK NC0110013M and MM0110113M
  USB: serial: option: add MeiG Smart SLM320 product
  USB: serial: cp210x: add ID for MGP Instruments PDS100
  USB: serial: add device ID for VeriFone adapter
  USB: serial: ftdi_sio: add support for GMC Z216C Adapter IR-USB
  powerpc/fsl: Fix mfpmr build errors with newer binutils
  clk: qcom: mmcc-msm8974: fix terminating of frequency table arrays
  clk: qcom: mmcc-apq8084: fix terminating of frequency table arrays
  PM: suspend: Set mem_sleep_current during kernel command line setup
  parisc: Strip upper 32 bit of sum in csum_ipv6_magic for 64-bit builds
  parisc: Fix csum_ipv6_magic on 64-bit systems
  parisc: Fix csum_ipv6_magic on 32-bit systems
  parisc: Fix ip_fast_csum
  parisc: Do not hardcode registers in checksum functions
  ubi: correct the calculation of fastmap size
  ubi: Check for too small LEB size in VTBL code
  ubifs: Set page uptodate in the correct place
  fat: fix uninitialized field in nostale filehandles
  crypto: qat - resolve race condition during AER recovery
  crypto: qat - fix double free during reset
  sparc64: NMI watchdog: fix return value of __setup handler
  KVM: Always flush async #PF workqueue when vCPU is being destroyed
  media: xc4000: Fix atomicity violation in xc4000_get_frequency
  arm: dts: marvell: Fix maxium->maxim typo in brownstone dts
  ARM: dts: mmp2-brownstone: Don't redeclare phandle references
  smack: Handle SMACK64TRANSMUTE in smack_inode_setsecurity()
  smack: Set SMACK64TRANSMUTE only for dirs in smack_inode_setxattr()
  wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach
  x86/bugs: Use sysfs_emit()
  x86/pti: Don't report XenPV as vulnerable
  x86/cpu: Support AMD Automatic IBRS
  Documentation/hw-vuln: Update spectre doc
  LTS: Update to 4.14.344
  binder: signal epoll threads of self-work
  ANDROID: binder: Add thread->process_todo flag.
  scsi: bnx2fc: Fix skb double free in bnx2fc_rcv()
  scsi: bnx2fc: Remove set but not used variable 'oxid'
  net: check dev->gso_max_size in gso_features_check()
  driver: staging: count ashmem_range into SLAB_RECLAIMBLE
  net: warn if gso_type isn't set for a GSO SKB
  staging: android: ashmem: Remove use of unlikely()
  ALSA: hda/realtek: Enable headset on Lenovo M90 Gen5
  ALSA: hda/realtek: Enable headset onLenovo M70/M90
  ALSA: hda/realtek: Add quirk for Lenovo TianYi510Pro-14IOB
  ALSA: hda/realtek - ALC897 headset MIC no sound
  ALSA: hda/realtek - Add headset Mic support for Lenovo ALC897 platform
  ALSA: hda/realtek: Fix the mic type detection issue for ASUS G551JW
  ALSA: hda/realtek - The front Mic on a HP machine doesn't work
  ALSA: hda/realtek - Enable the headset of Acer N50-600 with ALC662
  ALSA: hda/realtek - Enable headset mic of Acer X2660G with ALC662
  ALSA: hda/realtek - Add Headset Mic supported for HP cPC
  ALSA: hda/realtek - More constifications
  Add Acer Aspire Ethos 8951G model quirk
  devcoredump: Send uevent once devcd is ready
  devcoredump : Serialize devcd_del work
  netfilter: xt_owner: Fix for unsafe access of sk->sk_socket
  netfilter: xt_owner: Add supplementary groups option
  mtd: cfi_cmdset_0001: Byte swap OTP info
  mtd: cfi_cmdset_0001: Support the absence of protection registers
  s390/cmma: fix detection of DAT pages
  s390/mm: fix phys vs virt confusion in mark_kernel_pXd() functions family
  ALSA: hda/realtek: Headset Mic VREF to 100%
  hfsplus: unmap the page in the "fail_page" label
  ALSA: hda/realtek - Fix microphone noise on ASUS TUF B550M-PLUS
  ALSA: hda/realtek: Enable audio jacks of ASUS D700SA with ALC887
  ALSA: hda/realtek - Add quirk for Tuxedo XC 1509
  ALSA: hda/realtek - Headset microphone and internal speaker support for System76 oryp5
  ALSA: hda/realtek - Clevo P950ER ALC1220 Fixup
  ALSA: hda/realtek - Add support for ALC1220
  hv_netvsc: Fix race of register_netdevice_notifier and VF register
  hv_netvsc: use reciprocal divide to speed up percent calculation
  pwm: sti: Reduce number of allocations and drop usage of chip_data
  pwm: sti: Avoid conditional gotos
  tools: iio: iio_generic_buffer ensure alignment
  tools: iio: iio_generic_buffer: Fix some integer type and calculation
  tools: iio: privatize globals and functions in iio_generic_buffer.c file
  leds: trigger: ledtrig-cpu:: Fix 'output may be truncated' issue for 'cpu'
  ledtrig-cpu: Limit to 8 CPUs
  leds: pwm: Don't disable the PWM when the LED should be off
  leds: pwm: convert to atomic PWM API
  leds: pwm: simplify if condition
  regmap: debugfs: Fix a erroneous check after snprintf()
  regmap: Allow missing device in regmap_name_read_file()
  tcp_metrics: add missing barriers on delete
  tcp: batch tcp_net_metrics_exit
  tcp: fix excessive TLP and RACK timeouts from HZ rounding
  tcp: Namespace-ify sysctl_tcp_early_retrans
  net: nfc: fix races in nfc_llcp_sock_get() and nfc_llcp_sock_get_sn()
  ata: libata-core: Do not register PM operations for SAS ports
  libata: make ata_port_type const
  libata: Add new med_power_with_dipm link_power_management_policy setting
  ALSA: hda: Disable power save for solving pop issue on Lenovo ThinkCentre M70q
  ALSA: hda - add Lenovo IdeaCentre B550 to the power_save_blacklist
  ALSA: hda: Add Intel NUC7i3BNB to the power_save blacklist
  ext4: mark group as trimmed only if it was fully scanned
  ext4: add new helper interface ext4_try_to_trim_range()
  ext4: remove the 'group' parameter of ext4_trim_extent
  scsi: qla2xxx: Remove unsupported ql2xenabledif option
  scsi: qla2xxx: Add protection mask module parameters
  scsi: qla2xxx: Add option for use reserve exch for ELS
  scsi: qla2xxx: Reinstate module parameter ql2xenablemsix
  scsi: lpfc: remove redundant null check on eqe
  usb: typec: tcpci: clear the fault status bit
  usb: typec: add fwnode to tcpc
  staging: typec: fix endianness mismatch identified by sparse
  staging: typec: tcpm: Document data structures
  serial: sc16is7xx: fix broken port 0 uart init
  sc16is7xx: Set iobase to device index
  dlm: fix plock lookup when using multiple lockspaces
  drm/tegra: dpaux: Fix incorrect return value of platform_get_irq
  drm/tegra: Remove superfluous error messages around platform_get_irq()
  ARM: dts: BCM53573: Drop nonexistent #usb-cells
  ARM: dts: BCM5301X: Harmonize EHCI/OHCI DT nodes name
  wifi: ath9k: fix races between ath9k_wmi_cmd and ath9k_wmi_ctrl_rx
  ath9k: use irqsave() in USB's complete callback
  wifi: mwifiex: fix error recovery in PCIE buffer descriptor management
  mwifiex: switch from 'pci_' to 'dma_' API
  mwifiex: drop 'set_consistent_dma_mask' log message
  bonding: fix macvlan over alb bond support
  net: remove bond_slave_has_mac_rcu()
  fbdev: fix potential OOB read in fast_imageblit()
  fbdev: Fix sys_imageblit() for arbitrary image widths
  fbdev: Improve performance of sys_imageblit()
  tty: serial: fsl_lpuart: add earlycon for imx8ulp platform
  Revert "tty: serial: fsl_lpuart: drop earlycon entry for i.MX8QXP"
  MIPS: cpu-features: Use boot_cpu_type for CPU type based features
  MIPS: cpu-features: Enable octeon_cache by cpu_type
  fs: dlm: fix mismatch of plock results from userspace
  fs: dlm: use dlm_plock_info for do_unlock_close
  fs: dlm: change plock interrupted message to debug again
  fs: dlm: add pid to debug log
  dlm: replace usage of found with dedicated list iterator variable
  dlm: improve plock logging if interrupted
  nfsd: Remove incorrect check in nfsd4_validate_stateid
  nfsd4: kill warnings on testing stateids with mismatched clientids
  mmc: meson-gx: remove redundant mmc_request_done() call from irq context
  mmc: meson-gx: remove useless lock
  PM: sleep: wakeirq: fix wake irq arming
  PM / wakeirq: support enabling wake-up irq after runtime_suspend called
  scsi: zfcp: Defer fc_rport blocking until after ADISC response
  scsi: zfcp: workqueue: set description for port work items with their WWPN as context
  btrfs: check for commit error at btrfs_attach_transaction_barrier()
  btrfs: simplify IS_ERR/PTR_ERR checks
  fs: dlm: interrupt posix locks only when process is killed
  dlm: rearrange async condition return
  dlm: cleanup plock_op vs plock_xop
  ext4: Fix reusing stale buffer heads from last failed mounting
  ext4: rename journal_dev to s_journal_dev inside ext4_sb_info
  tcp: annotate data-races around tp->linger2
  net: Replace the limit of TCP_LINGER2 with TCP_FIN_TIMEOUT_MAX
  ceph: don't let check_caps skip sending responses for revoke msgs
  ceph: define argument structure for handle_cap_grant
  net: bcmgenet: Ensure MDIO unregistration has clocks enabled
  net: bcmgenet: Avoid calling platform_device_put() twice in bcmgenet_mii_exit()
  net: tcp_input: Neaten DBGUNDO
  i2c: xiic: Don't try to handle more interrupt events after error
  i2c: xiic: Defer xiic_wakeup() and __xiic_start_xfer() in xiic_process()
  i2c: xiic: Fix broken locking on tx_msg
  i2c: xiic: Change code alignment to 1 space only
  i2c: xiic: Add timeout to the rx fifo wait loop
  i2c: xiic: Fix kerneldoc warnings
  hwrng: virtio - Fix race on data_avail and actual data
  hwrng: virtio - always add a pending request
  hwrng: virtio - don't waste entropy
  hwrng: virtio - don't wait on cleanup
  hwrng: virtio - add an internal buffer
  nfc: llcp: fix possible use of uninitialized variable in nfc_llcp_send_connect()
  nfc: constify several pointers to u8, char and sk_buff
  irqchip/jcore-aic: Fix missing allocation of IRQ descriptors
  irqchip/jcore-aic: Kill use of irq_create_strict_mappings()
  Documentation: fix little inconsistencies
  usb: musb: fix MUSB_QUIRK_B_DISCONNECT_99 handling
  net/rose: fix races in rose_kill_by_device()
  reset: Fix crash when freeing non-existent optional resets
  ksmbd: fix wrong name of SMB2_CREATE_ALLOCATION_SIZE
  PCI: keystone: Don't discard .probe() callback
  PCI: keystone: Don't discard .remove() callback
  can: dev: can_restart(): fix race condition between controller restart and netif_carrier_on()
  can: dev: can_restart(): don't crash kernel if carrier is OK
  r8169: fix the KCSAN reported data-race in rtl_tx while reading TxDescArray[entry].opts1
  xen-netback: use default TX queue size for vifs
  MIPS: Alchemy: only build mmc support helpers if au1xmmc is enabled
  arm64: dts: qcom: msm8996: Add missing interrupt to the USB2 controller
  sched/rt: pick_next_rt_entity(): check list_entry
  regmap: Account for register length in SMBus I/O limits
  x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms
  ASoC: cs42l51: fix driver to properly autoload with automatic module loading
  PCI: qcom: Disable write access to read only registers for IP v2.3.3
  pinctrl: amd: Only use special debounce behavior for GPIO 0
  IB/hfi1: Fix sdma.h tx->num_descs off-by-one error
  usb: fotg210-hcd: delete an incorrect bounds test
  smb: client: fix OOB in smbCalcSize()
  btrfs: do not allow non subvolume root targets for snapshot
  pinctrl: at91-pio4: use dedicated lock class for IRQ
  net: check vlan filter feature in vlan_vids_add_by_dev() and vlan_vids_del_by_dev()
  arm64: dts: mediatek: mt8173-evb: Fix regulator-fixed node names
  IB/isert: Fix unaligned immediate-data handling
  fbdev: stifb: Make the STI next font pointer a 32-bit signed offset
  smb3: fix touch -h of symlink
  MIPS: KVM: Fix a build warning about variable set but not used
  cifs: spnego: add ';' in HOST_KEY_LEN
  macvlan: Don't propagate promisc change to lower dev in passthru
  ppp: limit MRU to 64K
  ptp: annotate data-race around q->head and q->tail
  xen/events: fix delayed eoi list handling
  tipc: Fix kernel-infoleak due to uninitialized TLV value
  tty: Fix uninit-value access in ppp_sync_receive()
  iio: exynos-adc: request second interupt only when touchscreen mode is used
  selftests/ftrace: Add new test case which checks non unique symbol
  media: v4l2-fwnode: fix v4l2_fwnode_parse_link handling
  block: fix signed int overflow in Amiga partition support
  iio: addac: stx104: Fix race condition for stx104_write_raw()
  ext4: fix to check return value of freeze_bdev() in ext4_shutdown()
  btrfs: fix extent buffer leak after tree mod log failure at split_node()
  pinctrl: amd: Detect internal GPIO0 debounce handling
  ALSA: jack: Fix mutex call in snd_jack_report()
  IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors
  ARM: 9303/1: kprobes: avoid missing-declaration warnings
  LTS: Update to 4.14.343
  crypto: af_alg - Work around empty control messages without MSG_MORE
  crypto: af_alg - Fix regression on empty requests
  spi: spi-mt65xx: Fix NULL pointer access in interrupt handler
  net/bnx2x: Prevent access to a freed page in page_pool
  hsr: Handle failures in module init
  rds: introduce acquire/release ordering in acquire/release_in_xmit()
  hsr: Fix uninit-value access in hsr_get_node()
  net: hsr: fix placement of logical operator in a multi-line statement
  usb: gadget: net2272: Use irqflags in the call to net2272_probe_fin
  staging: greybus: fix get_channel_from_mode() failure path
  serial: 8250_exar: Don't remove GPIO device on suspend
  rtc: mt6397: select IRQ_DOMAIN instead of depending on it
  rtc: mediatek: enhance the description for MediaTek PMIC based RTC
  tty: serial: samsung: fix tx_empty() to return TIOCSER_TEMT
  serial: max310x: fix syntax error in IRQ error message
  clk: qcom: gdsc: Add support to update GDSC transition delay
  NFS: Fix an off by one in root_nfs_cat()
  net: sunrpc: Fix an off by one in rpc_sockaddr2uaddr()
  scsi: bfa: Fix function pointer type mismatch for hcb_qe->cbfn
  scsi: csiostor: Avoid function pointer casts
  ALSA: usb-audio: Stop parsing channels bits when all channels are found.
  sparc32: Fix section mismatch in leon_pci_grpci
  backlight: lp8788: Fully initialize backlight_properties during probe
  backlight: lm3639: Fully initialize backlight_properties during probe
  backlight: da9052: Fully initialize backlight_properties during probe
  backlight: lm3630a: Don't set bl->props.brightness in get_brightness
  backlight: lm3630a: Initialize backlight_properties on init
  powerpc/embedded6xx: Fix no previous prototype for avr_uart_send() etc.
  powerpc/hv-gpci: Fix the H_GET_PERF_COUNTER_INFO hcall return value checks
  drm/mediatek: Fix a null pointer crash in mtk_drm_crtc_finish_page_flip
  media: go7007: fix a memleak in go7007_load_encoder
  media: dvb-frontends: avoid stack overflow warnings with clang
  media: pvrusb2: fix uaf in pvr2_context_set_notify
  drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int()
  mtd: rawnand: lpc32xx_mlc: fix irq handler prototype
  crypto: arm/sha - fix function cast warnings
  crypto: arm - Rename functions to avoid conflict with crypto/sha256.h
  mfd: syscon: Call of_node_put() only when of_parse_phandle() takes a ref
  drm/tegra: put drm_gem_object ref on error in tegra_fb_create
  clk: hisilicon: hi3519: Release the correct number of gates in hi3519_clk_unregister()
  PCI: Mark 3ware-9650SE Root Port Extended Tags as broken
  drm/mediatek: dsi: Fix DSI RGB666 formats and definitions
  media: pvrusb2: fix pvr2_stream_callback casts
  media: go7007: add check of return value of go7007_read_addr()
  ALSA: seq: fix function cast warnings
  drm/radeon/ni: Fix wrong firmware size logging in ni_init_microcode()
  perf thread_map: Free strlist on normal path in thread_map__new_by_tid_str()
  quota: Fix rcu annotations of inode dquot pointers
  quota: Fix potential NULL pointer dereference
  quota: simplify drop_dquot_ref()
  quota: check time limit when back out space/inode change
  fs/quota: erase unused but set variable warning
  quota: code cleanup for __dquot_alloc_space()
  clk: qcom: reset: Ensure write completion on reset de/assertion
  clk: qcom: reset: Commonize the de/assert functions
  clk: qcom: reset: support resetting multiple bits
  clk: qcom: reset: Allow specifying custom reset delay
  media: edia: dvbdev: fix a use-after-free
  media: dvb-core: Fix use-after-free due to race at dvb_register_device()
  media: dvbdev: convert DVB device types into an enum
  media: dvbdev: fix error logic at dvb_register_device()
  media: dvbdev: Fix memleak in dvb_register_device
  media: media/dvb: Use kmemdup rather than duplicating its implementation
  media: dvbdev: remove double-unlock
  media: v4l2-tpg: fix some memleaks in tpg_alloc
  media: em28xx: annotate unchecked call to media_device_register()
  media: tc358743: register v4l2 async device only after successful setup
  drm: Don't treat 0 as -1 in drm_fixp2int_ceil
  drm/rockchip: inno_hdmi: Fix video timing
  drm/tegra: dsi: Fix missing pm_runtime_disable() in the error handling path of tegra_dsi_probe()
  drm/tegra: dsi: Fix some error handling paths in tegra_dsi_probe()
  drm/tegra: dsi: Make use of the helper function dev_err_probe()
  gpu: host1x: mipi: Update tegra_mipi_request() to be node based
  drm/tegra: dsi: Add missing check for of_find_device_by_node
  dm: call the resume method on internal suspend
  dm raid: fix false positive for requeue needed during reshape
  net/x25: fix incorrect parameter validation in the x25_getsockopt() function
  net: kcm: fix incorrect parameter validation in the kcm_getsockopt) function
  udp: fix incorrect parameter validation in the udp_lib_getsockopt() function
  l2tp: fix incorrect parameter validation in the pppol2tp_getsockopt() function
  tcp: fix incorrect parameter validation in the do_tcp_getsockopt() function
  ipv6: fib6_rules: flush route cache when rule is changed
  bpf: Fix stackmap overflow check on 32-bit arches
  bpf: Fix hashtab overflow check on 32-bit arches
  sr9800: Add check for usbnet_get_endpoints
  Bluetooth: hci_core: Fix possible buffer overflow
  Bluetooth: Remove superfluous call to hci_conn_check_pending()
  igb: Fix missing time sync events
  igb: move PEROUT and EXTTS isr logic to separate functions
  mmc: wmt-sdmmc: remove an incorrect release_mem_region() call in the .remove function
  SUNRPC: fix some memleaks in gssx_dec_option_array
  x86, relocs: Ignore relocations in .notes section
  ACPI: scan: Fix device check notification handling
  ARM: dts: arm: realview: Fix development chip ROM compatible value
  wifi: brcmsmac: avoid function pointer casts
  iommu/amd: Mark interrupt as managed
  bus: tegra-aconnect: Update dependency to ARCH_TEGRA
  ACPI: processor_idle: Fix memory leak in acpi_processor_power_exit()
  wifi: libertas: fix some memleaks in lbs_allocate_cmd_buffer()
  af_unix: Annotate data-race of gc_in_progress in wait_for_unix_gc().
  sock_diag: annotate data-races around sock_diag_handlers[family]
  sock_diag: request _diag module only when the family or proto has been registered
  wifi: mwifiex: debugfs: Drop unnecessary error check for debugfs_create_dir()
  wifi: b43: Disable QoS for bcm4331
  wifi: b43: Stop correct queue in DMA worker when QoS is disabled
  b43: main: Fix use true/false for bool type
  wifi: b43: Stop/wake correct queue in PIO Tx path when QoS is disabled
  wifi: b43: Stop/wake correct queue in DMA Tx path when QoS is disabled
  b43: dma: Fix use true/false for bool type variable
  timekeeping: Fix cross-timestamp interpolation for non-x86
  timekeeping: Fix cross-timestamp interpolation corner case decision
  timekeeping: Fix cross-timestamp interpolation on counter wrap
  aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
  md: Don't clear MD_CLOSING when the raid is about to stop
  md: implement ->set_read_only to hook into BLKROSET processing
  block: add a new set_read_only method
  md: switch to ->check_events for media change notifications
  fs/select: rework stack allocation hack for clang
  do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak
  crypto: algif_aead - Only wake up when ctx->more is zero
  crypto: af_alg - make some functions static
  ASoC: wm8962: Fix up incorrect error message in wm8962_set_fll
  ASoC: wm8962: Enable both SPKOUTR_ENA and SPKOUTL_ENA in mono mode
  ASoC: wm8962: Enable oscillator if selecting WM8962_FLL_OSC
  Input: gpio_keys_polled - suppress deferred probe error for gpio
  firewire: core: use long bus reset on gap count error
  Bluetooth: rfcomm: Fix null-ptr-deref in rfcomm_check_security
  scsi: mpt3sas: Prevent sending diag_reset when the controller is ready
  dm-verity, dm-crypt: align "struct bvec_iter" correctly
  block: sed-opal: handle empty atoms when parsing response
  net/iucv: fix the allocation size of iucv_path_table array
  MIPS: Clear Cause.BD in instruction_pointer_set
  x86/xen: Add some null pointer checking to smp.c
  x86/xen: Fix memory leak in xen_smp_intr_init{_pv}()
  xen/events: only register debug interrupt for 2-level events
  LTS: Update to 4.14.342
  selftests/vm: fix map_hugetlb length used for testing read and write
  selftests/vm: fix display of page size in map_hugetlb
  getrusage: use sig->stats_lock rather than lock_task_sighand()
  getrusage: use __for_each_thread()
  getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()
  getrusage: add the "signal_struct *sig" local variable
  hv_netvsc: use netif_is_bond_master() instead of open code
  um: allow not setting extra rpaths in the linux binary
  selftests: mm: fix map_hugetlb failure on 64K page size systems
  tools/selftest/vm: allow choosing mem size and page size in map_hugetlb
  netrom: Fix data-races around sysctl_net_busy_read
  netrom: Fix a data-race around sysctl_netrom_link_fails_count
  netrom: Fix a data-race around sysctl_netrom_routing_control
  netrom: Fix a data-race around sysctl_netrom_transport_no_activity_timeout
  netrom: Fix a data-race around sysctl_netrom_transport_requested_window_size
  netrom: Fix a data-race around sysctl_netrom_transport_busy_delay
  netrom: Fix a data-race around sysctl_netrom_transport_acknowledge_delay
  netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries
  netrom: Fix a data-race around sysctl_netrom_transport_timeout
  netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser
  netrom: Fix a data-race around sysctl_netrom_obsolescence_count_initialiser
  netrom: Fix a data-race around sysctl_netrom_default_path_quality
  netfilter: nf_conntrack_h323: Add protection for bmp length out of range
  netfilter: nf_ct_h323: Extend nf_h323_error_boundary to work on bits as well
  netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function
  netfilter: nf_ct_h323: Out Of Bound Read in Netfilter Conntrack
  netfilter: nf_conntrack_h323: Remove typedef struct
  geneve: make sure to pull inner header in geneve_rx()
  net: geneve: modify IP header check in geneve6_xmit_skb and geneve_xmit_skb
  net: move definition of pcpu_lstats to header file
  net: lan78xx: fix runtime PM count underflow on link stop
  lan78xx: Fix race conditions in suspend/resume handling
  lan78xx: Fix partial packet errors on suspend/resume
  lan78xx: Add missing return code checks
  lan78xx: Fix white space and style issues
  net: usb: lan78xx: Remove lots of set but unused 'ret' variables
  net: usb: lan78xx: Disable interrupts before calling generic_handle_irq()
  net: lan78xx: Allow for VLAN headers in timeout calcs
  ip: validate header length on virtual device xmit
  LTS: Update to 4.14.341
  gpio: 74x164: Enable output pins after registers are reset
  cachefiles: fix memory leak in cachefiles_add_cache()
  mmc: core: Fix eMMC initialization with 1-bit bus connection
  btrfs: dev-replace: properly validate device names
  wifi: nl80211: reject iftype change with mesh ID change
  gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
  ALSA: Drop leftover snd-rtctimer stuff from Makefile
  power: supply: bq27xxx-i2c: Do not free non existing IRQ
  efi/capsule-loader: fix incorrect allocation size
  Bluetooth: Enforce validation on max value of connection interval
  Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
  Bluetooth: Avoid potential use-after-free in hci_error_reset
  net: usb: dm9601: fix wrong return value in dm9601_mdio_read
  lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
  netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter

 Conflicts:
	drivers/android/binder.c
	drivers/block/loop.c
	drivers/mmc/host/meson-gx-mmc.c
	drivers/staging/typec/tcpm.h
	drivers/usb/typec/tcpm/tcpm.c
	fs/aio.c
	fs/select.c
	include/net/netns/ipv4.h
	mm/page_alloc.c
	net/core/filter.c
	net/ipv4/sysctl_net_ipv4.c
	net/ipv4/tcp_ipv4.c
	sound/usb/stream.c

Change-Id: I90aff1a1b88379a959c6dda1c89e5efb48af5450
2024-06-22 22:29:18 +03:00

2756 lines
64 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/kernel/sys.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/stat.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>
#include <linux/nospec.h>
#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>
#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a, b) (-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a, b) (-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a, b) (-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a, b) (-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a, b) (-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a, b) (-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a, b) (-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a, b) (-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a) (-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a) (-EINVAL)
#endif
#ifndef MPX_ENABLE_MANAGEMENT
# define MPX_ENABLE_MANAGEMENT() (-EINVAL)
#endif
#ifndef MPX_DISABLE_MANAGEMENT
# define MPX_DISABLE_MANAGEMENT() (-EINVAL)
#endif
#ifndef GET_FP_MODE
# define GET_FP_MODE(a) (-EINVAL)
#endif
#ifndef SET_FP_MODE
# define SET_FP_MODE(a,b) (-EINVAL)
#endif
#ifndef SET_TAGGED_ADDR_CTRL
# define SET_TAGGED_ADDR_CTRL(a) (-EINVAL)
#endif
#ifndef GET_TAGGED_ADDR_CTRL
# define GET_TAGGED_ADDR_CTRL() (-EINVAL)
#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
* architectures that now have 32-bit UID/GID but didn't in the past
*/
int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
/*
* the same as above, but for filesystems which can only store a 16-bit
* UID and GID. as such, this is needed on all architectures
*/
int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);
/*
* Returns true if current's euid is same as p's uid or euid,
* or has CAP_SYS_NICE to p's user_ns.
*
* Called with rcu_read_lock, creds are safe
*/
static bool set_one_prio_perm(struct task_struct *p)
{
const struct cred *cred = current_cred(), *pcred = __task_cred(p);
if (uid_eq(pcred->uid, cred->euid) ||
uid_eq(pcred->euid, cred->euid))
return true;
if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
return true;
return false;
}
/*
* set the priority of a task
* - the caller must hold the RCU read lock
*/
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
int no_nice;
if (!set_one_prio_perm(p)) {
error = -EPERM;
goto out;
}
if (niceval < task_nice(p) && !can_nice(p, niceval)) {
error = -EACCES;
goto out;
}
no_nice = security_task_setnice(p, niceval);
if (no_nice) {
error = no_nice;
goto out;
}
if (error == -ESRCH)
error = 0;
set_user_nice(p, niceval);
out:
return error;
}
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
struct task_struct *g, *p;
struct user_struct *user;
const struct cred *cred = current_cred();
int error = -EINVAL;
struct pid *pgrp;
kuid_t uid;
if (which > PRIO_USER || which < PRIO_PROCESS)
goto out;
/* normalize: avoid signed division (rounding problems) */
error = -ESRCH;
if (niceval < MIN_NICE)
niceval = MIN_NICE;
if (niceval > MAX_NICE)
niceval = MAX_NICE;
rcu_read_lock();
read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
p = find_task_by_vpid(who);
else
p = current;
if (p)
error = set_one_prio(p, niceval, error);
break;
case PRIO_PGRP:
if (who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
user = cred->user;
if (!who)
uid = cred->uid;
else if (!uid_eq(uid, cred->uid)) {
user = find_user(uid);
if (!user)
goto out_unlock; /* No processes for this user */
}
do_each_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
error = set_one_prio(p, niceval, error);
} while_each_thread(g, p);
if (!uid_eq(uid, cred->uid))
free_uid(user); /* For find_user() */
break;
}
out_unlock:
read_unlock(&tasklist_lock);
rcu_read_unlock();
out:
return error;
}
/*
* Ugh. To avoid negative return values, "getpriority()" will
* not return the normal nice-value, but a negated value that
* has been offset by 20 (ie it returns 40..1 instead of -20..19)
* to stay compatible.
*/
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
struct task_struct *g, *p;
struct user_struct *user;
const struct cred *cred = current_cred();
long niceval, retval = -ESRCH;
struct pid *pgrp;
kuid_t uid;
if (which > PRIO_USER || which < PRIO_PROCESS)
return -EINVAL;
rcu_read_lock();
read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
if (who)
p = find_task_by_vpid(who);
else
p = current;
if (p) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
}
break;
case PRIO_PGRP:
if (who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
uid = make_kuid(cred->user_ns, who);
user = cred->user;
if (!who)
uid = cred->uid;
else if (!uid_eq(uid, cred->uid)) {
user = find_user(uid);
if (!user)
goto out_unlock; /* No processes for this user */
}
do_each_thread(g, p) {
if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
niceval = nice_to_rlimit(task_nice(p));
if (niceval > retval)
retval = niceval;
}
} while_each_thread(g, p);
if (!uid_eq(uid, cred->uid))
free_uid(user); /* for find_user() */
break;
}
out_unlock:
read_unlock(&tasklist_lock);
rcu_read_unlock();
return retval;
}
/*
* Unprivileged users may change the real gid to the effective gid
* or vice versa. (BSD-style)
*
* If you set the real gid at all, or set the effective gid to a value not
* equal to the real gid, then the saved gid is set to the new effective gid.
*
* This makes it possible for a setgid program to completely drop its
* privileges, which is often a useful assertion to make when you are doing
* a security audit over a program.
*
* The general idea is that a program which uses just setregid() will be
* 100% compatible with BSD. A program which uses just setgid() will be
* 100% compatible with POSIX with saved IDs.
*
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
#ifdef CONFIG_MULTIUSER
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kgid_t krgid, kegid;
krgid = make_kgid(ns, rgid);
kegid = make_kgid(ns, egid);
if ((rgid != (gid_t) -1) && !gid_valid(krgid))
return -EINVAL;
if ((egid != (gid_t) -1) && !gid_valid(kegid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (rgid != (gid_t) -1) {
if (gid_eq(old->gid, krgid) ||
gid_eq(old->egid, krgid) ||
ns_capable(old->user_ns, CAP_SETGID))
new->gid = krgid;
else
goto error;
}
if (egid != (gid_t) -1) {
if (gid_eq(old->gid, kegid) ||
gid_eq(old->egid, kegid) ||
gid_eq(old->sgid, kegid) ||
ns_capable(old->user_ns, CAP_SETGID))
new->egid = kegid;
else
goto error;
}
if (rgid != (gid_t) -1 ||
(egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
new->sgid = new->egid;
new->fsgid = new->egid;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
/*
* setgid() is implemented like SysV w/ SAVED_IDS
*
* SMP: Same implicit races as above.
*/
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kgid_t kgid;
kgid = make_kgid(ns, gid);
if (!gid_valid(kgid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (ns_capable(old->user_ns, CAP_SETGID))
new->gid = new->egid = new->sgid = new->fsgid = kgid;
else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
new->egid = new->fsgid = kgid;
else
goto error;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
/*
* change the user struct in a credentials set to match the new UID
*/
static int set_user(struct cred *new)
{
struct user_struct *new_user;
new_user = alloc_uid(new->uid);
if (!new_user)
return -EAGAIN;
/*
* We don't fail in case of NPROC limit excess here because too many
* poorly written programs don't check set*uid() return code, assuming
* it never fails if called by root. We may still enforce NPROC limit
* for programs doing set*uid()+execve() by harmlessly deferring the
* failure to the execve() stage.
*/
if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
new_user != INIT_USER)
current->flags |= PF_NPROC_EXCEEDED;
else
current->flags &= ~PF_NPROC_EXCEEDED;
free_uid(new->user);
new->user = new_user;
return 0;
}
/*
* Unprivileged users may change the real uid to the effective uid
* or vice versa. (BSD-style)
*
* If you set the real uid at all, or set the effective uid to a value not
* equal to the real uid, then the saved uid is set to the new effective uid.
*
* This makes it possible for a setuid program to completely drop its
* privileges, which is often a useful assertion to make when you are doing
* a security audit over a program.
*
* The general idea is that a program which uses just setreuid() will be
* 100% compatible with BSD. A program which uses just setuid() will be
* 100% compatible with POSIX with saved IDs.
*/
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kuid_t kruid, keuid;
kruid = make_kuid(ns, ruid);
keuid = make_kuid(ns, euid);
if ((ruid != (uid_t) -1) && !uid_valid(kruid))
return -EINVAL;
if ((euid != (uid_t) -1) && !uid_valid(keuid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (ruid != (uid_t) -1) {
new->uid = kruid;
if (!uid_eq(old->uid, kruid) &&
!uid_eq(old->euid, kruid) &&
!ns_capable(old->user_ns, CAP_SETUID))
goto error;
}
if (euid != (uid_t) -1) {
new->euid = keuid;
if (!uid_eq(old->uid, keuid) &&
!uid_eq(old->euid, keuid) &&
!uid_eq(old->suid, keuid) &&
!ns_capable(old->user_ns, CAP_SETUID))
goto error;
}
if (!uid_eq(new->uid, old->uid)) {
retval = set_user(new);
if (retval < 0)
goto error;
}
if (ruid != (uid_t) -1 ||
(euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
new->suid = new->euid;
new->fsuid = new->euid;
retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
if (retval < 0)
goto error;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
/*
* setuid() is implemented like SysV with SAVED_IDS
*
* Note that SAVED_ID's is deficient in that a setuid root program
* like sendmail, for example, cannot set its uid to be a normal
* user and then switch back, because if you're root, setuid() sets
* the saved uid too. If you don't like this, blame the bright people
* in the POSIX committee and/or USG. Note that the BSD-style setreuid()
* will allow a root program to temporarily drop privileges and be able to
* regain them by swapping the real and effective uid.
*/
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kuid_t kuid;
kuid = make_kuid(ns, uid);
if (!uid_valid(kuid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (ns_capable(old->user_ns, CAP_SETUID)) {
new->suid = new->uid = kuid;
if (!uid_eq(kuid, old->uid)) {
retval = set_user(new);
if (retval < 0)
goto error;
}
} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
goto error;
}
new->fsuid = new->euid = kuid;
retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
if (retval < 0)
goto error;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
/*
* This function implements a generic ability to update ruid, euid,
* and suid. This allows you to implement the 4.4 compatible seteuid().
*/
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kuid_t kruid, keuid, ksuid;
kruid = make_kuid(ns, ruid);
keuid = make_kuid(ns, euid);
ksuid = make_kuid(ns, suid);
if ((ruid != (uid_t) -1) && !uid_valid(kruid))
return -EINVAL;
if ((euid != (uid_t) -1) && !uid_valid(keuid))
return -EINVAL;
if ((suid != (uid_t) -1) && !uid_valid(ksuid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (!ns_capable(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
!uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
goto error;
if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
!uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
goto error;
if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
!uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
goto error;
}
if (ruid != (uid_t) -1) {
new->uid = kruid;
if (!uid_eq(kruid, old->uid)) {
retval = set_user(new);
if (retval < 0)
goto error;
}
}
if (euid != (uid_t) -1)
new->euid = keuid;
if (suid != (uid_t) -1)
new->suid = ksuid;
new->fsuid = new->euid;
retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
if (retval < 0)
goto error;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
const struct cred *cred = current_cred();
int retval;
uid_t ruid, euid, suid;
ruid = from_kuid_munged(cred->user_ns, cred->uid);
euid = from_kuid_munged(cred->user_ns, cred->euid);
suid = from_kuid_munged(cred->user_ns, cred->suid);
retval = put_user(ruid, ruidp);
if (!retval) {
retval = put_user(euid, euidp);
if (!retval)
return put_user(suid, suidp);
}
return retval;
}
/*
* Same as above, but for rgid, egid, sgid.
*/
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
struct user_namespace *ns = current_user_ns();
const struct cred *old;
struct cred *new;
int retval;
kgid_t krgid, kegid, ksgid;
krgid = make_kgid(ns, rgid);
kegid = make_kgid(ns, egid);
ksgid = make_kgid(ns, sgid);
if ((rgid != (gid_t) -1) && !gid_valid(krgid))
return -EINVAL;
if ((egid != (gid_t) -1) && !gid_valid(kegid))
return -EINVAL;
if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
return -EINVAL;
new = prepare_creds();
if (!new)
return -ENOMEM;
old = current_cred();
retval = -EPERM;
if (!ns_capable(old->user_ns, CAP_SETGID)) {
if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
!gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
goto error;
if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
!gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
goto error;
if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
!gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
goto error;
}
if (rgid != (gid_t) -1)
new->gid = krgid;
if (egid != (gid_t) -1)
new->egid = kegid;
if (sgid != (gid_t) -1)
new->sgid = ksgid;
new->fsgid = new->egid;
return commit_creds(new);
error:
abort_creds(new);
return retval;
}
SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
const struct cred *cred = current_cred();
int retval;
gid_t rgid, egid, sgid;
rgid = from_kgid_munged(cred->user_ns, cred->gid);
egid = from_kgid_munged(cred->user_ns, cred->egid);
sgid = from_kgid_munged(cred->user_ns, cred->sgid);
retval = put_user(rgid, rgidp);
if (!retval) {
retval = put_user(egid, egidp);
if (!retval)
retval = put_user(sgid, sgidp);
}
return retval;
}
/*
* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
* is used for "access()" and for the NFS daemon (letting nfsd stay at
* whatever uid it wants to). It normally shadows "euid", except when
* explicitly set by setfsuid() or for access..
*/
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
const struct cred *old;
struct cred *new;
uid_t old_fsuid;
kuid_t kuid;
old = current_cred();
old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
kuid = make_kuid(old->user_ns, uid);
if (!uid_valid(kuid))
return old_fsuid;
new = prepare_creds();
if (!new)
return old_fsuid;
if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
ns_capable(old->user_ns, CAP_SETUID)) {
if (!uid_eq(kuid, old->fsuid)) {
new->fsuid = kuid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
goto change_okay;
}
}
abort_creds(new);
return old_fsuid;
change_okay:
commit_creds(new);
return old_fsuid;
}
/*
* Samma på svenska..
*/
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
const struct cred *old;
struct cred *new;
gid_t old_fsgid;
kgid_t kgid;
old = current_cred();
old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
kgid = make_kgid(old->user_ns, gid);
if (!gid_valid(kgid))
return old_fsgid;
new = prepare_creds();
if (!new)
return old_fsgid;
if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
ns_capable(old->user_ns, CAP_SETGID)) {
if (!gid_eq(kgid, old->fsgid)) {
new->fsgid = kgid;
goto change_okay;
}
}
abort_creds(new);
return old_fsgid;
change_okay:
commit_creds(new);
return old_fsgid;
}
#endif /* CONFIG_MULTIUSER */
/**
* sys_getpid - return the thread group id of the current process
*
* Note, despite the name, this returns the tgid not the pid. The tgid and
* the pid are identical unless CLONE_THREAD was specified on clone() in
* which case the tgid is the same in all threads of the same group.
*
* This is SMP safe as current->tgid does not change.
*/
SYSCALL_DEFINE0(getpid)
{
return task_tgid_vnr(current);
}
/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
return task_pid_vnr(current);
}
/*
* Accessing ->real_parent is not SMP-safe, it could
* change from under us. However, we can use a stale
* value of ->real_parent under rcu_read_lock(), see
* release_task()->call_rcu(delayed_put_task_struct).
*/
SYSCALL_DEFINE0(getppid)
{
int pid;
rcu_read_lock();
pid = task_tgid_vnr(rcu_dereference(current->real_parent));
rcu_read_unlock();
return pid;
}
SYSCALL_DEFINE0(getuid)
{
/* Only we change this so SMP safe */
return from_kuid_munged(current_user_ns(), current_uid());
}
SYSCALL_DEFINE0(geteuid)
{
/* Only we change this so SMP safe */
return from_kuid_munged(current_user_ns(), current_euid());
}
SYSCALL_DEFINE0(getgid)
{
/* Only we change this so SMP safe */
return from_kgid_munged(current_user_ns(), current_gid());
}
SYSCALL_DEFINE0(getegid)
{
/* Only we change this so SMP safe */
return from_kgid_munged(current_user_ns(), current_egid());
}
static void do_sys_times(struct tms *tms)
{
u64 tgutime, tgstime, cutime, cstime;
thread_group_cputime_adjusted(current, &tgutime, &tgstime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
tms->tms_utime = nsec_to_clock_t(tgutime);
tms->tms_stime = nsec_to_clock_t(tgstime);
tms->tms_cutime = nsec_to_clock_t(cutime);
tms->tms_cstime = nsec_to_clock_t(cstime);
}
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
if (tbuf) {
struct tms tmp;
do_sys_times(&tmp);
if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
return -EFAULT;
}
force_successful_syscall_return();
return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
#ifdef CONFIG_COMPAT
static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
{
return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
}
COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
{
if (tbuf) {
struct tms tms;
struct compat_tms tmp;
do_sys_times(&tms);
/* Convert our struct tms to the compat version. */
tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
force_successful_syscall_return();
return compat_jiffies_to_clock_t(jiffies);
}
#endif
/*
* This needs some heavy checking ...
* I just haven't the stomach for it. I also don't fully
* understand sessions/pgrp etc. Let somebody who does explain it.
*
* OK, I think I have the protection semantics right.... this is really
* only important on a multi-user system anyway, to make sure one user
* can't send a signal to a process owned by another. -TYT, 12/12/91
*
* !PF_FORKNOEXEC check to conform completely to POSIX.
*/
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
struct task_struct *p;
struct task_struct *group_leader = current->group_leader;
struct pid *pgrp;
int err;
if (!pid)
pid = task_pid_vnr(group_leader);
if (!pgid)
pgid = pid;
if (pgid < 0)
return -EINVAL;
rcu_read_lock();
/* From this point forward we keep holding onto the tasklist lock
* so that our parent does not change from under us. -DaveM
*/
write_lock_irq(&tasklist_lock);
err = -ESRCH;
p = find_task_by_vpid(pid);
if (!p)
goto out;
err = -EINVAL;
if (!thread_group_leader(p))
goto out;
if (same_thread_group(p->real_parent, group_leader)) {
err = -EPERM;
if (task_session(p) != task_session(group_leader))
goto out;
err = -EACCES;
if (!(p->flags & PF_FORKNOEXEC))
goto out;
} else {
err = -ESRCH;
if (p != group_leader)
goto out;
}
err = -EPERM;
if (p->signal->leader)
goto out;
pgrp = task_pid(p);
if (pgid != pid) {
struct task_struct *g;
pgrp = find_vpid(pgid);
g = pid_task(pgrp, PIDTYPE_PGID);
if (!g || task_session(g) != task_session(group_leader))
goto out;
}
err = security_task_setpgid(p, pgid);
if (err)
goto out;
if (task_pgrp(p) != pgrp)
change_pid(p, PIDTYPE_PGID, pgrp);
err = 0;
out:
/* All paths lead to here, thus we are safe. -DaveM */
write_unlock_irq(&tasklist_lock);
rcu_read_unlock();
return err;
}
SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
struct task_struct *p;
struct pid *grp;
int retval;
rcu_read_lock();
if (!pid)
grp = task_pgrp(current);
else {
retval = -ESRCH;
p = find_task_by_vpid(pid);
if (!p)
goto out;
grp = task_pgrp(p);
if (!grp)
goto out;
retval = security_task_getpgid(p);
if (retval)
goto out;
}
retval = pid_vnr(grp);
out:
rcu_read_unlock();
return retval;
}
#ifdef __ARCH_WANT_SYS_GETPGRP
SYSCALL_DEFINE0(getpgrp)
{
return sys_getpgid(0);
}
#endif
SYSCALL_DEFINE1(getsid, pid_t, pid)
{
struct task_struct *p;
struct pid *sid;
int retval;
rcu_read_lock();
if (!pid)
sid = task_session(current);
else {
retval = -ESRCH;
p = find_task_by_vpid(pid);
if (!p)
goto out;
sid = task_session(p);
if (!sid)
goto out;
retval = security_task_getsid(p);
if (retval)
goto out;
}
retval = pid_vnr(sid);
out:
rcu_read_unlock();
return retval;
}
static void set_special_pids(struct pid *pid)
{
struct task_struct *curr = current->group_leader;
if (task_session(curr) != pid)
change_pid(curr, PIDTYPE_SID, pid);
if (task_pgrp(curr) != pid)
change_pid(curr, PIDTYPE_PGID, pid);
}
SYSCALL_DEFINE0(setsid)
{
struct task_struct *group_leader = current->group_leader;
struct pid *sid = task_pid(group_leader);
pid_t session = pid_vnr(sid);
int err = -EPERM;
write_lock_irq(&tasklist_lock);
/* Fail if I am already a session leader */
if (group_leader->signal->leader)
goto out;
/* Fail if a process group id already exists that equals the
* proposed session id.
*/
if (pid_task(sid, PIDTYPE_PGID))
goto out;
group_leader->signal->leader = 1;
set_special_pids(sid);
proc_clear_tty(group_leader);
err = session;
out:
write_unlock_irq(&tasklist_lock);
if (err > 0) {
proc_sid_connector(group_leader);
sched_autogroup_create_attach(group_leader);
}
return err;
}
DECLARE_RWSEM(uts_sem);
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
(personality(current->personality) == PER_LINUX32 && \
copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name) 0
#endif
/*
* Work around broken programs that cannot handle "Linux 3.0".
* Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
* And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
*/
static int override_release(char __user *release, size_t len)
{
int ret = 0;
if (current->personality & UNAME26) {
const char *rest = UTS_RELEASE;
char buf[65] = { 0 };
int ndots = 0;
unsigned v;
size_t copy;
while (*rest) {
if (*rest == '.' && ++ndots >= 3)
break;
if (!isdigit(*rest) && *rest != '.')
break;
rest++;
}
v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
copy = clamp_t(size_t, len, 1, sizeof(buf));
copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
ret = copy_to_user(release, buf, copy + 1);
}
return ret;
}
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
struct new_utsname tmp;
down_read(&uts_sem);
memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
if (copy_to_user(name, &tmp, sizeof(tmp)))
return -EFAULT;
if (override_release(name->release, sizeof(name->release)))
return -EFAULT;
if (override_architecture(name))
return -EFAULT;
return 0;
}
#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
* Old cruft
*/
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
struct old_utsname tmp;
if (!name)
return -EFAULT;
down_read(&uts_sem);
memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
if (copy_to_user(name, &tmp, sizeof(tmp)))
return -EFAULT;
if (override_release(name->release, sizeof(name->release)))
return -EFAULT;
if (override_architecture(name))
return -EFAULT;
return 0;
}
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
struct oldold_utsname tmp;
if (!name)
return -EFAULT;
memset(&tmp, 0, sizeof(tmp));
down_read(&uts_sem);
memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
up_read(&uts_sem);
if (copy_to_user(name, &tmp, sizeof(tmp)))
return -EFAULT;
if (override_architecture(name))
return -EFAULT;
if (override_release(name->release, sizeof(name->release)))
return -EFAULT;
return 0;
}
#endif
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
int errno;
char tmp[__NEW_UTS_LEN];
if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
struct new_utsname *u;
down_write(&uts_sem);
u = utsname();
memcpy(u->nodename, tmp, len);
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
uts_proc_notify(UTS_PROC_HOSTNAME);
up_write(&uts_sem);
}
return errno;
}
#ifdef __ARCH_WANT_SYS_GETHOSTNAME
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
int i;
struct new_utsname *u;
char tmp[__NEW_UTS_LEN + 1];
if (len < 0)
return -EINVAL;
down_read(&uts_sem);
u = utsname();
i = 1 + strlen(u->nodename);
if (i > len)
i = len;
memcpy(tmp, u->nodename, i);
up_read(&uts_sem);
if (copy_to_user(name, tmp, i))
return -EFAULT;
return 0;
}
#endif
/*
* Only setdomainname; getdomainname can be implemented by calling
* uname()
*/
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
int errno;
char tmp[__NEW_UTS_LEN];
if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
struct new_utsname *u;
down_write(&uts_sem);
u = utsname();
memcpy(u->domainname, tmp, len);
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
uts_proc_notify(UTS_PROC_DOMAINNAME);
up_write(&uts_sem);
}
return errno;
}
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
struct rlimit value;
int ret;
ret = do_prlimit(current, resource, NULL, &value);
if (!ret)
ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
return ret;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
struct compat_rlimit __user *, rlim)
{
struct rlimit r;
struct compat_rlimit r32;
if (copy_from_user(&r32, rlim, sizeof(struct compat_rlimit)))
return -EFAULT;
if (r32.rlim_cur == COMPAT_RLIM_INFINITY)
r.rlim_cur = RLIM_INFINITY;
else
r.rlim_cur = r32.rlim_cur;
if (r32.rlim_max == COMPAT_RLIM_INFINITY)
r.rlim_max = RLIM_INFINITY;
else
r.rlim_max = r32.rlim_max;
return do_prlimit(current, resource, &r, NULL);
}
COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
struct compat_rlimit __user *, rlim)
{
struct rlimit r;
int ret;
ret = do_prlimit(current, resource, NULL, &r);
if (!ret) {
struct compat_rlimit r32;
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r32.rlim_cur = COMPAT_RLIM_INFINITY;
else
r32.rlim_cur = r.rlim_cur;
if (r.rlim_max > COMPAT_RLIM_INFINITY)
r32.rlim_max = COMPAT_RLIM_INFINITY;
else
r32.rlim_max = r.rlim_max;
if (copy_to_user(rlim, &r32, sizeof(struct compat_rlimit)))
return -EFAULT;
}
return ret;
}
#endif
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
/*
* Back compatibility for getrlimit. Needed for some apps.
*/
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
struct rlimit __user *, rlim)
{
struct rlimit x;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
x = current->signal->rlim[resource];
task_unlock(current->group_leader);
if (x.rlim_cur > 0x7FFFFFFF)
x.rlim_cur = 0x7FFFFFFF;
if (x.rlim_max > 0x7FFFFFFF)
x.rlim_max = 0x7FFFFFFF;
return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
struct compat_rlimit __user *, rlim)
{
struct rlimit r;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
r = current->signal->rlim[resource];
task_unlock(current->group_leader);
if (r.rlim_cur > 0x7FFFFFFF)
r.rlim_cur = 0x7FFFFFFF;
if (r.rlim_max > 0x7FFFFFFF)
r.rlim_max = 0x7FFFFFFF;
if (put_user(r.rlim_cur, &rlim->rlim_cur) ||
put_user(r.rlim_max, &rlim->rlim_max))
return -EFAULT;
return 0;
}
#endif
#endif
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
return rlim64 >= ULONG_MAX;
#else
return rlim64 == RLIM64_INFINITY;
#endif
}
static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
if (rlim->rlim_cur == RLIM_INFINITY)
rlim64->rlim_cur = RLIM64_INFINITY;
else
rlim64->rlim_cur = rlim->rlim_cur;
if (rlim->rlim_max == RLIM_INFINITY)
rlim64->rlim_max = RLIM64_INFINITY;
else
rlim64->rlim_max = rlim->rlim_max;
}
static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
if (rlim64_is_infinity(rlim64->rlim_cur))
rlim->rlim_cur = RLIM_INFINITY;
else
rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
if (rlim64_is_infinity(rlim64->rlim_max))
rlim->rlim_max = RLIM_INFINITY;
else
rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}
/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim)
{
struct rlimit *rlim;
int retval = 0;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
resource = array_index_nospec(resource, RLIM_NLIMITS);
if (new_rlim) {
if (new_rlim->rlim_cur > new_rlim->rlim_max)
return -EINVAL;
if (resource == RLIMIT_NOFILE &&
new_rlim->rlim_max > sysctl_nr_open)
return -EPERM;
}
/* protect tsk->signal and tsk->sighand from disappearing */
read_lock(&tasklist_lock);
if (!tsk->sighand) {
retval = -ESRCH;
goto out;
}
rlim = tsk->signal->rlim + resource;
task_lock(tsk->group_leader);
if (new_rlim) {
/* Keep the capable check against init_user_ns until
cgroups can contain all limits */
if (new_rlim->rlim_max > rlim->rlim_max &&
!capable(CAP_SYS_RESOURCE))
retval = -EPERM;
if (!retval)
retval = security_task_setrlimit(tsk, resource, new_rlim);
if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
/*
* The caller is asking for an immediate RLIMIT_CPU
* expiry. But we use the zero value to mean "it was
* never set". So let's cheat and make it one second
* instead
*/
new_rlim->rlim_cur = 1;
}
}
if (!retval) {
if (old_rlim)
*old_rlim = *rlim;
if (new_rlim)
*rlim = *new_rlim;
}
task_unlock(tsk->group_leader);
/*
* RLIMIT_CPU handling. Note that the kernel fails to return an error
* code if it rejected the user's attempt to set RLIMIT_CPU. This is a
* very long-standing error, and fixing it now risks breakage of
* applications, so we live with it
*/
if (!retval && new_rlim && resource == RLIMIT_CPU &&
new_rlim->rlim_cur != RLIM_INFINITY &&
IS_ENABLED(CONFIG_POSIX_TIMERS))
update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
read_unlock(&tasklist_lock);
return retval;
}
/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task,
unsigned int flags)
{
const struct cred *cred = current_cred(), *tcred;
bool id_match;
if (current == task)
return 0;
tcred = __task_cred(task);
id_match = (uid_eq(cred->uid, tcred->euid) &&
uid_eq(cred->uid, tcred->suid) &&
uid_eq(cred->uid, tcred->uid) &&
gid_eq(cred->gid, tcred->egid) &&
gid_eq(cred->gid, tcred->sgid) &&
gid_eq(cred->gid, tcred->gid));
if (!id_match && !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
return -EPERM;
return security_task_prlimit(cred, tcred, flags);
}
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
const struct rlimit64 __user *, new_rlim,
struct rlimit64 __user *, old_rlim)
{
struct rlimit64 old64, new64;
struct rlimit old, new;
struct task_struct *tsk;
unsigned int checkflags = 0;
int ret;
if (old_rlim)
checkflags |= LSM_PRLIMIT_READ;
if (new_rlim) {
if (copy_from_user(&new64, new_rlim, sizeof(new64)))
return -EFAULT;
rlim64_to_rlim(&new64, &new);
checkflags |= LSM_PRLIMIT_WRITE;
}
rcu_read_lock();
tsk = pid ? find_task_by_vpid(pid) : current;
if (!tsk) {
rcu_read_unlock();
return -ESRCH;
}
ret = check_prlimit_permission(tsk, checkflags);
if (ret) {
rcu_read_unlock();
return ret;
}
get_task_struct(tsk);
rcu_read_unlock();
ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
old_rlim ? &old : NULL);
if (!ret && old_rlim) {
rlim_to_rlim64(&old, &old64);
if (copy_to_user(old_rlim, &old64, sizeof(old64)))
ret = -EFAULT;
}
put_task_struct(tsk);
return ret;
}
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
struct rlimit new_rlim;
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
return do_prlimit(current, resource, &new_rlim, NULL);
}
/*
* It would make sense to put struct rusage in the task_struct,
* except that would make the task_struct be *really big*. After
* task_struct gets moved into malloc'ed memory, it would
* make sense to do this. It will make moving the rest of the information
* a lot simpler! (Which we're not doing right now because we're not
* measuring them yet).
*
* When sampling multiple threads for RUSAGE_SELF, under SMP we might have
* races with threads incrementing their own counters. But since word
* reads are atomic, we either get new values or old values and we don't
* care which for the sums. We always take the siglock to protect reading
* the c* fields from p->signal from races with exit.c updating those
* fields when reaping, so a sample either gets all the additions of a
* given child after it's reaped, or none so this sample is before reaping.
*
* Locking:
* We need to take the siglock for CHILDEREN, SELF and BOTH
* for the cases current multithreaded, non-current single threaded
* non-current multithreaded. Thread traversal is now safe with
* the siglock held.
* Strictly speaking, we donot need to take the siglock if we are current and
* single threaded, as no one else can take our signal_struct away, no one
* else can reap the children to update signal->c* counters, and no one else
* can race with the signal-> fields. If we do not take any lock, the
* signal-> fields could be read out of order while another thread was just
* exiting. So we should place a read memory barrier when we avoid the lock.
* On the writer side, write memory barrier is implied in __exit_signal
* as __exit_signal releases the siglock spinlock after updating the signal->
* fields. But we don't do this yet to keep things simple.
*
*/
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
r->ru_nvcsw += t->nvcsw;
r->ru_nivcsw += t->nivcsw;
r->ru_minflt += t->min_flt;
r->ru_majflt += t->maj_flt;
r->ru_inblock += task_io_get_inblock(t);
r->ru_oublock += task_io_get_oublock(t);
}
void getrusage(struct task_struct *p, int who, struct rusage *r)
{
struct task_struct *t;
unsigned long flags;
u64 tgutime, tgstime, utime, stime;
unsigned long maxrss;
struct mm_struct *mm;
struct signal_struct *sig = p->signal;
unsigned int seq = 0;
retry:
memset(r, 0, sizeof(*r));
utime = stime = 0;
maxrss = 0;
if (who == RUSAGE_THREAD) {
task_cputime_adjusted(current, &utime, &stime);
accumulate_thread_rusage(p, r);
maxrss = sig->maxrss;
goto out_thread;
}
flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
switch (who) {
case RUSAGE_BOTH:
case RUSAGE_CHILDREN:
utime = sig->cutime;
stime = sig->cstime;
r->ru_nvcsw = sig->cnvcsw;
r->ru_nivcsw = sig->cnivcsw;
r->ru_minflt = sig->cmin_flt;
r->ru_majflt = sig->cmaj_flt;
r->ru_inblock = sig->cinblock;
r->ru_oublock = sig->coublock;
maxrss = sig->cmaxrss;
if (who == RUSAGE_CHILDREN)
break;
case RUSAGE_SELF:
r->ru_nvcsw += sig->nvcsw;
r->ru_nivcsw += sig->nivcsw;
r->ru_minflt += sig->min_flt;
r->ru_majflt += sig->maj_flt;
r->ru_inblock += sig->inblock;
r->ru_oublock += sig->oublock;
if (maxrss < sig->maxrss)
maxrss = sig->maxrss;
rcu_read_lock();
__for_each_thread(sig, t)
accumulate_thread_rusage(t, r);
rcu_read_unlock();
break;
default:
BUG();
}
if (need_seqretry(&sig->stats_lock, seq)) {
seq = 1;
goto retry;
}
done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
if (who == RUSAGE_CHILDREN)
goto out_children;
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
utime += tgutime;
stime += tgstime;
out_thread:
mm = get_task_mm(p);
if (mm) {
setmax_mm_hiwater_rss(&maxrss, mm);
mmput(mm);
}
out_children:
r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
r->ru_utime = ns_to_timeval(utime);
r->ru_stime = ns_to_timeval(stime);
}
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
struct rusage r;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
who != RUSAGE_THREAD)
return -EINVAL;
getrusage(current, who, &r);
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
struct rusage r;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
who != RUSAGE_THREAD)
return -EINVAL;
getrusage(current, who, &r);
return put_compat_rusage(&r, ru);
}
#endif
SYSCALL_DEFINE1(umask, int, mask)
{
mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
return mask;
}
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
struct fd exe;
struct file *old_exe, *exe_file;
struct inode *inode;
int err;
exe = fdget(fd);
if (!exe.file)
return -EBADF;
inode = file_inode(exe.file);
/*
* Because the original mm->exe_file points to executable file, make
* sure that this one is executable as well, to avoid breaking an
* overall picture.
*/
err = -EACCES;
if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
goto exit;
err = inode_permission(inode, MAY_EXEC);
if (err)
goto exit;
/*
* Forbid mm->exe_file change if old file still mapped.
*/
exe_file = get_mm_exe_file(mm);
err = -EBUSY;
if (exe_file) {
struct vm_area_struct *vma;
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
if (path_equal(&vma->vm_file->f_path,
&exe_file->f_path))
goto exit_err;
}
up_read(&mm->mmap_sem);
fput(exe_file);
}
err = 0;
/* set the new file, lockless */
get_file(exe.file);
old_exe = xchg(&mm->exe_file, exe.file);
if (old_exe)
fput(old_exe);
exit:
fdput(exe);
return err;
exit_err:
up_read(&mm->mmap_sem);
fput(exe_file);
goto exit;
}
/*
* WARNING: we don't require any capability here so be very careful
* in what is allowed for modification from userspace.
*/
static int validate_prctl_map(struct prctl_mm_map *prctl_map)
{
unsigned long mmap_max_addr = TASK_SIZE;
struct mm_struct *mm = current->mm;
int error = -EINVAL, i;
static const unsigned char offsets[] = {
offsetof(struct prctl_mm_map, start_code),
offsetof(struct prctl_mm_map, end_code),
offsetof(struct prctl_mm_map, start_data),
offsetof(struct prctl_mm_map, end_data),
offsetof(struct prctl_mm_map, start_brk),
offsetof(struct prctl_mm_map, brk),
offsetof(struct prctl_mm_map, start_stack),
offsetof(struct prctl_mm_map, arg_start),
offsetof(struct prctl_mm_map, arg_end),
offsetof(struct prctl_mm_map, env_start),
offsetof(struct prctl_mm_map, env_end),
};
/*
* Make sure the members are not somewhere outside
* of allowed address space.
*/
for (i = 0; i < ARRAY_SIZE(offsets); i++) {
u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
if ((unsigned long)val >= mmap_max_addr ||
(unsigned long)val < mmap_min_addr)
goto out;
}
/*
* Make sure the pairs are ordered.
*/
#define __prctl_check_order(__m1, __op, __m2) \
((unsigned long)prctl_map->__m1 __op \
(unsigned long)prctl_map->__m2) ? 0 : -EINVAL
error = __prctl_check_order(start_code, <, end_code);
error |= __prctl_check_order(start_data,<=, end_data);
error |= __prctl_check_order(start_brk, <=, brk);
error |= __prctl_check_order(arg_start, <=, arg_end);
error |= __prctl_check_order(env_start, <=, env_end);
if (error)
goto out;
#undef __prctl_check_order
error = -EINVAL;
/*
* Neither we should allow to override limits if they set.
*/
if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
prctl_map->start_brk, prctl_map->end_data,
prctl_map->start_data))
goto out;
/*
* Someone is trying to cheat the auxv vector.
*/
if (prctl_map->auxv_size) {
if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
goto out;
}
/*
* Finally, make sure the caller has the rights to
* change /proc/pid/exe link: only local sys admin should
* be allowed to.
*/
if (prctl_map->exe_fd != (u32)-1) {
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
goto out;
}
error = 0;
out:
return error;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
unsigned long user_auxv[AT_VECTOR_SIZE];
struct mm_struct *mm = current->mm;
int error;
BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);
if (opt == PR_SET_MM_MAP_SIZE)
return put_user((unsigned int)sizeof(prctl_map),
(unsigned int __user *)addr);
if (data_size != sizeof(prctl_map))
return -EINVAL;
if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
return -EFAULT;
error = validate_prctl_map(&prctl_map);
if (error)
return error;
if (prctl_map.auxv_size) {
memset(user_auxv, 0, sizeof(user_auxv));
if (copy_from_user(user_auxv,
(const void __user *)prctl_map.auxv,
prctl_map.auxv_size))
return -EFAULT;
/* Last entry must be AT_NULL as specification requires */
user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
}
if (prctl_map.exe_fd != (u32)-1) {
error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
if (error)
return error;
}
/*
* arg_lock protects concurent updates but we still need mmap_sem for
* read to exclude races with sys_brk.
*/
down_read(&mm->mmap_sem);
/*
* We don't validate if these members are pointing to
* real present VMAs because application may have correspond
* VMAs already unmapped and kernel uses these members for statistics
* output in procfs mostly, except
*
* - @start_brk/@brk which are used in do_brk but kernel lookups
* for VMAs when updating these memvers so anything wrong written
* here cause kernel to swear at userspace program but won't lead
* to any problem in kernel itself
*/
spin_lock(&mm->arg_lock);
mm->start_code = prctl_map.start_code;
mm->end_code = prctl_map.end_code;
mm->start_data = prctl_map.start_data;
mm->end_data = prctl_map.end_data;
mm->start_brk = prctl_map.start_brk;
mm->brk = prctl_map.brk;
mm->start_stack = prctl_map.start_stack;
mm->arg_start = prctl_map.arg_start;
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
spin_unlock(&mm->arg_lock);
/*
* Note this update of @saved_auxv is lockless thus
* if someone reads this member in procfs while we're
* updating -- it may get partly updated results. It's
* known and acceptable trade off: we leave it as is to
* not introduce additional locks here making the kernel
* more complex.
*/
if (prctl_map.auxv_size)
memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
up_read(&mm->mmap_sem);
return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
/*
* This doesn't move the auxiliary vector itself since it's pinned to
* mm_struct, but it permits filling the vector with new values. It's
* up to the caller to provide sane values here, otherwise userspace
* tools which use this vector might be unhappy.
*/
unsigned long user_auxv[AT_VECTOR_SIZE];
if (len > sizeof(user_auxv))
return -EINVAL;
if (copy_from_user(user_auxv, (const void __user *)addr, len))
return -EFAULT;
/* Make sure the last entry is always AT_NULL */
user_auxv[AT_VECTOR_SIZE - 2] = 0;
user_auxv[AT_VECTOR_SIZE - 1] = 0;
BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
task_lock(current);
memcpy(mm->saved_auxv, user_auxv, len);
task_unlock(current);
return 0;
}
static int prctl_set_mm(int opt, unsigned long addr,
unsigned long arg4, unsigned long arg5)
{
struct mm_struct *mm = current->mm;
struct prctl_mm_map prctl_map;
struct vm_area_struct *vma;
int error;
if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
opt != PR_SET_MM_MAP &&
opt != PR_SET_MM_MAP_SIZE)))
return -EINVAL;
#ifdef CONFIG_CHECKPOINT_RESTORE
if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (opt == PR_SET_MM_EXE_FILE)
return prctl_set_mm_exe_file(mm, (unsigned int)addr);
if (opt == PR_SET_MM_AUXV)
return prctl_set_auxv(mm, addr, arg4);
if (addr >= TASK_SIZE || addr < mmap_min_addr)
return -EINVAL;
error = -EINVAL;
down_write(&mm->mmap_sem);
vma = find_vma(mm, addr);
prctl_map.start_code = mm->start_code;
prctl_map.end_code = mm->end_code;
prctl_map.start_data = mm->start_data;
prctl_map.end_data = mm->end_data;
prctl_map.start_brk = mm->start_brk;
prctl_map.brk = mm->brk;
prctl_map.start_stack = mm->start_stack;
prctl_map.arg_start = mm->arg_start;
prctl_map.arg_end = mm->arg_end;
prctl_map.env_start = mm->env_start;
prctl_map.env_end = mm->env_end;
prctl_map.auxv = NULL;
prctl_map.auxv_size = 0;
prctl_map.exe_fd = -1;
switch (opt) {
case PR_SET_MM_START_CODE:
prctl_map.start_code = addr;
break;
case PR_SET_MM_END_CODE:
prctl_map.end_code = addr;
break;
case PR_SET_MM_START_DATA:
prctl_map.start_data = addr;
break;
case PR_SET_MM_END_DATA:
prctl_map.end_data = addr;
break;
case PR_SET_MM_START_STACK:
prctl_map.start_stack = addr;
break;
case PR_SET_MM_START_BRK:
prctl_map.start_brk = addr;
break;
case PR_SET_MM_BRK:
prctl_map.brk = addr;
break;
case PR_SET_MM_ARG_START:
prctl_map.arg_start = addr;
break;
case PR_SET_MM_ARG_END:
prctl_map.arg_end = addr;
break;
case PR_SET_MM_ENV_START:
prctl_map.env_start = addr;
break;
case PR_SET_MM_ENV_END:
prctl_map.env_end = addr;
break;
default:
goto out;
}
error = validate_prctl_map(&prctl_map);
if (error)
goto out;
switch (opt) {
/*
* If command line arguments and environment
* are placed somewhere else on stack, we can
* set them up here, ARG_START/END to setup
* command line argumets and ENV_START/END
* for environment.
*/
case PR_SET_MM_START_STACK:
case PR_SET_MM_ARG_START:
case PR_SET_MM_ARG_END:
case PR_SET_MM_ENV_START:
case PR_SET_MM_ENV_END:
if (!vma) {
error = -EFAULT;
goto out;
}
}
mm->start_code = prctl_map.start_code;
mm->end_code = prctl_map.end_code;
mm->start_data = prctl_map.start_data;
mm->end_data = prctl_map.end_data;
mm->start_brk = prctl_map.start_brk;
mm->brk = prctl_map.brk;
mm->start_stack = prctl_map.start_stack;
mm->arg_start = prctl_map.arg_start;
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
error = 0;
out:
up_write(&mm->mmap_sem);
return error;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
return put_user(me->clear_child_tid, tid_addr);
}
#else
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
return -EINVAL;
}
#endif
static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
/*
* If task has has_child_subreaper - all its decendants
* already have these flag too and new decendants will
* inherit it on fork, skip them.
*
* If we've found child_reaper - skip descendants in
* it's subtree as they will never get out pidns.
*/
if (p->signal->has_child_subreaper ||
is_child_reaper(task_pid(p)))
return 0;
p->signal->has_child_subreaper = 1;
return 1;
}
#ifdef CONFIG_MMU
static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end,
const char __user *name_addr)
{
struct mm_struct *mm = vma->vm_mm;
int error = 0;
pgoff_t pgoff;
if (name_addr == vma_get_anon_name(vma)) {
*prev = vma;
goto out;
}
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
vma->vm_file, pgoff, vma_policy(vma),
vma->vm_userfaultfd_ctx, name_addr);
if (*prev) {
vma = *prev;
goto success;
}
*prev = vma;
if (start != vma->vm_start) {
error = split_vma(mm, vma, start, 1);
if (error)
goto out;
}
if (end != vma->vm_end) {
error = split_vma(mm, vma, end, 0);
if (error)
goto out;
}
success:
if (!vma->vm_file)
vma->anon_name = name_addr;
out:
if (error == -ENOMEM)
error = -EAGAIN;
return error;
}
static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
unsigned long arg)
{
unsigned long tmp;
struct vm_area_struct *vma, *prev;
int unmapped_error = 0;
int error = -EINVAL;
/*
* If the interval [start,end) covers some unmapped address
* ranges, just ignore them, but return -ENOMEM at the end.
* - this matches the handling in madvise.
*/
vma = find_vma_prev(current->mm, start, &prev);
if (vma && start > vma->vm_start)
prev = vma;
for (;;) {
/* Still start < end. */
error = -ENOMEM;
if (!vma)
return error;
/* Here start < (end|vma->vm_end). */
if (start < vma->vm_start) {
unmapped_error = -ENOMEM;
start = vma->vm_start;
if (start >= end)
return error;
}
/* Here vma->vm_start <= start < (end|vma->vm_end) */
tmp = vma->vm_end;
if (end < tmp)
tmp = end;
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
(const char __user *)arg);
if (error)
return error;
start = tmp;
if (prev && start < prev->vm_end)
start = prev->vm_end;
error = unmapped_error;
if (start >= end)
return error;
if (prev)
vma = prev->vm_next;
else /* madvise_remove dropped mmap_sem */
vma = find_vma(current->mm, start);
}
}
static int prctl_set_vma(unsigned long opt, unsigned long start,
unsigned long len_in, unsigned long arg)
{
struct mm_struct *mm = current->mm;
int error;
unsigned long len;
unsigned long end;
if (start & ~PAGE_MASK)
return -EINVAL;
len = (len_in + ~PAGE_MASK) & PAGE_MASK;
/* Check to see whether len was rounded up from small -ve to zero */
if (len_in && !len)
return -EINVAL;
end = start + len;
if (end < start)
return -EINVAL;
if (end == start)
return 0;
down_write(&mm->mmap_sem);
switch (opt) {
case PR_SET_VMA_ANON_NAME:
error = prctl_set_vma_anon_name(start, end, arg);
break;
default:
error = -EINVAL;
}
up_write(&mm->mmap_sem);
return error;
}
#else /* CONFIG_MMU */
static int prctl_set_vma(unsigned long opt, unsigned long start,
unsigned long len_in, unsigned long arg)
{
return -EINVAL;
}
#endif
int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{
return -EINVAL;
}
int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
unsigned long ctrl)
{
return -EINVAL;
}
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
struct task_struct *me = current;
unsigned char comm[sizeof(me->comm)];
long error;
error = security_task_prctl(option, arg2, arg3, arg4, arg5);
if (error != -ENOSYS)
return error;
error = 0;
switch (option) {
case PR_SET_PDEATHSIG:
if (!valid_signal(arg2)) {
error = -EINVAL;
break;
}
me->pdeath_signal = arg2;
break;
case PR_GET_PDEATHSIG:
error = put_user(me->pdeath_signal, (int __user *)arg2);
break;
case PR_GET_DUMPABLE:
error = get_dumpable(me->mm);
break;
case PR_SET_DUMPABLE:
if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
error = -EINVAL;
break;
}
set_dumpable(me->mm, arg2);
break;
case PR_SET_UNALIGN:
error = SET_UNALIGN_CTL(me, arg2);
break;
case PR_GET_UNALIGN:
error = GET_UNALIGN_CTL(me, arg2);
break;
case PR_SET_FPEMU:
error = SET_FPEMU_CTL(me, arg2);
break;
case PR_GET_FPEMU:
error = GET_FPEMU_CTL(me, arg2);
break;
case PR_SET_FPEXC:
error = SET_FPEXC_CTL(me, arg2);
break;
case PR_GET_FPEXC:
error = GET_FPEXC_CTL(me, arg2);
break;
case PR_GET_TIMING:
error = PR_TIMING_STATISTICAL;
break;
case PR_SET_TIMING:
if (arg2 != PR_TIMING_STATISTICAL)
error = -EINVAL;
break;
case PR_SET_NAME:
comm[sizeof(me->comm) - 1] = 0;
if (strncpy_from_user(comm, (char __user *)arg2,
sizeof(me->comm) - 1) < 0)
return -EFAULT;
set_task_comm(me, comm);
proc_comm_connector(me);
break;
case PR_GET_NAME:
get_task_comm(comm, me);
if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
return -EFAULT;
break;
case PR_GET_ENDIAN:
error = GET_ENDIAN(me, arg2);
break;
case PR_SET_ENDIAN:
error = SET_ENDIAN(me, arg2);
break;
case PR_GET_SECCOMP:
error = prctl_get_seccomp();
break;
case PR_SET_SECCOMP:
error = prctl_set_seccomp(arg2, (char __user *)arg3);
break;
case PR_GET_TSC:
error = GET_TSC_CTL(arg2);
break;
case PR_SET_TSC:
error = SET_TSC_CTL(arg2);
break;
case PR_TASK_PERF_EVENTS_DISABLE:
error = perf_event_task_disable();
break;
case PR_TASK_PERF_EVENTS_ENABLE:
error = perf_event_task_enable();
break;
case PR_GET_TIMERSLACK:
if (current->timer_slack_ns > ULONG_MAX)
error = ULONG_MAX;
else
error = current->timer_slack_ns;
break;
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
current->default_timer_slack_ns;
else
current->timer_slack_ns = arg2;
break;
case PR_MCE_KILL:
if (arg4 | arg5)
return -EINVAL;
switch (arg2) {
case PR_MCE_KILL_CLEAR:
if (arg3 != 0)
return -EINVAL;
current->flags &= ~PF_MCE_PROCESS;
break;
case PR_MCE_KILL_SET:
current->flags |= PF_MCE_PROCESS;
if (arg3 == PR_MCE_KILL_EARLY)
current->flags |= PF_MCE_EARLY;
else if (arg3 == PR_MCE_KILL_LATE)
current->flags &= ~PF_MCE_EARLY;
else if (arg3 == PR_MCE_KILL_DEFAULT)
current->flags &=
~(PF_MCE_EARLY|PF_MCE_PROCESS);
else
return -EINVAL;
break;
default:
return -EINVAL;
}
break;
case PR_MCE_KILL_GET:
if (arg2 | arg3 | arg4 | arg5)
return -EINVAL;
if (current->flags & PF_MCE_PROCESS)
error = (current->flags & PF_MCE_EARLY) ?
PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
else
error = PR_MCE_KILL_DEFAULT;
break;
case PR_SET_MM:
error = prctl_set_mm(arg2, arg3, arg4, arg5);
break;
case PR_GET_TID_ADDRESS:
error = prctl_get_tid_address(me, (int __user **)arg2);
break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
if (!arg2)
break;
walk_process_tree(me, propagate_has_child_subreaper, NULL);
break;
case PR_GET_CHILD_SUBREAPER:
error = put_user(me->signal->is_child_subreaper,
(int __user *)arg2);
break;
case PR_SET_NO_NEW_PRIVS:
if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL;
task_set_no_new_privs(current);
break;
case PR_GET_NO_NEW_PRIVS:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return task_no_new_privs(current) ? 1 : 0;
case PR_GET_THP_DISABLE:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
break;
case PR_SET_THP_DISABLE:
if (arg3 || arg4 || arg5)
return -EINVAL;
if (down_write_killable(&me->mm->mmap_sem))
return -EINTR;
if (arg2)
set_bit(MMF_DISABLE_THP, &me->mm->flags);
else
clear_bit(MMF_DISABLE_THP, &me->mm->flags);
up_write(&me->mm->mmap_sem);
break;
case PR_MPX_ENABLE_MANAGEMENT:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
error = MPX_ENABLE_MANAGEMENT();
break;
case PR_MPX_DISABLE_MANAGEMENT:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
error = MPX_DISABLE_MANAGEMENT();
break;
case PR_SET_FP_MODE:
error = SET_FP_MODE(me, arg2);
break;
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
case PR_GET_SPECULATION_CTRL:
if (arg3 || arg4 || arg5)
return -EINVAL;
error = arch_prctl_spec_ctrl_get(me, arg2);
break;
case PR_SET_SPECULATION_CTRL:
if (arg4 || arg5)
return -EINVAL;
error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
break;
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
case PR_SET_TAGGED_ADDR_CTRL:
if (arg3 || arg4 || arg5)
return -EINVAL;
error = SET_TAGGED_ADDR_CTRL(arg2);
break;
case PR_GET_TAGGED_ADDR_CTRL:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
error = GET_TAGGED_ADDR_CTRL();
break;
default:
error = -EINVAL;
break;
}
return error;
}
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
struct getcpu_cache __user *, unused)
{
int err = 0;
int cpu = raw_smp_processor_id();
if (cpup)
err |= put_user(cpu, cpup);
if (nodep)
err |= put_user(cpu_to_node(cpu), nodep);
return err ? -EFAULT : 0;
}
/**
* do_sysinfo - fill in sysinfo struct
* @info: pointer to buffer to fill
*/
static int do_sysinfo(struct sysinfo *info)
{
unsigned long mem_total, sav_total;
unsigned int mem_unit, bitcount;
struct timespec tp;
memset(info, 0, sizeof(struct sysinfo));
get_monotonic_boottime(&tp);
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
info->procs = nr_threads;
si_meminfo(info);
si_swapinfo(info);
/*
* If the sum of all the available memory (i.e. ram + swap)
* is less than can be stored in a 32 bit unsigned long then
* we can be binary compatible with 2.2.x kernels. If not,
* well, in that case 2.2.x was broken anyways...
*
* -Erik Andersen <andersee@debian.org>
*/
mem_total = info->totalram + info->totalswap;
if (mem_total < info->totalram || mem_total < info->totalswap)
goto out;
bitcount = 0;
mem_unit = info->mem_unit;
while (mem_unit > 1) {
bitcount++;
mem_unit >>= 1;
sav_total = mem_total;
mem_total <<= 1;
if (mem_total < sav_total)
goto out;
}
/*
* If mem_total did not overflow, multiply all memory values by
* info->mem_unit and set it to 1. This leaves things compatible
* with 2.2.x, and also retains compatibility with earlier 2.4.x
* kernels...
*/
info->mem_unit = 1;
info->totalram <<= bitcount;
info->freeram <<= bitcount;
info->sharedram <<= bitcount;
info->bufferram <<= bitcount;
info->totalswap <<= bitcount;
info->freeswap <<= bitcount;
info->totalhigh <<= bitcount;
info->freehigh <<= bitcount;
out:
return 0;
}
SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
struct sysinfo val;
do_sysinfo(&val);
if (copy_to_user(info, &val, sizeof(struct sysinfo)))
return -EFAULT;
return 0;
}
#ifdef CONFIG_COMPAT
struct compat_sysinfo {
s32 uptime;
u32 loads[3];
u32 totalram;
u32 freeram;
u32 sharedram;
u32 bufferram;
u32 totalswap;
u32 freeswap;
u16 procs;
u16 pad;
u32 totalhigh;
u32 freehigh;
u32 mem_unit;
char _f[20-2*sizeof(u32)-sizeof(int)];
};
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
struct sysinfo s;
do_sysinfo(&s);
/* Check to see if any memory value is too large for 32-bit and scale
* down if needed
*/
if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
int bitcount = 0;
while (s.mem_unit < PAGE_SIZE) {
s.mem_unit <<= 1;
bitcount++;
}
s.totalram >>= bitcount;
s.freeram >>= bitcount;
s.sharedram >>= bitcount;
s.bufferram >>= bitcount;
s.totalswap >>= bitcount;
s.freeswap >>= bitcount;
s.totalhigh >>= bitcount;
s.freehigh >>= bitcount;
}
if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
__put_user(s.uptime, &info->uptime) ||
__put_user(s.loads[0], &info->loads[0]) ||
__put_user(s.loads[1], &info->loads[1]) ||
__put_user(s.loads[2], &info->loads[2]) ||
__put_user(s.totalram, &info->totalram) ||
__put_user(s.freeram, &info->freeram) ||
__put_user(s.sharedram, &info->sharedram) ||
__put_user(s.bufferram, &info->bufferram) ||
__put_user(s.totalswap, &info->totalswap) ||
__put_user(s.freeswap, &info->freeswap) ||
__put_user(s.procs, &info->procs) ||
__put_user(s.totalhigh, &info->totalhigh) ||
__put_user(s.freehigh, &info->freehigh) ||
__put_user(s.mem_unit, &info->mem_unit))
return -EFAULT;
return 0;
}
#endif /* CONFIG_COMPAT */